2
0

pig.js 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. /*
  2. * Pig Latin Mode for CodeMirror 2
  3. * @author Prasanth Jayachandran
  4. * @link https://github.com/prasanthj/pig-codemirror-2
  5. * This implementation is adapted from PL/SQL mode in CodeMirror 2.
  6. */
  7. CodeMirror.defineMode("pig", function(_config, parserConfig) {
  // Word tables handed in through the mode configuration. Each is consulted
  // with propertyIsEnumerable() on the upper-cased word, and each may be
  // absent (the tokenizer checks for that before using them).
  var keywords = parserConfig.keywords;
  var builtins = parserConfig.builtins;
  var types = parserConfig.types;
  // When truthy, a string literal that is not closed on its line keeps the
  // string tokenizer active for the following line.
  var multiLineStrings = parserConfig.multiLineStrings;
  // Single characters that may appear in an operator.
  var isOperatorChar = /[*+\-%<>=&?:\/!|]/;
  /**
   * Installs `f` as the active tokenizer on `state` and immediately runs it.
   *
   * @param stream  the stream being tokenized; passed through to `f`.
   * @param state   mode state; its `tokenize` property is overwritten.
   * @param f       tokenizer function taking (stream, state).
   * @returns whatever `f` returns.
   */
  function chain(stream, state, f) {
    var nextTokenizer = f;
    // Install before invoking so the tokenizer can see (or replace) itself.
    state.tokenize = nextTokenizer;
    return nextTokenizer(stream, state);
  }
  // Category passed to the most recent ret() call.
  var type;

  /**
   * Records the token category and hands back the style to emit.
   *
   * @param tp     token category to remember in `type`.
   * @param style  style string for the token; may be omitted.
   * @returns `style`, unchanged (undefined when omitted).
   */
  function ret(tp, style) {
    type = tp;
    return style;
  }
  /**
   * Tokenizer used inside a block comment. Consumes characters up to and
   * including the closing star-slash pair, or to the end of the available
   * input if the comment is not closed there.
   *
   * @param stream  character stream; advanced past the consumed text.
   * @param state   mode state; `tokenize` is reset to tokenBase once the
   *                comment terminator has been consumed.
   * @returns the "comment" style.
   */
  function tokenComment(stream, state) {
    var afterStar = false;
    for (var ch = stream.next(); ch; ch = stream.next()) {
      if (afterStar && ch === "/") {
        state.tokenize = tokenBase;
        break;
      }
      // Only a star immediately before the slash can close the comment.
      afterStar = ch === "*";
    }
    return ret("comment", "comment");
  }
  /**
   * Builds a tokenizer for a string literal opened by `quote`.
   *
   * The returned tokenizer consumes characters until an unescaped `quote`
   * or the end of the available input. It stays installed for the next
   * line only when the literal is still open and either the last character
   * was an escaping backslash or `multiLineStrings` is enabled; otherwise
   * it hands control back to tokenBase.
   *
   * NOTE(review): string tokens are emitted with the "error" style rather
   * than "string" -- confirm this is intentional.
   *
   * @param quote  the opening quote character.
   * @returns a tokenizer function taking (stream, state).
   */
  function tokenString(quote) {
    return function(stream, state) {
      var escaped = false;
      var closed = false;
      var next;
      for (;;) {
        next = stream.next();
        if (next == null) break;
        if (!escaped && next === quote) {
          closed = true;
          break;
        }
        // A backslash escapes the next character unless it is itself escaped.
        escaped = !escaped && next === "\\";
      }
      var continuesOnNextLine = !closed && (escaped || multiLineStrings);
      if (!continuesOnNextLine)
        state.tokenize = tokenBase;
      return ret("string", "error");
    };
  }
  /**
   * Default tokenizer: reads one token starting at the current stream
   * position and classifies it.
   *
   * Handles, in order: string literals (delegated to tokenString), single
   * punctuation characters, numbers, block comments and "--" line comments,
   * operators, and finally words, which are looked up case-insensitively in
   * the `keywords`, `builtins` and `types` tables.
   *
   * Every result goes through ret() so the recorded token category stays in
   * step with the returned style.
   *
   * @param stream  character stream positioned at the start of a token.
   * @param state   mode state; `tokenize` is replaced when a string or a
   *                block comment starts.
   * @returns the style for the token, or undefined for punctuation.
   */
  function tokenBase(stream, state) {
    var ch = stream.next();

    // Start of a string literal?
    if (ch == '"' || ch == "'")
      return chain(stream, state, tokenString(ch));

    // One of the special punctuation characters: recorded, but unstyled.
    if (/[\[\]{}\(\),;\.]/.test(ch))
      return ret(ch);

    // A number: leading digit followed by word characters and dots.
    if (/\d/.test(ch)) {
      stream.eatWhile(/[\w\.]/);
      return ret("number", "number");
    }

    // Block comment or an operator starting with "/".
    if (ch == "/") {
      if (stream.eat("*"))
        return chain(stream, state, tokenComment);
      stream.eatWhile(isOperatorChar);
      return ret("operator", "operator");
    }

    // Line comment ("--") or an operator starting with "-".
    if (ch == "-") {
      if (stream.eat("-")) {
        stream.skipToEnd();
        return ret("comment", "comment");
      }
      stream.eatWhile(isOperatorChar);
      return ret("operator", "operator");
    }

    // Any other operator.
    if (isOperatorChar.test(ch)) {
      stream.eatWhile(isOperatorChar);
      return ret("operator", "operator");
    }

    // Otherwise read the whole word and look it up, ignoring case.
    stream.eatWhile(/[\w\$_]/);
    var word = stream.current().toUpperCase();

    if (keywords && keywords.propertyIsEnumerable(word)) {
      // Keywords can be used as variables, as in flatten(group) or group.$0.
      // Look ahead without consuming, so the ")" or "." remains a separate
      // punctuation token instead of being folded into the word.
      var following = stream.peek();
      if (following == ")" || following == ".")
        return ret("variable", "pig-word");
      return ret("keyword", "keyword");
    }

    // One of the builtin functions?
    if (builtins && builtins.propertyIsEnumerable(word))
      return ret("keyword", "variable-2");

    // One of the listed data types?
    if (types && types.propertyIsEnumerable(word))
      return ret("keyword", "variable-3");

    // Default: a plain variable.
    return ret("variable", "pig-word");
  }
  111. // Interface
  112. return {
  113. startState: function() {
  114. return {
  115. tokenize: tokenBase,
  116. startOfLine: true
  117. };
  118. },
  119. token: function(stream, state) {
  120. if(stream.eatSpace()) return null;
  121. var style = state.tokenize(stream, state);
  122. return style;
  123. }
  124. };
  125. });
  126. (function() {
  /**
   * Turns a whitespace-separated word list into a lookup table.
   *
   * Leading, trailing or repeated separators are ignored, so a list that
   * ends in a space does not produce an empty-string entry.
   *
   * @param str  words separated by whitespace.
   * @returns an object with one own enumerable property, set to true,
   *          per word in `str`.
   */
  function keywords(str) {
    var obj = {}, words = str.split(/\s+/);
    for (var i = 0; i < words.length; ++i) {
      // split() yields "" at the edges when str starts or ends with whitespace.
      if (words[i]) obj[words[i]] = true;
    }
    return obj;
  }
  // Builtin function names (taken from trunk revision 1303237).
  var builtinNames =
    "ABS ACOS ARITY ASIN ATAN AVG BAGSIZE BINSTORAGE BLOOM BUILDBLOOM CBRT CEIL " +
    "CONCAT COR COS COSH COUNT COUNT_STAR COV CONSTANTSIZE CUBEDIMENSIONS DIFF DISTINCT DOUBLEABS " +
    "DOUBLEAVG DOUBLEBASE DOUBLEMAX DOUBLEMIN DOUBLEROUND DOUBLESUM EXP FLOOR FLOATABS FLOATAVG " +
    "FLOATMAX FLOATMIN FLOATROUND FLOATSUM GENERICINVOKER INDEXOF INTABS INTAVG INTMAX INTMIN " +
    "INTSUM INVOKEFORDOUBLE INVOKEFORFLOAT INVOKEFORINT INVOKEFORLONG INVOKEFORSTRING INVOKER " +
    "ISEMPTY JSONLOADER JSONMETADATA JSONSTORAGE LAST_INDEX_OF LCFIRST LOG LOG10 LOWER LONGABS " +
    "LONGAVG LONGMAX LONGMIN LONGSUM MAX MIN MAPSIZE MONITOREDUDF NONDETERMINISTIC OUTPUTSCHEMA " +
    "PIGSTORAGE PIGSTREAMING RANDOM REGEX_EXTRACT REGEX_EXTRACT_ALL REPLACE ROUND SIN SINH SIZE " +
    "SQRT STRSPLIT SUBSTRING SUM STRINGCONCAT STRINGMAX STRINGMIN STRINGSIZE TAN TANH TOBAG " +
    "TOKENIZE TOMAP TOP TOTUPLE TRIM TEXTLOADER TUPLESIZE UCFIRST UPPER UTF8STORAGECONVERTER ";

  // Language keywords (taken from QueryLexer.g).
  var keywordNames =
    "VOID IMPORT RETURNS DEFINE LOAD FILTER FOREACH ORDER CUBE DISTINCT COGROUP " +
    "JOIN CROSS UNION SPLIT INTO IF OTHERWISE ALL AS BY USING INNER OUTER ONSCHEMA PARALLEL " +
    "PARTITION GROUP AND OR NOT GENERATE FLATTEN ASC DESC IS STREAM THROUGH STORE MAPREDUCE " +
    "SHIP CACHE INPUT OUTPUT STDERROR STDIN STDOUT LIMIT SAMPLE LEFT RIGHT FULL EQ GT LT GTE LTE " +
    "NEQ MATCHES TRUE FALSE ";

  // Data type names.
  var typeNames = "BOOLEAN INT LONG FLOAT DOUBLE CHARARRAY BYTEARRAY BAG TUPLE MAP ";

  // Register the MIME type, passing the three word tables as the
  // configuration of the "pig" mode.
  var pigMimeSpec = {
    name: "pig",
    builtins: keywords(builtinNames),
    keywords: keywords(keywordNames),
    types: keywords(typeNames)
  };
  CodeMirror.defineMIME("text/x-pig", pigMimeSpec);
  157. }());