summaryrefslogtreecommitdiffstats
path: root/media/CodeMirror-0.62/js/tokenizejavascript.js
diff options
context:
space:
mode:
Diffstat (limited to 'media/CodeMirror-0.62/js/tokenizejavascript.js')
-rw-r--r--media/CodeMirror-0.62/js/tokenizejavascript.js175
1 files changed, 175 insertions, 0 deletions
diff --git a/media/CodeMirror-0.62/js/tokenizejavascript.js b/media/CodeMirror-0.62/js/tokenizejavascript.js
new file mode 100644
index 0000000..f55dfce
--- /dev/null
+++ b/media/CodeMirror-0.62/js/tokenizejavascript.js
@@ -0,0 +1,175 @@
+/* Tokenizer for JavaScript code */
+
+var tokenizeJavaScript = (function() {
+ // Advance the stream until the given character (not preceded by a
+ // backslash) is encountered, or the end of the line is reached.
+ function nextUntilUnescaped(source, end) {
+ var escaped = false;
+ var next;
+ while (!source.endOfLine()) {
+ var next = source.next();
+ if (next == end && !escaped)
+ return false;
+ escaped = !escaped && next == "\\";
+ }
+ return escaped;
+ }
+
+ // A map of JavaScript's keywords. The a/b/c keyword distinction is
+ // very rough, but it gives the parser enough information to parse
+ // correct code correctly (we don't care that much how we parse
+ // incorrect code). The style information included in these objects
+ // is used by the highlighter to pick the correct CSS style for a
+ // token.
+ var keywords = function(){
+ function result(type, style){
+ return {type: type, style: "js-" + style};
+ }
+ // keywords that take a parenthised expression, and then a
+ // statement (if)
+ var keywordA = result("keyword a", "keyword");
+ // keywords that take just a statement (else)
+ var keywordB = result("keyword b", "keyword");
+ // keywords that optionally take an expression, and form a
+ // statement (return)
+ var keywordC = result("keyword c", "keyword");
+ var operator = result("operator", "keyword");
+ var atom = result("atom", "atom");
+ return {
+ "if": keywordA, "while": keywordA, "with": keywordA,
+ "else": keywordB, "do": keywordB, "try": keywordB, "finally": keywordB,
+ "return": keywordC, "break": keywordC, "continue": keywordC, "new": keywordC, "delete": keywordC, "throw": keywordC,
+ "in": operator, "typeof": operator, "instanceof": operator,
+ "var": result("var", "keyword"), "function": result("function", "keyword"), "catch": result("catch", "keyword"),
+ "for": result("for", "keyword"), "switch": result("switch", "keyword"),
+ "case": result("case", "keyword"), "default": result("default", "keyword"),
+ "true": atom, "false": atom, "null": atom, "undefined": atom, "NaN": atom, "Infinity": atom
+ };
+ }();
+
+ // Some helper regexps
+ var isOperatorChar = /[+\-*&%\/=<>!?|]/;
+ var isHexDigit = /[0-9A-Fa-f]/;
+ var isWordChar = /[\w\$_]/;
+
+ // Wrapper around jsToken that helps maintain parser state (whether
+ // we are inside of a multi-line comment and whether the next token
+ // could be a regular expression).
+ function jsTokenState(inside, regexp) {
+ return function(source, setState) {
+ var newInside = inside;
+ var type = jsToken(inside, regexp, source, function(c) {newInside = c;});
+ var newRegexp = type.type == "operator" || type.type == "keyword c" || type.type.match(/^[\[{}\(,;:]$/);
+ if (newRegexp != regexp || newInside != inside)
+ setState(jsTokenState(newInside, newRegexp));
+ return type;
+ };
+ }
+
+ // The token reader, inteded to be used by the tokenizer from
+ // tokenize.js (through jsTokenState). Advances the source stream
+ // over a token, and returns an object containing the type and style
+ // of that token.
+ function jsToken(inside, regexp, source, setInside) {
+ function readHexNumber(){
+ source.next(); // skip the 'x'
+ source.nextWhileMatches(isHexDigit);
+ return {type: "number", style: "js-atom"};
+ }
+
+ function readNumber() {
+ source.nextWhileMatches(/[0-9]/);
+ if (source.equals(".")){
+ source.next();
+ source.nextWhileMatches(/[0-9]/);
+ }
+ if (source.equals("e") || source.equals("E")){
+ source.next();
+ if (source.equals("-"))
+ source.next();
+ source.nextWhileMatches(/[0-9]/);
+ }
+ return {type: "number", style: "js-atom"};
+ }
+ // Read a word, look it up in keywords. If not found, it is a
+ // variable, otherwise it is a keyword of the type found.
+ function readWord() {
+ source.nextWhileMatches(isWordChar);
+ var word = source.get();
+ var known = keywords.hasOwnProperty(word) && keywords.propertyIsEnumerable(word) && keywords[word];
+ return known ? {type: known.type, style: known.style, content: word} :
+ {type: "variable", style: "js-variable", content: word};
+ }
+ function readRegexp() {
+ nextUntilUnescaped(source, "/");
+ source.nextWhileMatches(/[gi]/);
+ return {type: "regexp", style: "js-string"};
+ }
+ // Mutli-line comments are tricky. We want to return the newlines
+ // embedded in them as regular newline tokens, and then continue
+ // returning a comment token for every line of the comment. So
+ // some state has to be saved (inside) to indicate whether we are
+ // inside a /* */ sequence.
+ function readMultilineComment(start){
+ var newInside = "/*";
+ var maybeEnd = (start == "*");
+ while (true) {
+ if (source.endOfLine())
+ break;
+ var next = source.next();
+ if (next == "/" && maybeEnd){
+ newInside = null;
+ break;
+ }
+ maybeEnd = (next == "*");
+ }
+ setInside(newInside);
+ return {type: "comment", style: "js-comment"};
+ }
+ function readOperator() {
+ source.nextWhileMatches(isOperatorChar);
+ return {type: "operator", style: "js-operator"};
+ }
+ function readString(quote) {
+ var endBackSlash = nextUntilUnescaped(source, quote);
+ setInside(endBackSlash ? quote : null);
+ return {type: "string", style: "js-string"};
+ }
+
+ // Fetch the next token. Dispatches on first character in the
+ // stream, or first two characters when the first is a slash.
+ if (inside == "\"" || inside == "'")
+ return readString(inside);
+ var ch = source.next();
+ if (inside == "/*")
+ return readMultilineComment(ch);
+ else if (ch == "\"" || ch == "'")
+ return readString(ch);
+ // with punctuation, the type of the token is the symbol itself
+ else if (/[\[\]{}\(\),;\:\.]/.test(ch))
+ return {type: ch, style: "js-punctuation"};
+ else if (ch == "0" && (source.equals("x") || source.equals("X")))
+ return readHexNumber();
+ else if (/[0-9]/.test(ch))
+ return readNumber();
+ else if (ch == "/"){
+ if (source.equals("*"))
+ { source.next(); return readMultilineComment(ch); }
+ else if (source.equals("/"))
+ { nextUntilUnescaped(source, null); return {type: "comment", style: "js-comment"};}
+ else if (regexp)
+ return readRegexp();
+ else
+ return readOperator();
+ }
+ else if (isOperatorChar.test(ch))
+ return readOperator();
+ else
+ return readWord();
+ }
+
+ // The external interface to the tokenizer.
+ return function(source, startState) {
+ return tokenizer(source, startState || jsTokenState(false, true));
+ };
+})();