diff --git a/mode/erlang/erlang.js b/mode/erlang/erlang.js index af8953c33b81f5e370f9d8e6ac1c89c00c7b91f6..bc3bdce86049bd768c829bda6795b31c2d9dacae 100644 --- a/mode/erlang/erlang.js +++ b/mode/erlang/erlang.js @@ -1,45 +1,24 @@ -// block; "begin", "case", "fun", "if", "receive", "try": closed by "end" -// block internal; "after", "catch", "of" -// guard; "when", closed by "->" -// "->" opens a clause, closed by ";" or "." -// "<<" opens a binary, closed by ">>" -// "," appears in arglists, lists, tuples and terminates lines of code -// "." resets indentation to 0 -// obsolete; "cond", "let", "query" +/*jshint unused:true, eqnull:true, curly:true, bitwise:true */ +/*jshint undef:true, latedef:true, trailing:true */ +/*global CodeMirror:true */ + +// erlang mode. +// tokenizer -> token types -> CodeMirror styles +// tokenizer maintains a parse stack +// indenter uses the parse stack + +// TODO indenter: +// bit syntax +// old guard/bif/conversion clashes (e.g. "float/1") +// type/spec/opaque CodeMirror.defineMIME("text/x-erlang", "erlang"); CodeMirror.defineMode("erlang", function(cmCfg) { + "use strict"; - function rval(state,_stream,type) { - // distinguish between "." as terminator and record field operator - state.in_record = (type == "record"); - - // erlang -> CodeMirror tag - switch (type) { - case "atom": return "atom"; - case "attribute": return "attribute"; - case "boolean": return "special"; - case "builtin": return "builtin"; - case "comment": return "comment"; - case "fun": return "meta"; - case "function": return "tag"; - case "guard": return "property"; - case "keyword": return "keyword"; - case "macro": return "variable-2"; - case "number": return "number"; - case "operator": return "operator"; - case "record": return "bracket"; - case "string": return "string"; - case "type": return "def"; - case "variable": return "variable"; - case "error": return "error"; - case "separator": return null; - case "open_paren": return null; - case "close_paren": return null; - default: return null; - } - } +///////////////////////////////////////////////////////////////////////////// +// constants var typeWords = [ "-type", "-spec", "-export_type", "-opaque"]; @@ -48,23 +27,23 @@ CodeMirror.defineMode("erlang", function(cmCfg) { "after","begin","catch","case","cond","end","fun","if", "let","of","query","receive","try","when"]; - var separatorRE = /[\->\.,:;]/; + var separatorRE = /[\->,;]/; var separatorWords = [ - "->",";",":",".",","]; + "->",";",","]; - var operatorWords = [ + var operatorAtomWords = [ "and","andalso","band","bnot","bor","bsl","bsr","bxor", "div","not","or","orelse","rem","xor"]; - var symbolRE = /[\+\-\*\/<>=\|:!]/; - var symbolWords = [ - "+","-","*","/",">",">=","<","=<","=:=","==","=/=","/=","||","<-","!"]; + var operatorSymbolRE = /[\+\-\*\/<>=\|:!]/; + var operatorSymbolWords = [ + "=","+","-","*","/",">",">=","<","=<","=:=","==","=/=","/=","||","<-","!"]; - var openParenRE = /[<\(\[\{]/; + var openParenRE = /[<\(\[\{]/; var openParenWords = [ "<<","(","[","{"]; - var closeParenRE = /[>\)\]\}]/; + var closeParenRE = /[>\)\]\}]/; var closeParenWords = [ "}","]",")",">>"]; @@ -99,23 +78,25 @@ CodeMirror.defineMode("erlang", function(cmCfg) { "term_to_binary","time","throw","tl","trunc","tuple_size", "tuple_to_list","unlink","unregister","whereis"]; -// [Ø-Þ] [À-Ö] -// [ß-ö] [ø-ÿ] +// upper case: [A-Z] [Ø-Þ] [À-Ö] +// lower case: [a-z] [ß-ö] [ø-ÿ] var anumRE = /[\w@Ø-ÞÀ-Öß-öø-ÿ]/; var escapesRE = /[0-7]{1,3}|[bdefnrstv\\"']|\^[a-zA-Z]|x[0-9a-zA-Z]{2}|x{[0-9a-zA-Z]+}/; - function tokenize(stream, state) { +///////////////////////////////////////////////////////////////////////////// +// tokenizer + function tokenizer(stream,state) { // in multi-line string if (state.in_string) { - state.in_string = (!doubleQuote(stream)); + state.in_string = (!doubleQuote(stream)); return rval(state,stream,"string"); } // in multi-line atom if (state.in_atom) { - state.in_atom = (!singleQuote(stream)); + state.in_atom = (!singleQuote(stream)); return rval(state,stream,"atom"); } @@ -125,9 +106,9 @@ CodeMirror.defineMode("erlang", function(cmCfg) { } // attributes and type specs - if ((peekToken(state).token == "") && + if (!peekToken(state) && stream.match(/-\s*[a-zß-öø-ÿ][\wØ-ÞÀ-Öß-öø-ÿ]*/)) { - if (isMember(stream.current(),typeWords)) { + if (is_member(stream.current(),typeWords)) { return rval(state,stream,"type"); }else{ return rval(state,stream,"attribute"); @@ -142,32 +123,43 @@ CodeMirror.defineMode("erlang", function(cmCfg) { return rval(state,stream,"comment"); } + // colon + if (ch == ":") { + return rval(state,stream,"colon"); + } + // macro if (ch == '?') { + stream.eatSpace(); stream.eatWhile(anumRE); return rval(state,stream,"macro"); } // record if (ch == "#") { + stream.eatSpace(); stream.eatWhile(anumRE); return rval(state,stream,"record"); } // dollar escape - if ( ch == "$" ) { + if (ch == "$") { if (stream.next() == "\\" && !stream.match(escapesRE)) { return rval(state,stream,"error"); } return rval(state,stream,"number"); } + // dot + if (ch == ".") { + return rval(state,stream,"dot"); + } + // quoted atom if (ch == '\'') { if (!(state.in_atom = (!singleQuote(stream)))) { if (stream.match(/\s*\/\s*[0-9]/,false)) { stream.match(/\s*\/\s*[0-9]/,true); - popToken(state); return rval(state,stream,"fun"); // 'f'/0 style fun } if (stream.match(/\s*\(/,false) || stream.match(/\s*:/,false)) { @@ -195,34 +187,37 @@ CodeMirror.defineMode("erlang", function(cmCfg) { if (stream.match(/\s*\/\s*[0-9]/,false)) { stream.match(/\s*\/\s*[0-9]/,true); - popToken(state); return rval(state,stream,"fun"); // f/0 style fun } var w = stream.current(); - if (isMember(w,keywordWords)) { - pushToken(state,stream); + if (is_member(w,keywordWords)) { return rval(state,stream,"keyword"); + }else if (is_member(w,operatorAtomWords)) { + return rval(state,stream,"operator"); }else if (stream.match(/\s*\(/,false)) { // 'put' and 'erlang:put' are bifs, 'foo:put' is not - if (isMember(w,bifWords) && - (!isPrev(stream,":") || isPrev(stream,"erlang:"))) { + if (is_member(w,bifWords) && + ((peekToken(state).token != ":") || + (peekToken(state,2).token == "erlang"))) { return rval(state,stream,"builtin"); - }else if (isMember(w,guardWords)) { + }else if (is_member(w,guardWords)) { return rval(state,stream,"guard"); }else{ return rval(state,stream,"function"); } - }else if (isMember(w,operatorWords)) { + }else if (is_member(w,operatorAtomWords)) { return rval(state,stream,"operator"); - }else if (stream.match(/\s*:/,false)) { + }else if (lookahead(stream) == ":") { if (w == "erlang") { return rval(state,stream,"builtin"); } else { return rval(state,stream,"function"); } - }else if (isMember(w,["true","false"])) { + }else if (is_member(w,["true","false"])) { + return rval(state,stream,"boolean"); + }else if (is_member(w,["true","false"])) { return rval(state,stream,"boolean"); }else{ return rval(state,stream,"atom"); @@ -234,15 +229,25 @@ CodeMirror.defineMode("erlang", function(cmCfg) { var radixRE = /[0-9a-zA-Z]/; // 36#zZ style int if (digitRE.test(ch)) { stream.eatWhile(digitRE); - if (stream.eat('#')) { - stream.eatWhile(radixRE); // 36#aZ style integer - } else { - if (stream.eat('.')) { // float - stream.eatWhile(digitRE); + if (stream.eat('#')) { // 36#aZ style integer + if (!stream.eatWhile(radixRE)) { + stream.backUp(1); //"36#" - syntax error } - if (stream.eat(/[eE]/)) { - stream.eat(/[-+]/); // float with exponent - stream.eatWhile(digitRE); + } else if (stream.eat('.')) { // float + if (!stream.eatWhile(digitRE)) { + stream.backUp(1); // "3." - probably end of function + } else { + if (stream.eat(/[eE]/)) { // float with exponent + if (stream.eat(/[-+]/)) { + if (!stream.eatWhile(digitRE)) { + stream.backUp(2); // "2e-" - syntax error + } + } else { + if (!stream.eatWhile(digitRE)) { + stream.backUp(1); // "2e" - syntax error + } + } + } } } return rval(state,stream,"number"); // normal integer @@ -250,50 +255,35 @@ CodeMirror.defineMode("erlang", function(cmCfg) { // open parens if (nongreedy(stream,openParenRE,openParenWords)) { - pushToken(state,stream); return rval(state,stream,"open_paren"); } // close parens if (nongreedy(stream,closeParenRE,closeParenWords)) { - pushToken(state,stream); return rval(state,stream,"close_paren"); } // separators if (greedy(stream,separatorRE,separatorWords)) { - // distinguish between "." as terminator and record field operator - if (!state.in_record) { - pushToken(state,stream); - } return rval(state,stream,"separator"); } // operators - if (greedy(stream,symbolRE,symbolWords)) { + if (greedy(stream,operatorSymbolRE,operatorSymbolWords)) { return rval(state,stream,"operator"); } return rval(state,stream,null); } - function isPrev(stream,string) { - var start = stream.start; - var len = string.length; - if (len <= start) { - var word = stream.string.slice(start-len,start); - return word == string; - }else{ - return false; - } - } - +///////////////////////////////////////////////////////////////////////////// +// utilities function nongreedy(stream,re,words) { if (stream.current().length == 1 && re.test(stream.current())) { stream.backUp(1); while (re.test(stream.peek())) { stream.next(); - if (isMember(stream.current(),words)) { + if (is_member(stream.current(),words)) { return true; } } @@ -308,7 +298,7 @@ CodeMirror.defineMode("erlang", function(cmCfg) { stream.next(); } while (0 < stream.current().length) { - if (isMember(stream.current(),words)) { + if (is_member(stream.current(),words)) { return true; }else{ stream.backUp(1); @@ -339,144 +329,277 @@ CodeMirror.defineMode("erlang", function(cmCfg) { return false; } - function isMember(element,list) { + function lookahead(stream) { + var m = stream.match(/([\n\s]+|%[^\n]*\n)*(.)/,false); + return m ? m.pop() : ""; + } + + function is_member(element,list) { return (-1 < list.indexOf(element)); } -///////////////////////////////////////////////////////////////////////////// - function myIndent(state,textAfter) { - var indent = cmCfg.indentUnit; - var token = (peekToken(state)).token; - var wordAfter = takewhile(textAfter,/[^a-z]/); + function rval(state,stream,type) { - if (state.in_string || state.in_atom) { - return CodeMirror.Pass; - }else if (token == "") { - return 0; - }else if (isMember(token,openParenWords)) { - return (peekToken(state)).column+token.length; - }else if (token == "when") { - return (peekToken(state)).column+token.length+1; - }else if (token == "fun" && wordAfter == "") { - return (peekToken(state)).column+token.length; - }else if (token == "->") { - if (isMember(wordAfter,["end","after","catch"])) { - return peekToken(state,2).column; - }else if (peekToken(state,2).token == "fun") { - return peekToken(state,2).column+indent; - }else if (peekToken(state,2).token == "") { - return indent; - }else{ - return (peekToken(state)).indent+indent; - } - }else if (isMember(wordAfter,["after","catch","of"])) { - return (peekToken(state)).indent; - }else{ - return (peekToken(state)).column+indent; + // parse stack + pushToken(state,realToken(type,stream)); + + // map erlang token type to CodeMirror style class + // erlang -> CodeMirror tag + switch (type) { + case "atom": return "atom"; + case "attribute": return "attribute"; + case "boolean": return "special"; + case "builtin": return "builtin"; + case "close_paren": return null; + case "colon": return null; + case "comment": return "comment"; + case "dot": return null; + case "error": return "error"; + case "fun": return "meta"; + case "function": return "tag"; + case "guard": return "property"; + case "keyword": return "keyword"; + case "macro": return "variable-2"; + case "number": return "number"; + case "open_paren": return null; + case "operator": return "operator"; + case "record": return "bracket"; + case "separator": return null; + case "string": return "string"; + case "type": return "def"; + case "variable": return "variable"; + default: return null; } } - function takewhile(str,re) { - var m = str.match(re); - return m ? str.slice(0,m.index) : str; + function aToken(tok,col,ind,typ) { + return {token: tok, + column: col, + indent: ind, + type: typ}; } - function Token(stream) { - this.token = stream ? stream.current() : ""; - this.column = stream ? stream.column() : 0; - this.indent = stream ? stream.indentation() : 0; + function realToken(type,stream) { + return aToken(stream.current(), + stream.column(), + stream.indentation(), + type); } - function popToken(state) { - return state.tokenStack.pop(); + function fakeToken(type) { + return aToken(type,0,0,type); } function peekToken(state,depth) { var len = state.tokenStack.length; var dep = (depth ? depth : 1); + if (len < dep) { - return new Token; + return false; }else{ return state.tokenStack[len-dep]; } } - function pushToken(state,stream) { - var token = stream.current(); - var prev_token = peekToken(state).token; + function pushToken(state,token) { - if (token == ".") { - state.tokenStack = []; - return false; - }else if(isMember(token,[",", ":", "of", "cond", "let", "query"])) { - return false; - }else if (drop_last(prev_token,token)) { - return false; - }else if (drop_both(prev_token,token)) { - popToken(state); - return false; - }else if (drop_first(prev_token,token)) { - popToken(state); - return pushToken(state,stream); - }else if (isMember(token,["after","catch"])) { - return false; + if (!(token.type == "comment" || token.type == "whitespace")) { + state.tokenStack = maybe_drop_pre(state.tokenStack,token); + state.tokenStack = maybe_drop_post(state.tokenStack); + } + } + + function maybe_drop_pre(s,token) { + var last = s.length-1; + + if (0 < last && s[last].type === "record" && token.type === "dot") { + s.pop(); + }else if (0 < last && s[last].type === "group") { + s.pop(); + s.push(token); }else{ - state.tokenStack.push(new Token(stream)); - return true; + s.push(token); } + return s; } - function drop_last(open, close) { - switch(open+" "+close) { - case "when ;": return true; - default: return false; + function maybe_drop_post(s) { + var last = s.length-1; + + if (s[last].type === "dot") { + return []; + } + if (s[last].type === "fun" && s[last-1].token === "fun") { + return s.slice(0,last-1); + } + switch (s[s.length-1].token) { + case "}": return d(s,{g:["{"]}); + case "]": return d(s,{i:["["]}); + case ")": return d(s,{i:["("]}); + case ">>": return d(s,{i:["<<"]}); + case "end": return d(s,{i:["begin","case","fun","if","receive","try"]}); + case ",": return d(s,{e:["begin","try","when","->", + ",","(","[","{","<<"]}); + case "->": return d(s,{r:["when"], + m:["try","if","case","receive"]}); + case ";": return d(s,{E:["case","fun","if","receive","try","when"]}); + case "catch":return d(s,{e:["try"]}); + case "of": return d(s,{e:["case"]}); + case "after":return d(s,{e:["receive","try"]}); + default: return s; } } - function drop_first(open, close) { - switch (open+" "+close) { - case "when ->": return true; - case "-> end": return true; - default: return false; + function d(stack,tt) { + // stack is a stack of Token objects. + // tt is an object; {type:tokens} + // type is a char, tokens is a list of token strings. + // The function returns (possibly truncated) stack. + // It will descend the stack, looking for a Token such that Token.token + // is a member of tokens. If it does not find that, it will normally (but + // see "E" below) return stack. If it does find a match, it will remove + // all the Tokens between the top and the matched Token. + // If type is "m", that is all it does. + // If type is "i", it will also remove the matched Token and the top Token. + // If type is "g", like "i", but add a fake "group" token at the top. + // If type is "r", it will remove the matched Token, but not the top Token. + // If type is "e", it will keep the matched Token but not the top Token. + // If type is "E", it behaves as for type "e", except if there is no match, + // in which case it will return an empty stack. + + for (var type in tt) { + var len = stack.length-1; + var tokens = tt[type]; + for (var i = len-1; -1 < i ; i--) { + if (is_member(stack[i].token,tokens)) { + var ss = stack.slice(0,i); + switch (type) { + case "m": return ss.concat(stack[i]).concat(stack[len]); + case "r": return ss.concat(stack[len]); + case "i": return ss; + case "g": return ss.concat(fakeToken("group")); + case "E": return ss.concat(stack[i]); + case "e": return ss.concat(stack[i]); + } + } + } } + return (type == "E" ? [] : stack); } - function drop_both(open, close) { - switch (open+" "+close) { - case "( )": return true; - case "[ ]": return true; - case "{ }": return true; - case "<< >>": return true; - case "begin end": return true; - case "case end": return true; - case "fun end": return true; - case "if end": return true; - case "receive end": return true; - case "try end": return true; - case "-> catch": return true; - case "-> after": return true; - case "-> ;": return true; - default: return false; +///////////////////////////////////////////////////////////////////////////// +// indenter + + function indenter(state,textAfter) { + var t; + var unit = cmCfg.indentUnit; + var wordAfter = wordafter(textAfter); + var currT = peekToken(state,1); + var prevT = peekToken(state,2); + + if (state.in_string || state.in_atom) { + return CodeMirror.Pass; + }else if (!prevT) { + return 0; + }else if (currT.token == "when") { + return currT.column+unit; + }else if (wordAfter === "when" && prevT.type === "function") { + return prevT.indent+unit; + }else if (wordAfter === "(" && currT.token === "fun") { + return currT.column+3; + }else if (wordAfter === "catch" && (t = getToken(state,["try"]))) { + return t.column; + }else if (is_member(wordAfter,["end","after","of"])) { + t = getToken(state,["begin","case","fun","if","receive","try"]); + return t ? t.column : CodeMirror.Pass; + }else if (is_member(wordAfter,closeParenWords)) { + t = getToken(state,openParenWords); + return t ? t.column : CodeMirror.Pass; + }else if (is_member(currT.token,[",","|","||"]) || + is_member(wordAfter,[",","|","||"])) { + t = postcommaToken(state); + return t ? t.column+t.token.length : unit; + }else if (currT.token == "->") { + if (is_member(prevT.token, ["receive","case","if","try"])) { + return prevT.column+unit+unit; + }else{ + return prevT.column+unit; + } + }else if (is_member(currT.token,openParenWords)) { + return currT.column+currT.token.length; + }else{ + t = defaultToken(state); + return truthy(t) ? t.column+unit : 0; } } + function wordafter(str) { + var m = str.match(/,|[a-z]+|\}|\]|\)|>>|\|+|\(/); + + return truthy(m) && (m.index === 0) ? m[0] : ""; + } + + function postcommaToken(state) { + var objs = state.tokenStack.slice(0,-1); + var i = getTokenIndex(objs,"type",["open_paren"]); + + return truthy(objs[i]) ? objs[i] : false; + } + + function defaultToken(state) { + var objs = state.tokenStack; + var stop = getTokenIndex(objs,"type",["open_paren","separator","keyword"]); + var oper = getTokenIndex(objs,"type",["operator"]); + + if (truthy(stop) && truthy(oper) && stop < oper) { + return objs[stop+1]; + } else if (truthy(stop)) { + return objs[stop]; + } else { + return false; + } + } + + function getToken(state,tokens) { + var objs = state.tokenStack; + var i = getTokenIndex(objs,"token",tokens); + + return truthy(objs[i]) ? objs[i] : false; + } + + function getTokenIndex(objs,propname,propvals) { + + for (var i = objs.length-1; -1 < i ; i--) { + if (is_member(objs[i][propname],propvals)) { + return i; + } + } + return false; + } + + function truthy(x) { + return (x !== false) && (x != null); + } + +///////////////////////////////////////////////////////////////////////////// +// this object defines the mode + return { startState: function() { return {tokenStack: [], - in_record: false, in_string: false, in_atom: false}; }, token: function(stream, state) { - return tokenize(stream, state); + return tokenizer(stream, state); }, indent: function(state, textAfter) { - return myIndent(state,textAfter); + return indenter(state,textAfter); }, lineComment: "%" diff --git a/mode/erlang/index.html b/mode/erlang/index.html index e63e231201071b1d9a7a976add57f3595f4070f1..9cacb759f6931cd176d81283ff5c3c243b36fb4e 100644 --- a/mode/erlang/index.html +++ b/mode/erlang/index.html @@ -51,11 +51,12 @@ expand_recs(M,Tup) when is_tuple(Tup) -> case tuple_size(Tup) of L when L < 1 -> Tup; L -> - try Fields = M:rec_info(element(1,Tup)), - L = length(Fields)+1, - lists:zip(Fields,expand_recs(M,tl(tuple_to_list(Tup)))) - catch _:_ -> - list_to_tuple(expand_recs(M,tuple_to_list(Tup))) + try + Fields = M:rec_info(element(1,Tup)), + L = length(Fields)+1, + lists:zip(Fields,expand_recs(M,tl(tuple_to_list(Tup)))) + catch + _:_ -> list_to_tuple(expand_recs(M,tuple_to_list(Tup))) end end; expand_recs(_,Term) ->