From 2c7e71167ce0f3d455eb6333be3162f1f4c66e95 Mon Sep 17 00:00:00 2001
From: mats cronqvist <masse@klarna.com>
Date: Fri, 18 May 2012 03:35:15 +0200
Subject: [PATCH] [erlang mode] Add indentation support

---
 mode/erlang/erlang.js  | 354 ++++++++++++++++++++++++++++++++---------
 mode/erlang/index.html |   1 +
 2 files changed, 284 insertions(+), 71 deletions(-)

diff --git a/mode/erlang/erlang.js b/mode/erlang/erlang.js
index 5c094f8e5..e79ab7668 100644
--- a/mode/erlang/erlang.js
+++ b/mode/erlang/erlang.js
@@ -1,26 +1,53 @@
-// erlang    -> CodeMirror tag
-//
-// atom      -> atom
-// attribute -> attribute
-// builtin   -> builtin
-// comment   -> comment
-// error     -> error
-// fun       -> meta
-// function  -> tag
-// guard     -> property
-// keyword   -> keyword
-// macro     -> variable-2
-// number    -> number
-// operator  -> operator
-// record    -> bracket
-// string    -> string
-// type      -> def
-// variable  -> variable
+// block openers: "begin", "case", "fun", "if", "receive", "try"; closed by "end"
+// block-internal keywords: "after", "catch", "of"
+// guard: "when", closed by "->"
+// "->" opens a clause, closed by ";" or "."
+// "<<" opens a binary, closed by ">>"
+// "," appears in arglists, lists and tuples, and terminates lines of code
+// "." resets indentation to 0
+// obsolete keywords: "cond", "let", "query"
 
 CodeMirror.defineMIME("text/x-erlang", "erlang");
 
 CodeMirror.defineMode("erlang", function(cmCfg, modeCfg) {
 
+  function rval(state,stream,type) {
+    // flag a record token: a "." right after it is a field accessor, not a terminator
+    if (type == "record") {
+      state.context = "record";
+    }else{
+      state.context = false;
+    }
+
+    // remember the last significant token (not whitespace or comment) for indenting
+    if (type != "whitespace" && type != "comment") {
+      state.lastToken = stream.current();
+    }
+    //     erlang             -> CodeMirror tag
+    switch (type) {
+      case "atom":        return "atom";
+      case "attribute":   return "attribute";
+      case "builtin":     return "builtin";
+      case "comment":     return "comment";
+      case "fun":         return "meta";
+      case "function":    return "tag";
+      case "guard":       return "property";
+      case "keyword":     return "keyword";
+      case "macro":       return "variable-2";
+      case "number":      return "number";
+      case "operator":    return "operator";
+      case "record":      return "bracket";
+      case "string":      return "string";
+      case "type":        return "def";
+      case "variable":    return "variable";
+      case "error":       return "error";
+      case "separator":   return null;
+      case "open_paren":  return null;
+      case "close_paren": return null;
+      default:            return null;
+    }
+  }
+
   var typeWords = [
     "-type", "-spec", "-export_type", "-opaque"];
 
@@ -28,13 +55,22 @@ CodeMirror.defineMode("erlang", function(cmCfg, modeCfg) {
     "after","begin","catch","case","cond","end","fun","if",
     "let","of","query","receive","try","when"];
 
+  var separatorWords = [
+    "->",";",":",".",","];
+
   var operatorWords = [
     "and","andalso","band","bnot","bor","bsl","bsr","bxor",
     "div","not","or","orelse","rem","xor"];
 
-  var operatorSymbols = [
+  var symbolWords = [
     "+","-","*","/",">",">=","<","=<","=:=","==","=/=","/=","||","<-"];
 
+  var openParenWords = [
+    "<<","(","[","{"];
+
+  var closeParenWords = [
+    "}","]",")",">>"];
+
   var guardWords = [
     "is_atom","is_binary","is_bitstring","is_boolean","is_float",
     "is_function","is_integer","is_list","is_number","is_pid",
@@ -66,6 +102,21 @@ CodeMirror.defineMode("erlang", function(cmCfg, modeCfg) {
     "term_to_binary","time","throw","tl","trunc","tuple_size",
     "tuple_to_list","unlink","unregister","whereis"];
 
+  // ignored for indenting purposes: pushToken never puts these on the token stack
+  var ignoreWords = [
+    ",", ":", "catch", "after", "of", "cond", "let", "query"];
+
+
+  var smallRE      = /[a-z_]/;
+  var largeRE      = /[A-Z_]/;
+  var digitRE      = /[0-9]/;
+  var octitRE      = /[0-7]/;
+  var anumRE       = /[a-z_A-Z0-9]/;
+  var symbolRE     = /[\+\-\*\/<>=\|:]/;
+  var openParenRE  = /[<\(\[\{]/;
+  var closeParenRE = /[>\)\]\}]/;
+  var sepRE        = /[\->\.,:;]/;
+
   function isMember(element,list) {
     return (-1 < list.indexOf(element));
   }
@@ -81,27 +132,20 @@ CodeMirror.defineMode("erlang", function(cmCfg, modeCfg) {
     }
   }
 
-  var smallRE = /[a-z_]/;
-  var largeRE = /[A-Z_]/;
-  var digitRE = /[0-9]/;
-  var octitRE = /[0-7]/;
-  var idRE = /[a-z_A-Z0-9]/;
-
   function tokenize(stream, state) {
     if (stream.eatSpace()) {
-      return null;
+      return rval(state,stream,"whitespace");
     }
 
     // attributes and type specs
-    if (stream.sol() && stream.peek() == '-') {
+    if ((peekToken(state).token == "" || peekToken(state).token == ".") &&
+        stream.peek() == '-') {
       stream.next();
-      if (stream.eat(smallRE) && stream.eatWhile(idRE)) {
-        if (stream.peek() == "(") {
-          return "attribute";
-        }else if (isMember(stream.current(),typeWords)) {
-          return "def";
+      if (stream.eat(smallRE) && stream.eatWhile(anumRE)) {
+        if (isMember(stream.current(),typeWords)) {
+          return rval(state,stream,"type");
         }else{
-          return null;
+          return rval(state,stream,"attribute");
         }
       }
       stream.backUp(1);
@@ -112,19 +156,19 @@ CodeMirror.defineMode("erlang", function(cmCfg, modeCfg) {
     // comment
     if (ch == '%') {
       stream.skipToEnd();
-      return "comment";
+      return rval(state,stream,"comment");
     }
 
     // macro
     if (ch == '?') {
-      stream.eatWhile(idRE);
-      return "variable-2";
+      stream.eatWhile(anumRE);
+      return rval(state,stream,"macro");
     }
 
     // record
     if ( ch == "#") {
-      stream.eatWhile(idRE);
-      return "bracket";
+      stream.eatWhile(anumRE);
+      return rval(state,stream,"record");
     }
 
     // char
@@ -134,69 +178,76 @@ CodeMirror.defineMode("erlang", function(cmCfg, modeCfg) {
           stream.next();
         }
       }
-      return "string";
+      return rval(state,stream,"string");
     }
 
     // quoted atom
     if (ch == '\'') {
-      return singleQuote(stream);
+      if (singleQuote(stream)) {
+        return rval(state,stream,"atom");
+      }else{
+        return rval(state,stream,"error");
+      }
     }
 
     // string
     if (ch == '"') {
-      return doubleQuote(stream);
+      if (doubleQuote(stream)) {
+        return rval(state,stream,"string");
+      }else{
+        return rval(state,stream,"error");
+      }
     }
 
     // variable
     if (largeRE.test(ch)) {
-      stream.eatWhile(idRE);
-      return "variable";
+      stream.eatWhile(anumRE);
+      return rval(state,stream,"variable");
     }
 
     // atom/keyword/BIF/function
     if (smallRE.test(ch)) {
-      stream.eatWhile(idRE);
+      stream.eatWhile(anumRE);
 
       if (stream.peek() == "/") {
         stream.next();
         if (stream.eatWhile(digitRE)) {
-          return "meta";      // f/0 style fun
+          return rval(state,stream,"fun");      // f/0 style fun
         }else{
           stream.backUp(1);
-          return "atom";
+          return rval(state,stream,"atom");
         }
       }
 
       var w = stream.current();
 
       if (isMember(w,keywordWords)) {
-        return "keyword";           // keyword
+        pushToken(state,stream);
+        return rval(state,stream,"keyword");
       }
       if (stream.peek() == "(") {
+        // 'put' and 'erlang:put' are BIFs, 'foo:put' is not
         if (isMember(w,bifWords) &&
             (!isPrev(stream,":") || isPrev(stream,"erlang:"))) {
-          return "builtin";         // BIF
+          return rval(state,stream,"builtin");
         }else{
-          return "tag";             // function
+          return rval(state,stream,"function");
         }
       }
       if (isMember(w,guardWords)) {
-        return "property";          // guard
+        return rval(state,stream,"guard");
       }
       if (isMember(w,operatorWords)) {
-        return "operator";          // operator
+        return rval(state,stream,"operator");
       }
-
-
       if (stream.peek() == ":") {
-        if (w == "erlang") {         // f:now() is highlighted incorrectly
-          return "builtin";
+        if (w == "erlang") {
+          return rval(state,stream,"builtin");
         } else {
-          return "tag";              // function application
+          return rval(state,stream,"function");
         }
       }
-
-      return "atom";
+      return rval(state,stream,"atom");               
     }
 
     // number
@@ -213,39 +264,200 @@ CodeMirror.defineMode("erlang", function(cmCfg, modeCfg) {
           stream.eatWhile(digitRE);
         }
       }
-      return "number";               // normal integer
+      return rval(state,stream,"number");   // normal integer
     }
 
-    return null;
+    // open parens
+    if (nongreedy(stream,openParenRE,openParenWords)) {
+      pushToken(state,stream);
+      return rval(state,stream,"open_paren");
+    }
+
+    // close parens
+    if (nongreedy(stream,closeParenRE,closeParenWords)) {
+      pushToken(state,stream);
+      return rval(state,stream,"close_paren");
+    }
+
+    // separators
+    if (greedy(stream,sepRE,separatorWords)) {
+      // a "." directly after a record name is a field accessor, not a terminator, so it is not pushed
+      if (state.context == false) {
+        pushToken(state,stream);
+      }
+      return rval(state,stream,"separator");
+    }
+
+    // operators
+    if (greedy(stream,symbolRE,symbolWords)) {
+      return rval(state,stream,"operator");
+    }
+
+    return rval(state,stream,null);
+  }
+
+  function nongreedy(stream,re,words) {
+    if (stream.current().length == 1 && re.test(stream.current())) {
+      stream.backUp(1);
+      while (re.test(stream.peek())) {
+        stream.next();
+        if (isMember(stream.current(),words)) {
+          return true;
+        }
+      }
+      stream.backUp(stream.current().length-1);
+    }
+    return false;
+  }
+
+  function greedy(stream,re,words) {
+    if (stream.current().length == 1 && re.test(stream.current())) {
+      while (re.test(stream.peek())) {
+        stream.next();
+      }
+      while (0 < stream.current().length) {
+        if (isMember(stream.current(),words)) {
+          return true;
+        }else{
+          stream.backUp(1);
+        }
+      }
+      stream.next();
+    }
+    return false;
   }
 
   function doubleQuote(stream) {
-    return Quote(stream, '"', '\\', "string");
+    return quote(stream, '"', '\\');
   }
 
   function singleQuote(stream) {
-    return Quote(stream,'\'','\\',"atom");
+    return quote(stream,'\'','\\');
   }
 
-  function Quote(stream,quoteChar,escapeChar,tag) {
+  function quote(stream,quoteChar,escapeChar) {
     while (!stream.eol()) {
       var ch = stream.next();
       if (ch == quoteChar) {
-        return tag;
+        return true;
       }else if (ch == escapeChar) {
         stream.next();
       }
     }
-    return "error";
+    return false;
   }
 
-  return {
-    startState: function() {
-      return {};
-    },
+  function Token(stream) {
+    this.token  = stream ? stream.current() : "";
+    this.column = stream ? stream.column() : 0;
+    this.indent = stream ? stream.indentation() : 0;
+  }
+
+  function myIndent(state,textAfter) {
+    var indent = cmCfg.indentUnit;
+    var outdentWords = ["after","catch"];
+    var token = (peekToken(state)).token;
+    var wordAfter = takewhile(textAfter,/[^a-z]/);
+
+    if (isMember(token,openParenWords)) {
+      return (peekToken(state)).column+token.length;
+    }else if (token == "." || token == ""){
+      return 0;
+    }else if (token == "->") {
+      if (wordAfter == "end") {
+        return peekToken(state,2).column;
+      }else if (peekToken(state,2).token == "fun") {
+        return peekToken(state,2).column+indent;
+      }else{
+        return (peekToken(state)).indent+indent;
+      }
+    }else if (isMember(wordAfter,outdentWords)) {
+      return (peekToken(state)).indent;
+    }else{
+      return (peekToken(state)).column+indent;
+    }
+  }
+
+  function takewhile(str,re) {
+    var m = str.match(re);
+    return m ? str.slice(0,m.index) : str;
+  }
+
+  function popToken(state) {
+    return state.tokenStack.pop();
+  }
+
+  function peekToken(state,depth) {
+    var len = state.tokenStack.length;
+    var dep = (depth ? depth : 1);
+    if (len < dep) {
+      return new Token;
+    }else{
+      return state.tokenStack[len-dep];
+    }
+  }
+
+  function pushToken(state,stream) {
+    var token = stream.current();
+    var prev_token = peekToken(state).token;
+    if (isMember(token,ignoreWords)) {
+      return false;
+    }else if (drop_both(prev_token,token)) {
+      popToken(state);
+      return false;
+    }else if (drop_first(prev_token,token)) {
+      popToken(state);
+      return pushToken(state,stream);
+    }else{
+      state.tokenStack.push(new Token(stream));
+      return true;
+    }
+  }
+
+  function drop_first(open, close) {
+    switch (open+" "+close) {
+      case "when ->":       return true;
+      case "-> end":        return true;
+      case "-> .":          return true;
+      case ". .":           return true;
+      default:              return false;
+    }
+  }
 
-    token: function(stream, state) {
-      return tokenize(stream, state);
+  function drop_both(open, close) {
+    switch (open+" "+close) {
+      case "( )":         return true;
+      case "[ ]":         return true;
+      case "{ }":         return true;
+      case "<< >>":       return true;
+      case "begin end":   return true;
+      case "case end":    return true;
+      case "fun end":     return true;
+      case "if end":      return true;
+      case "receive end": return true;
+      case "try end":     return true;
+      case "-> ;":        return true;
+      default:            return false;
     }
+  }
+
+  return {
+    startState:
+      function() {
+        return {tokenStack: [],
+                context: false,
+                lastToken: null};
+      },
+
+    token:
+      function(stream, state) {
+        return tokenize(stream, state);
+      },
+
+    indent:
+      function(state, textAfter) {
+        // myIndent derives the indentation from state.tokenStack and textAfter
+        return myIndent(state,textAfter);
+      }
   };
 });
diff --git a/mode/erlang/index.html b/mode/erlang/index.html
index 625491f25..80eefef46 100644
--- a/mode/erlang/index.html
+++ b/mode/erlang/index.html
@@ -52,6 +52,7 @@ expand_recs(_,Term) ->
       var editor = CodeMirror.fromTextArea(document.getElementById("code"), {
         lineNumbers: true,
         matchBrackets: true,
+        extraKeys: {"Tab":  "indentAuto"},
         theme: "erlang-dark"
       });
     </script>
-- 
GitLab