Skip to content
Snippets Groups Projects
Commit e12fbcbd authored by Charles Skelton's avatar Charles Skelton Committed by Marijn Haverbeke
Browse files

Add a Q mode

parent b76d6989
No related branches found
No related tags found
No related merge requests found
......@@ -100,6 +100,7 @@
<option value="http://codemirror.net/mode/plsql/plsql.js">plsql.js</option>
<option value="http://codemirror.net/mode/properties/properties.js">properties.js</option>
<option value="http://codemirror.net/mode/python/python.js">python.js</option>
<option value="http://codemirror.net/mode/q/q.js">q.js</option>
<option value="http://codemirror.net/mode/r/r.js">r.js</option>
<option value="http://codemirror.net/mode/rpm/changes/changes.js">rpm/changes.js</option>
<option value="http://codemirror.net/mode/rpm/spec/spec.js">rpm/spec.js</option>
......
......@@ -55,6 +55,7 @@ out some of the more obscure ones.</p>
<li><a href="../mode/pig/index.html">Pig Latin</a></li>
<li><a href="../mode/properties/index.html">Properties files</a></li>
<li><a href="../mode/python/index.html">Python</a></li>
<li><a href="../mode/q/index.html">Q</a></li>
<li><a href="../mode/r/index.html">R</a></li>
<li>RPM <a href="../mode/rpm/spec/index.html">spec</a> and <a href="../mode/rpm/changes/index.html">changelog</a></li>
<li><a href="../mode/rst/index.html">reStructuredText</a></li>
......
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>CodeMirror: Q mode</title>
<link rel="stylesheet" href="../../lib/codemirror.css">
<script src="../../lib/codemirror.js"></script>
<script src="../../addon/edit/matchbrackets.js"></script>
<script src="q.js"></script>
<link rel="stylesheet" href="../../doc/docs.css">
<style type="text/css">.CodeMirror {border-top: 1px solid black; border-bottom: 1px solid black;}</style>
</head>
<body>
<h1>CodeMirror: Q mode</h1>
<div><textarea id="code" name="code">
/ utilities to quickly load a csv file - for more exhaustive analysis of the csv contents see csvguess.q
/ 2009.09.20 - updated to match latest csvguess.q
/ .csv.colhdrs[file] - return a list of colhdrs from file
/ info:.csv.info[file] - return a table of information about the file
/ columns are:
/ c - column name; ci - column index; t - load type; mw - max width;
/ dchar - distinct characters in values; rule - rule that caught the type
/ maybe - needs checking, _could_ be say a date, but perhaps just a float?
/ .csv.info0[file;onlycols] - like .csv.info except that it only analyses <onlycols>
/ example:
/ info:.csv.info0[file;(.csv.colhdrs file)like"*price"]
/ info:.csv.infolike[file;"*price"]
/ show delete from info where t=" "
/ .csv.data[file;info] - use the info from .csv.info to read the data
/ .csv.data10[file;info] - like .csv.data but only returns the first 10 rows
/ bulkload[file;info] - bulk loads file into table DATA (which must be already defined :: DATA:() )
/ .csv.read[file]/read10[file] - for when you don't care about checking/tweaking the <info> before reading
\d .csv
DELIM:","
ZAPHDRS:0b / lowercase and remove _ from colhdrs (junk characters are always removed)
WIDTHHDR:25000 / number of characters read to get the header
READLINES:222 / number of lines read and used to guess the types
SYMMAXWIDTH:11 / character columns narrower than this are stored as symbols
SYMMAXGR:10 / max symbol granularity% before we give up and keep as a * string
FORCECHARWIDTH:30 / every field (of any type) with values this wide or more is forced to character "*"
DISCARDEMPTY:0b / completely ignore empty columns if true else set them to "C"
CHUNKSIZE:50000000 / used in fs2 (modified .Q.fs)
k)nameltrim:{$[~@x;.z.s'x;~(*x)in aA:.Q.a,.Q.A;(+/&\~x in aA)_x;x]}
k)fs2:{[f;s]((-7!s)>){[f;s;x]i:1+last@&0xa=r:1:(s;x;CHUNKSIZE);f@`\:i#r;x+i}[f;s]/0j}
cleanhdrs:{{$[ZAPHDRS;lower x except"_";x]}x where x in DELIM,.Q.an}
cancast:{nw:x$"";if[not x in"BXCS";nw:(min 0#;max 0#;::)@\:nw];$[not any nw in x$(11&count y)#y;$[11<count y;not any nw in x$y;1b];0b]}
read:{[file]data[file;info[file]]}
read10:{[file]data10[file;info[file]]}
colhdrs:{[file]
`$nameltrim DELIM vs cleanhdrs first read0(file;0;1+first where 0xa=read1(file;0;WIDTHHDR))}
data:{[file;info]
(exec c from info where not t=" ")xcol(exec t from info;enlist DELIM)0:file}
data10:{[file;info]
data[;info](file;0;1+last 11#where 0xa=read1(file;0;15*WIDTHHDR))}
info0:{[file;onlycols]
colhdrs:`$nameltrim DELIM vs cleanhdrs first head:read0(file;0;1+last where 0xa=read1(file;0;WIDTHHDR));
loadfmts:(count colhdrs)#"S";if[count onlycols;loadfmts[where not colhdrs in onlycols]:"C"];
breaks:where 0xa=read1(file;0;floor(10+READLINES)*WIDTHHDR%count head);
nas:count as:colhdrs xcol(loadfmts;enlist DELIM)0:(file;0;1+last((1+READLINES)&count breaks)#breaks);
info:([]c:key flip as;v:value flip as);as:();
reserved:key`.q;reserved,:.Q.res;reserved,:`i;
info:update res:c in reserved from info;
info:update ci:i,t:"?",ipa:0b,mdot:0,mw:0,rule:0,gr:0,ndv:0,maybe:0b,empty:0b,j10:0b,j12:0b from info;
info:update ci:`s#ci from info;
if[count onlycols;info:update t:" ",rule:10 from info where not c in onlycols];
info:update sdv:{string(distinct x)except`}peach v from info;
info:update ndv:count each sdv from info;
info:update gr:floor 0.5+100*ndv%nas,mw:{max count each x}peach sdv from info where 0<ndv;
info:update t:"*",rule:20 from info where mw>.csv.FORCECHARWIDTH; / long values
info:update t:"C "[.csv.DISCARDEMPTY],rule:30,empty:1b from info where t="?",mw=0; / empty columns
info:update dchar:{asc distinct raze x}peach sdv from info where t="?";
info:update mdot:{max sum each"."=x}peach sdv from info where t="?",{"."in x}each dchar;
info:update t:"n",rule:40 from info where t="?",{any x in"0123456789"}each dchar; / vaguely numeric..
info:update t:"I",rule:50,ipa:1b from info where t="n",mw within 7 15,mdot=3,{all x in".0123456789"}each dchar,.csv.cancast["I"]peach sdv; / ip-address
info:update t:"J",rule:60 from info where t="n",mdot=0,{all x in"+-0123456789"}each dchar,.csv.cancast["J"]peach sdv;
info:update t:"I",rule:70 from info where t="J",mw<12,.csv.cancast["I"]peach sdv;
info:update t:"H",rule:80 from info where t="I",mw<7,.csv.cancast["H"]peach sdv;
info:update t:"F",rule:90 from info where t="n",mdot<2,mw>1,.csv.cancast["F"]peach sdv;
info:update t:"E",rule:100,maybe:1b from info where t="F",mw<9;
info:update t:"M",rule:110,maybe:1b from info where t in"nIHEF",mdot<2,mw within 4 7,.csv.cancast["M"]peach sdv;
info:update t:"D",rule:120,maybe:1b from info where t in"nI",mdot in 0 2,mw within 6 11,.csv.cancast["D"]peach sdv;
info:update t:"V",rule:130,maybe:1b from info where t="I",mw in 5 6,7<count each dchar,{all x like"*[0-9][0-5][0-9][0-5][0-9]"}peach sdv,.csv.cancast["V"]peach sdv; / 235959 12345
info:update t:"U",rule:140,maybe:1b from info where t="H",mw in 3 4,7<count each dchar,{all x like"*[0-9][0-5][0-9]"}peach sdv,.csv.cancast["U"]peach sdv; /2359
info:update t:"U",rule:150,maybe:0b from info where t="n",mw in 4 5,mdot=0,{all x like"*[0-9]:[0-5][0-9]"}peach sdv,.csv.cancast["U"]peach sdv;
info:update t:"T",rule:160,maybe:0b from info where t="n",mw within 7 12,mdot<2,{all x like"*[0-9]:[0-5][0-9]:[0-5][0-9]*"}peach sdv,.csv.cancast["T"]peach sdv;
info:update t:"V",rule:170,maybe:0b from info where t="T",mw in 7 8,mdot=0,.csv.cancast["V"]peach sdv;
info:update t:"T",rule:180,maybe:1b from info where t in"EF",mw within 7 10,mdot=1,{all x like"*[0-9][0-5][0-9][0-5][0-9].*"}peach sdv,.csv.cancast["T"]peach sdv;
info:update t:"Z",rule:190,maybe:0b from info where t="n",mw within 11 24,mdot<4,.csv.cancast["Z"]peach sdv;
info:update t:"P",rule:200,maybe:1b from info where t="n",mw within 12 29,mdot<4,{all x like"[12]*"}peach sdv,.csv.cancast["P"]peach sdv;
info:update t:"N",rule:210,maybe:1b from info where t="n",mw within 3 28,mdot=1,.csv.cancast["N"]peach sdv;
info:update t:"?",rule:220,maybe:0b from info where t="n"; / reset remaining maybe numeric
info:update t:"C",rule:230,maybe:0b from info where t="?",mw=1; / char
info:update t:"B",rule:240,maybe:0b from info where t in"HC",mw=1,mdot=0,{$[all x in"01tTfFyYnN";(any"0fFnN"in x)and any"1tTyY"in x;0b]}each dchar; / boolean
info:update t:"B",rule:250,maybe:1b from info where t in"HC",mw=1,mdot=0,{all x in"01tTfFyYnN"}each dchar; / boolean
info:update t:"X",rule:260,maybe:0b from info where t="?",mw=2,{$[all x in"0123456789abcdefABCDEF";(any .Q.n in x)and any"abcdefABCDEF"in x;0b]}each dchar; /hex
info:update t:"S",rule:270,maybe:1b from info where t="?",mw<.csv.SYMMAXWIDTH,mw>1,gr<.csv.SYMMAXGR; / symbols (max width permitting)
info:update t:"*",rule:280,maybe:0b from info where t="?"; / the rest as strings
/ flag those S/* columns which could be encoded to integers (.Q.j10/x10/j12/x12) to avoid symbols
info:update j12:1b from info where t in"S*",mw<13,{all x in .Q.nA}each dchar;
info:update j10:1b from info where t in"S*",mw<11,{all x in .Q.b6}each dchar;
select c,ci,t,maybe,empty,res,j10,j12,ipa,mw,mdot,rule,gr,ndv,dchar from info}
info:info0[;()] / by default don't restrict columns
infolike:{[file;pattern] info0[file;{x where x like y}[lower colhdrs[file];pattern]]} / .csv.infolike[file;"*time"]
\d .
/ DATA:()
bulkload:{[file;info]
if[not`DATA in system"v";'`DATA.not.defined];
if[count DATA;'`DATA.not.empty];
loadhdrs:exec c from info where not t=" ";loadfmts:exec t from info;
.csv.fs2[{[file;loadhdrs;loadfmts] `DATA insert $[count DATA;flip loadhdrs!(loadfmts;.csv.DELIM)0:file;loadhdrs xcol(loadfmts;enlist .csv.DELIM)0:file]}[file;loadhdrs;loadfmts]];
count DATA}
@[.:;"\\l csvutil.custom.q";::]; / save your custom settings in csvutil.custom.q to override those set at the beginning of the file
</textarea></div>
<script>
var editor = CodeMirror.fromTextArea(document.getElementById("code"), {
lineNumbers: true,
matchBrackets: true
});
</script>
<p><strong>MIME type defined:</strong> <code>text/x-q</code>.</p>
</body>
</html>
CodeMirror.defineMode("q",function(config){
var indentUnit=config.indentUnit,
curPunc,
keywords=buildRE(["abs","acos","aj","aj0","all","and","any","asc","asin","asof","atan","attr","avg","avgs","bin","by","ceiling","cols","cor","cos","count","cov","cross","csv","cut","delete","deltas","desc","dev","differ","distinct","div","do","each","ej","enlist","eval","except","exec","exit","exp","fby","fills","first","fkeys","flip","floor","from","get","getenv","group","gtime","hclose","hcount","hdel","hopen","hsym","iasc","idesc","if","ij","in","insert","inter","inv","key","keys","last","like","list","lj","load","log","lower","lsq","ltime","ltrim","mavg","max","maxs","mcount","md5","mdev","med","meta","min","mins","mmax","mmin","mmu","mod","msum","neg","next","not","null","or","over","parse","peach","pj","plist","prd","prds","prev","prior","rand","rank","ratios","raze","read0","read1","reciprocal","reverse","rload","rotate","rsave","rtrim","save","scan","select","set","setenv","show","signum","sin","sqrt","ss","ssr","string","sublist","sum","sums","sv","system","tables","tan","til","trim","txf","type","uj","ungroup","union","update","upper","upsert","value","var","view","views","vs","wavg","where","where","while","within","wj","wj1","wsum","xasc","xbar","xcol","xcols","xdesc","xexp","xgroup","xkey","xlog","xprev","xrank"]),
E=/[|/&^!+:\\\-*%$=~#;@><,?_\'\"\[\(\]\)\s{}]/;
function buildRE(w){return new RegExp("^("+w.join("|")+")$");}
function tokenBase(stream,state){
var sol=stream.sol(),c=stream.next();
curPunc=null;
if(sol)
if(c=="/")
return(state.tokenize=tokenLineComment)(stream,state);
else if(c=="\\"){
if(stream.eol()||/\s/.test(stream.peek()))
return stream.skipToEnd(),/^\\\s*$/.test(stream.current())?(state.tokenize=tokenCommentToEOF)(stream, state):state.tokenize=tokenBase,"comment";
else
return state.tokenize=tokenBase,"builtin";
}
if(/\s/.test(c))
return stream.peek()=="/"?(stream.skipToEnd(),"comment"):"whitespace";
if(c=='"')
return(state.tokenize=tokenString)(stream,state);
if(c=='`')
return stream.eatWhile(/[A-Z|a-z|\d|_|:|\/|\.]/),"symbol";
if(("."==c&&/\d/.test(stream.peek()))||/\d/.test(c)){
var t=null;
stream.backUp(1);
if(stream.match(/^\d{4}\.\d{2}(m|\.\d{2}([D|T](\d{2}(:\d{2}(:\d{2}(\.\d{1,9})?)?)?)?)?)/)
|| stream.match(/^\d+D(\d{2}(:\d{2}(:\d{2}(\.\d{1,9})?)?)?)/)
|| stream.match(/^\d{2}:\d{2}(:\d{2}(\.\d{1,9})?)?/)
|| stream.match(/^\d+[ptuv]{1}/))
t="temporal";
else if(stream.match(/^0[NwW]{1}/)
|| stream.match(/^0x[\d|a-f|A-F]*/)
|| stream.match(/^[0|1]+[b]{1}/)
|| stream.match(/^\d+[chijn]{1}/)
|| stream.match(/-?\d*(\.\d*)?(e[+\-]?\d+)?(e|f)?/))
t="number";
return(t&&(!(c=stream.peek())||E.test(c)))?t:(stream.next(),"error");
}
if(/[A-Z|a-z]|\./.test(c))
return stream.eatWhile(/[A-Z|a-z|\.|_|\d]/),keywords.test(stream.current())?"keyword":"variable";
if(/[|/&^!+:\\\-*%$=~#;@><\.,?_\']/.test(c))
return null;
if(/[{}\(\[\]\)]/.test(c))
return null;
return"error";
}
function tokenLineComment(stream,state){
return stream.skipToEnd(),/\/\s*$/.test(stream.current())?(state.tokenize=tokenBlockComment)(stream,state):(state.tokenize=tokenBase),"comment";
}
function tokenBlockComment(stream,state){
var f=stream.sol()&&stream.peek()=="\\";
stream.skipToEnd();
if(f&&/^\\\s*$/.test(stream.current()))
state.tokenize=tokenBase;
return"comment";
}
function tokenCommentToEOF(stream,state){return stream.skipToEnd(),"comment";}
function tokenString(stream,state){
var escaped=false,next,end=false;
while((next=stream.next())){
if(next=="\""&&!escaped){end=true;break;}
escaped=!escaped&&next=="\\";
}
if(end)state.tokenize=tokenBase;
return"string";
}
function pushContext(state,type,col){state.context={prev:state.context,indent:state.indent,col:col,type:type};}
function popContext(state){state.indent=state.context.indent;state.context=state.context.prev;}
return{
startState:function(base){
return{tokenize:tokenBase,
context:null,
indent:0,
col:0};
},
token:function(stream,state){
if(stream.sol()){
if(state.context&&state.context.align==null)
state.context.align=false;
state.indent=stream.indentation();
}
//if (stream.eatSpace()) return null;
var style=state.tokenize(stream,state);
if(style!="comment"&&state.context&&state.context.align==null&&state.context.type!="pattern"){
state.context.align=true;
}
if(curPunc=="(")pushContext(state,")",stream.column());
else if(curPunc=="[")pushContext(state,"]",stream.column());
else if(curPunc=="{")pushContext(state,"}",stream.column());
else if(/[\]\}\)]/.test(curPunc)){
while(state.context&&state.context.type=="pattern")popContext(state);
if(state.context&&curPunc==state.context.type)popContext(state);
}
else if(curPunc=="."&&state.context&&state.context.type=="pattern")popContext(state);
else if(/atom|string|variable/.test(style)&&state.context){
if(/[\}\]]/.test(state.context.type))
pushContext(state,"pattern",stream.column());
else if(state.context.type=="pattern"&&!state.context.align){
state.context.align=true;
state.context.col=stream.column();
}
}
return style;
},
indent:function(state,textAfter){
var firstChar=textAfter&&textAfter.charAt(0);
var context=state.context;
if(/[\]\}]/.test(firstChar))
while (context&&context.type=="pattern")context=context.prev;
var closing=context&&firstChar==context.type;
if(!context)
return 0;
else if(context.type=="pattern")
return context.col;
else if(context.align)
return context.col+(closing?0:1);
else
return context.indent+(closing?0:indentUnit);
}
};
});
CodeMirror.defineMIME("text/x-q","q");
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment