twl - toml-like data format and parser
/* Core object handed out by twl_new(); only the public fields are listed. */
typedef struct twl_tag {
    int     rc;    /* holds the same int that twl_pushfile()/twl_pushstr() rtn */
    char   *emsg;  /* msgstr describing the err when rc<0 */
    double  cnt;   /* count of keys currently held */
    /* ..opaque data */
} twl_t;
/*
 * One key/value entry as returned by twl_get() and twl_foreach().
 * NOTE(review): the rtn-value description of this page also lists 'val.ssz'
 * (val.s size); it is not declared here -- confirm against twl.h.
 */
typedef struct twlval_tag {
    const char *k;     /* key */
    const char *t;     /* 3 types, "n"um/"s"tr/"d"ate */
    double      n;     /* num val */
    const char *s;     /* str val */
    const char *date;  /* date tostr: rfc3339 format */
    /* date parts */
    double      y;
    double      m;
    double      d;
    double      hh;
    double      mm;
    double      ss;
    double      z;     /* zone flag, 0/1 */
    double      zh;
    double      zm;
} twl_vt;
//--read twl format file/str
// twl_new(): rtns the core obj (holds parser, hash map etc). rtns NULL if failed.
twl_t*
twl_new();
// twl_pushfile(): reads the file 'flname' into obj; "stdin" reads FILE* stdin.
// rtns 0 on success, rc<0 on err (same int is held in obj->rc).
int twl_pushfile(twl_t*
obj,
const
char*
flname);
// twl_pushstr(): sends str/bin 's' to the push-parser; s==NULL sends EOF.
// rtns 0/1/rc<0 == complete/morestr/err (same int is held in obj->rc).
// '[, int ssz]' is synopsis notation for an optional 3rd arg (size of s);
// strlen(s) is used if it is omitted or -1.
int twl_pushstr(twl_t*
obj,
const
char*
s [,
int ssz]
);
// twl_clear(): rtns an obj ready to read other file/str; the sample code
// shows obj->cnt == 0 after the call.
twl_t*
twl_clear(twl_t*
obj);
// twl_free(): called once the obj is no longer needed (see sample code).
// NOTE(review): meaning of the void* rtn value is not described here -- confirm.
void*
twl_free(twl_t*
obj);
// twl_dbgout(): sets the fp for dbginfo output; NULL (==dfl) means no output.
// rc<0 means bad fp and the setting is left unchanged.
int twl_dbgout(twl_t*
obj,
FILE*
dbgfp );
//--getter
// twl_get(): searches the key 'id' ("aa.bb.c" etc) and rtns its value.
// all members are init with 0/"", so *val.t == 0 tells the key was not found.
twl_vt twl_get(twl_t*
obj,
const
char*
id);
// twl_foreach(): loops over the keys held in obj; 'key' and 'val' name the
// loop IDs that hold the current keyname and its data inside the {..} body.
twl_foreach(const char* key, twl_vt val, twl_t* obj){..} //macro
#include "twl.h"
int main(int argc, char** argv){
twl_t* obj = twl_new();
twl_pushstr(obj, "[hw] \n twl = 10.1");
twl_pushstr(obj, NULL); //EOF
twl_vt val = twl_get(obj, "hw.twl");
printf("%s, %f\n", val.k, val.n);
twl_free(obj);
return 0;
}
//~$ cc hw.c libtwl.a -Wall -pedantic -std=c99 -D_POSIX_C_SOURCE=200112
//see EXAMPLE for more detailed code
- flname
- (char*) read file if set. "stdin" reads FILE* stdin
- obj
- (twl_t*) core obj. holds parser, hash map etc
- s
- (char*) send str/bin to twl push-parser. send EOF if set NULL
- ssz
- (int) send str/bin size if set. use strlen(s) if noset/-1
- id
- (char*) keyname for search
- key
- (char*) twl_foreach() ID for linear search
- val
- (twl_vt) twl_foreach() ID holding data
- dbgfp
- (FILE*) fp for output dbginfo. noout if set NULL(==dfl)
- obj
- twl_new() rtns core. rtn NULL if failed. obj->cnt holds keycnt.
- rc
- twl_pushfile() rtns int 0/rc<0
twl_pushstr() rtns int 0/1/rc<0 == complete/morestr/err
err == ERR_TKN / ERR_STX / ERR_INN and emsg is 'obj->emsg'. 'obj->rc'
holds the same int.
- val
- all data is double/char*. all data is init with 0/"".
never fails *val.t =='n', puts(val.date) etc
val.k : char*, ID/keyname, "aa.bb.c" etc
val.t : char*, valtype. "n/s/d" or "", num/str/date
val.n : dbl, val.t=="n" data. 0.11, 12.00 etc
val.s : char*, val.t=="s" data. "hw" etc
val.date : char*, val.t=="d" tostr data. follows rfc3339
val.ssz: dbl, val.s size. val.s can hold binary, \0 etc.
val.y/m/d/hh/mm/ss/z/zh/zm: dbl, 'z'one is 0/1 == noset/set
twl_pushfile/str() may rtns rc<0.
ERR_TKN: lex err
ERR_STX: yacc err
ERR_INN: other inner err
'obj->emsg' may hold a detailed msgstr.
exit() if fatal err.
twl is an alt-toml data format. it omits unnecessary syntax from toml. the differences
are listed below. see NOTES for details:
- no nest with rval: deep nest causes troubles.
- lowercase ID only: ok: ab.xy=100 NG: Ab.xy=100
- no quote lval : ID is no quoting alnum+[._] as c-lang
- no table array : silver bullet for javascript specific
- add multiline cmt: #*..*# etc
- add parallel set : a,b = 10,20 >> a=10; b=20
twl format target is:
- non-programmer can use with low learning cost
- a few syntax rule
- pay cost if you need to send complex data (or fix data structure)
- respect to: ini, shell, c-lang, utf8
- avoid to : XML, XSLT, python, c++, lisp, markdown
see samples for twl data read api functions. input data is saved to hashtb. all
values are 'const char* / double' except rc, obj->rc (int). char* data is
init with blank str "", so puts(val.s) etc never fails.
//--data.twl
# cmtline
[ab]
a=10 #ab.a=10
[aa.bb]
b="hw" #aa.bb.b="hw"
//--src.c
#include "twl.h"
// Sample: load twl data from a file (disabled branch) and through the
// push-style parser, then look up values and loop over every key.
// rtns 0 on success, 1 on any err (obj is released on every path).
int main(int argc, char** argv){
    int rc=0;
    twl_t* obj;
    twl_vt val;

    // read from file
    if(0){
        obj = twl_new();
        if(obj == NULL){ puts("twl_new failed"); return 1; }    //rtns NULL if failed
        rc = twl_dbgout(obj, stderr);    //optional: dispinfo. rc<0 >> badfp+nochange
        rc = twl_pushfile(obj, "data.twl");    //suc/fail == 0/rc<0
        // check here: rc is reused below, so the read err would be lost otherwise
        if(rc<0){ puts(obj->emsg); twl_free(obj); return 1; }
        rc = twl_dbgout(obj, NULL);    //optional: noout(==dfl)
        val = twl_get(obj, "aa.bb.b");
        if( *val.t == 0 ){ puts("bad key"); twl_free(obj); return 1; }
        printf("%s, %s, %f\n", val.k, val.s, obj->cnt);    //key,val,cntkey
        twl_free(obj);
    }

    //read using push-style parsing
    obj = twl_new();
    if(obj == NULL){ puts("twl_new failed"); return 1; }
    const char* s = "[ab]\n a = 10 ; d = 2000-01-01";
    rc = twl_pushstr(obj, s);    //rc=1/0 (more/complete) err<0
    // (obj, s, -1) >> 3rd arg is optional. omitted or -1 == strlen(s)
    if(rc>=0){ rc = twl_pushstr(obj, NULL); }    //NULL >> send EOF
    // exit status must be 0-255, so rtn 1 instead of the negative rc
    if(rc<0){ puts(obj->emsg); twl_free(obj); return 1; }

    val = twl_get(obj, "ab.a");
    printf("%s, %f\n", val.t, val.n);    // "n", 10.0

    twl_foreach(k, v, obj){
        puts(k);    //== v.k, char* key/id, "ab.a" etc
        if( strcmp(v.t, "n")==0 ){ printf("num: %f\n", v.n); }
        if( *v.t == 's' ){ printf("str : %s\n", v.s); }
        if( *v.t == 'd' ){ printf("date: %s\n", v.date); }
    }

    obj = twl_clear(obj);    //read other file/str if needs
    printf("%f\n", obj->cnt);    //==0, obj->cnt == holding keycnt
    twl_free(obj);
    return 0;
}
//$ cc src.c libtwl.a -Wall -pedantic -std=c99 -D_POSIX_C_SOURCE=200112
---bench mark:
code: a[n]=num / twl_pushstr(obj, "key=10 ") etc
set : 8.3ms <<<< 8842ms
FAST: a[n] (1) <<<< twl_set (1000) :SLOW
get : 14.4ms << 216.8ms
FAST: a[n] (1) << twl_get (15) :SLOW
-O0
real 332.267 ms : ./twl.tmp.c 4674: msg:sprintf() ovh 1000*1000
real 341.507 ms : ./twl.tmp.c 4686: msg:a[n] set 1000*1000
real 14.469 ms : ./twl.tmp.c 4693: msg:a[n] get 1000*1000
real 14608.907 ms: ./twl.tmp.c 4702: msg:twl_set 1000*1000
real 449.135 ms : ./twl.tmp.c 4712: msg:twl_get 1000*1000
-O3
real 356.416 ms : ./twl.tmp.c 4674: msg:sprintf() ovh 1000*1000
real 341.656 ms : ./twl.tmp.c 4686: msg:a[n] set 1000*1000
real 0.001 ms : ./twl.tmp.c 4693: msg:a[n] get 1000*1000
real 8842.322 ms : ./twl.tmp.c 4702: msg:twl_set 1000*1000
real 216.870 ms : ./twl.tmp.c 4712: msg:twl_get 1000*1000
---
--- twl syntax info
# twl syntax. linecmt is '#' or '//', allow not BOS
#* multiline cmt is #*...* # or / *..* /, toml >> #...(nl) only
- preprocess \
\+(nl) merges lines then tokenize. same as c-lang
- newline: NL is '\n' or '\r\n'
- utf8 : file format allows only utf8 and (nl)(EOF)
- ab.x_y : ID(left hand val) is alnum + dot.ul_
- Ab.xy : NG. ID allows only lowercase
- valtype: VAL(right hand val) is 3types: floating num, str, date
- [ab.xy]: LABEL [..] add prefix name to ID
- a=1;a=2: (twl_ext) 2nd assign overwrites the before: >> a==2
- sep ; : (twl_ext) semicolon ';' is ignored (or works as token sep)
- a,b=1,2: (twl_ext) parallel assign: >> a=1 b=2
- 1_0 : (twl_ext) NUM ignores '_' except token head: >> a=10
- #_twl: : (twl_ext) magic cmt token '#_twl:' is reserved
*#
#_twl:v1.0.0 //file syntax is for v1.0.0 (currently magic is only this)
//_twl:v1.0.0 //this is not magic
[num] #cmt
n0 = 10 //num.n0 = 10.0 (floating num only)
n0 = 10.1 //num.n0 = 10.1 (twl:overwrite toml:error)
n1 = -_.11 //-0.11: token >> del '_' >> conv with strtod()
n2 = //allow noset == ignored
n3=1 n4=2 //valid. free format
n5=1;n6=2 //';' == '\n' in most
[str_bin]
s0 = "\u0000h\0w" //s0 size == 4: treat as utf8 sequences
s1 = "\100s\ntr" //"" conv c-lang esc except \xhh. lit can holds raw nl
s2 = '@s
tr' //'' holds rawlit. '' cant holds single-quote (same as shell)
#* hex esc \xhh.. is non-portable (c99: 6.4.4.4 Character constants)
- \ooo is 1-3digits(1byte 0-255) but nolim with \xhh..(1-100digits etc)
- \xhh.. val depends on endian: 0x0a11 / 0x110a etc
- use \ooo for binary and \u,\U for i18n characters
*#
//heredoc
s3 = '''alnum_end //''' or """, 3 or more quote. '''''' is valid.
stop hdoc with (nl)headID(nl). ID chars is alnum+[._], same as lval.
quotes following the ending ID are valid. (nl)headID'"''"(nl) works as end.
this helps you to adjust syntax highlight manually.
start with s-quote takes completely rawstr.
set blankline if you want EOS nl "str\n".
alnum_end"''
//hdoc with c-lang esc
s4 = """end
w-quote allows esc\n \u0060 etc. \n
end
s5 = """-end
"""- or '''- removes leading raw tabs '\t', same as posix-shell '<<-'
\t\t123 ...esc char "\t,\u0009" isnt stripped
end"""
_='''___
use assign syntax as cmt like shell, :<<'CMT'...CMT
___'''
[date.time.etc] //date format: follows rfc3339
d.a0 = 0020-01-01 12:00:12.345 //year: 4 or more digits
d.a1 = 2000-01-01T12:00:23.456+00:00 //date.time.etc.d.a1 = ...
d.a2 = 2000-01-01 //(twl_ext) date only >> others are init with 0
d.a3 = 12:00:22.3345 //(twl_ext) time only
d.a4 = -20000-01-01T00:00:00.000Z //(twl_ext) sign and over 4digits year
#*(twl_ext)
lowercase sep 't/z' is invalid. T/(sp)/Z only.
y,m,d,hh,mm,ss,z,zh,zm holds double(64bit floating point) num
*#
[] //blank LABEL set no prefix
base.num=10 //(no_prefix)base.num == 10
//(twl_ext) parallel assign (ID list)=(VAL list). ignore uneven ID/VAL
a, b, c, d
=10.9, "hw", 2000-01-01 //'d' is ignored
a = 10.9, "hw", 2000-01-01 //str, date is ignored
# ---
#*---appendix: not a correct BNF, but it helps you
//yacc
stmt: expr
| stmt expr ;
expr: LB
| lve
| ass ;
ass : lve rv
| ass ',' rv ; //VAL list
lve : lv '=' ;
lv : ID
| lv ',' ID ; //ID list
rv: num | lit | date ;
num : NUM ;
lit : DLIT | SLIT | DDOC | SDOC ;
date : DATE_
| DATE_ DATET
| DATE_ DATET DATEZ
| TIME ;
//lex: merge \(nl) lines with preprocess
NL : \n|\r\n
LABEL: '[' ID ']'
ID : ([a-z_][a-z0-9_]*)(\.[a-z_][a-z0-9_]*)*
NUM : [-+.0-9][_xXoO0-9a-fA-F.lL]* //del '_' and suc to conv strtod()
DLIT : ["]([^"]|\\.)*["] //conv esc
SLIT : [']([^'])*[']
DDOC : """+\(ID\).* NL \1 ["']* NL //BRE+shortest. conv esc
SDOC : '''+\(ID\).* NL \1 ["']* NL
DATE_: [-+]?[0-9][0-9][0-9][0-9]+-[0-1][0-9]-[0-3][0-9]
DATET: [T ][0-2][0-9]:[0-6][0-9]:[0-6][0-9](\.[0-9]+)?
DATEZ: [Z]|[\-+][0-2][0-9]\:[0-6][0-9]
TIME: [0-2][0-9]:[0-6][0-9]:[0-6][0-9](\.[0-9]+)?
1979-05-27T07:32:00Z
1979-05-27T00:32:00-07:00
1979-05-27T00:32:00.999999-07:00
1979-05-27 07:32:00.1234+09:00
2000-05-27 07:32:00
2000-01-01
12:00:00
12:00:00.12345
*#
POSIX.1-2001+ (-D_POSIX_C_SOURCE=200112L etc)
Copyright 2022 momi-g, GPLv3+
2022-07-12 v1.0.2 (2022-06-13 v1.0.0)
https://en.wikipedia.org/wiki/Configuration_file