開発環境
- OS: macOS High Sierra - Apple
- Text Editor: Emacs
- プログラミング言語: Python3
- ANTLR4(parser generator)
前回の続き。
意外に早くできたから、前回言ったように修正、追加もして、さらにパッケージ化してPyPIに上げて、pip(python3対応だからpip3。macOSに標準搭載のバージョンも2.7ではなく3.xになってほしい…)でインストールできるようにしてみた。
(早くできたというよりは、とりあえず細かいことは気にせずに書いただけだったり。ということで、load(ファイル名)、dump(オブジェクト, ファイル名)の機能しかなかったり、その機能も大雑把だったり、辞書のキーに使えないもの(リスト(配列)、辞書)は文字列に変換して済ませてたりなど。(´・_・`))
SION.g4
// Created by kamimura on 2018/07/21.
// Copyright © 2018 kamimura. All rights reserved.
// ANTLR4 grammar for SION text.
// Names starting with an upper-case letter (SI_...) are lexer rules (tokens);
// names starting with a lower-case letter (si_...) are parser rules.
grammar SION;
// ---- Punctuation and keyword tokens ----
SI_minus: '-';
SI_lsb: '[';
SI_rsb: ']';
SI_comma: ',';
SI_quote: '"';
SI_colon: ':';
SI_dot: '.';
SI_nil: 'nil';
SI_true: 'true';
SI_false: 'false';
// ---- Parser rules ----
// Any SION value: a literal, an array, or a dictionary.
si_self: si_literal
| si_array
| si_dict
;
// Array: '[]' (empty) or '[' item (',' item)* ']'.
si_array: SI_lsb SI_rsb
| SI_lsb si_array_items SI_rsb
;
si_array_items: si_self (SI_comma si_self)*;
// Dictionary: '[' key ':' value (',' key ':' value)* ']', or '[:]' when empty.
si_dict: SI_lsb si_dict_pairs SI_rsb
| SI_lsb SI_colon SI_rsb
;
si_dict_pairs: si_dict_pair (SI_comma si_dict_pair)*;
// Both the key and the value of a pair may be any SION value.
si_dict_pair: si_self SI_colon si_self;
// Scalar literals.
si_literal: si_date
| si_data
| si_ints
| si_doubles
| si_string
| si_bool
| si_nil
;
// Numbers carry an optional leading minus sign handled at parser level.
si_ints: SI_minus? si_int;
si_doubles: SI_minus? SI_double;
si_bool: si_true
| si_false
;
si_true: SI_true;
si_false: SI_false;
si_nil: SI_nil;
// Integer in binary, octal, decimal or hexadecimal notation.
si_int: SI_bin
| SI_oct
| SI_decimal
| SI_hex
;
// ---- Data: .Data("<base64 characters>") lexed as a single token ----
si_data: SI_data;
SI_data: SI_data_pre SI_base64 SI_data_post;
fragment SI_data_pre: '.Data(';
fragment SI_base64: SI_quote SI_base64_item* SI_quote;
fragment SI_base64_item: [a-zA-Z0-9+/=];
fragment SI_data_post: ')';
// ---- Date: .Date(<double or int>) ----
si_date: '.Date(' (si_doubles | si_ints) ')';
// ---- Integer tokens: prefix, one digit, then digits or '_' separators ----
// Binary: 0b...
SI_bin: SI_bin_pre SI_bin_digit SI_bin_digit_under*;
fragment SI_bin_pre: '0b';
fragment SI_bin_digit: [01];
fragment SI_bin_digit_under: SI_bin_digit
| SI_under;
// Octal: 0o...
SI_oct: SI_oct_pre SI_oct_digit SI_oct_digit_under*;
fragment SI_oct_pre: '0o';
fragment SI_oct_digit: [0-7] ;
fragment SI_oct_digit_under: SI_oct_digit
| SI_under
;
// Decimal: no prefix.
SI_decimal: SI_decimal_digit SI_decimal_digit_under*;
fragment SI_decimal_digit: [0-9] ;
fragment SI_decimal_digit_under: SI_decimal_digit
| SI_under ;
// Hexadecimal: 0x...
SI_hex : SI_hex_pre SI_hex_digit SI_hex_digit_under*;
fragment SI_hex_pre: '0x';
fragment SI_hex_digit : [0-9a-fA-F] ;
fragment SI_hex_digit_under: SI_hex_digit
| SI_under
;
// ---- Floating-point tokens ----
// Decimal form: optional fraction and optional e/E exponent.
// Hexadecimal form: optional fraction and a mandatory p/P exponent.
// NOTE: the decimal alternative also matches a bare integer; the integer
// token rules are declared earlier in this file than SI_double.
SI_double: SI_decimal SI_decimal_frac? SI_decimal_exp?
| SI_hex SI_hex_frac? SI_hex_exp
;
fragment SI_decimal_frac: SI_dot SI_decimal;
fragment SI_decimal_exp: ('e'|'E') SI_sign? SI_decimal;
fragment SI_hex_frac: SI_dot SI_hex_digit SI_hex_digit_under*;
fragment SI_hex_exp: ('p'|'P') SI_sign? SI_decimal;
fragment SI_sign: ('+'|'-');
fragment SI_under: '_';
// ---- Strings: double-quoted, with backslash escapes ----
si_string: SI_string_literal;
SI_string_literal: SI_quote SI_char* SI_quote;
// A string character is an escape sequence or any character other than
// a double quote, CR, LF or backslash.
fragment SI_char: SI_esc
| ~["\r\n\\]
;
// Escape sequence: a backslash followed by one of  0 \ t n r " '
fragment SI_esc: '\\' [0\\tnr"'];
// Whitespace (space, CR, LF, tab, NUL, VT, FF) is skipped.
SI_ws : [ \r\n\t\u0000\u000b\u000c]+ -> skip;
// Line comment: '//' followed by everything up to, but not including, the end
// of the line. The line break itself is left to SI_ws, so a comment on the
// last line of an input that has no trailing newline is still skipped
// (the previous form required a terminating '\n').
SI_comment : '//' ~[\r\n]* -> skip;
SIONVisitor.py
# Created by kamimura on 2018/07/21.
# Copyright © 2018 kamimura. All rights reserved.
# Generated from SION.g4 by ANTLR 4.7.1
from antlr4 import *
if __name__ is not None and "." in __name__:
from .SIONParser import SIONParser
else:
from SIONParser import SIONParser
import datetime
def num_rem_under(n):
    """Return the numeric literal text *n* with every '_' separator removed."""
    pieces = n.split('_')
    return ''.join(pieces)
# Characters that must be backslash-escaped inside a SION string literal.
_STR_ESC_TABLE = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
})


def str_esc(s):
    """Return *s* escaped for use between the quotes of a SION string literal.

    Backslash, double quote, line feed and carriage return are replaced by
    their backslash escape sequences. The replacement is done in a single
    pass so the backslashes introduced for one character are never escaped
    a second time.
    """
    return s.translate(_STR_ESC_TABLE)
# This class defines a complete generic visitor for a parse tree produced by SIONParser.
class SIONVisitor(ParseTreeVisitor):
    """Convert a SION parse tree into plain Python objects.

    Mapping produced by the visit methods below:
    nil -> None, true/false -> bool, integers -> int, doubles -> float,
    strings -> str, .Data(...) -> bytes, .Date(...) -> datetime.datetime,
    arrays -> list, dictionaries -> dict.
    """

    # Character following a backslash in a string literal -> decoded character
    # (the set accepted by the grammar's SI_esc rule).
    _ESCAPES = {
        '0': '\0',
        '\\': '\\',
        't': '\t',
        'n': '\n',
        'r': '\r',
        '"': '"',
        "'": "'",
    }

    # Visit a parse tree produced by SIONParser#si_self.
    def visitSi_self(self, ctx: SIONParser.Si_selfContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_array.
    def visitSi_array(self, ctx: SIONParser.Si_arrayContext):
        """Return the array as a list; '[]' yields an empty list."""
        items = ctx.si_array_items()
        return self.visit(items) if items else []

    # Visit a parse tree produced by SIONParser#si_array_items.
    def visitSi_array_items(self, ctx: SIONParser.Si_array_itemsContext):
        return [self.visit(item) for item in ctx.si_self()]

    # Visit a parse tree produced by SIONParser#si_dict.
    def visitSi_dict(self, ctx: SIONParser.Si_dictContext):
        """Return the dictionary as a dict; '[:]' yields an empty dict."""
        pairs = ctx.si_dict_pairs()
        return self.visit(pairs) if pairs else {}

    # Visit a parse tree produced by SIONParser#si_dict_pairs.
    def visitSi_dict_pairs(self, ctx: SIONParser.Si_dict_pairsContext):
        """Build a dict from the key/value pairs.

        Lists and dicts are unhashable in Python, so such keys are replaced
        by their str() form. Keys that compare equal in Python (for example
        True, 1 and 1.0) end up in a single entry; the last value wins.
        """
        result = {}
        for pair in ctx.si_dict_pair():
            key, value = self.visit(pair)
            if isinstance(key, (list, dict)):
                key = str(key)
            result[key] = value
        return result

    # Visit a parse tree produced by SIONParser#si_dict_pair.
    def visitSi_dict_pair(self, ctx: SIONParser.Si_dict_pairContext):
        """Return [key, value] for one dictionary entry."""
        return [self.visit(part) for part in ctx.si_self()]

    # Visit a parse tree produced by SIONParser#si_literal.
    def visitSi_literal(self, ctx: SIONParser.Si_literalContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_ints.
    def visitSi_ints(self, ctx: SIONParser.Si_intsContext):
        """Return the integer value, negated when a leading '-' is present."""
        # Visit the si_int child explicitly instead of depending on how
        # visitChildren() aggregates the results of the '-' token and si_int.
        value = self.visit(ctx.si_int())
        return -value if ctx.SI_minus() else value

    # Visit a parse tree produced by SIONParser#si_doubles.
    def visitSi_doubles(self, ctx: SIONParser.Si_doublesContext):
        """Return the float value, negated when a leading '-' is present."""
        text = num_rem_under(ctx.SI_double().getText())
        # Hexadecimal floats (0x...p...) need float.fromhex; float() only
        # understands the decimal notation.
        if text.startswith('0x'):
            value = float.fromhex(text)
        else:
            value = float(text)
        return -value if ctx.SI_minus() else value

    # Visit a parse tree produced by SIONParser#si_bool.
    def visitSi_bool(self, ctx: SIONParser.Si_boolContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_true.
    def visitSi_true(self, ctx: SIONParser.Si_trueContext):
        return True

    # Visit a parse tree produced by SIONParser#si_false.
    def visitSi_false(self, ctx: SIONParser.Si_falseContext):
        return False

    # Visit a parse tree produced by SIONParser#si_nil.
    def visitSi_nil(self, ctx: SIONParser.Si_nilContext):
        return None

    # Visit a parse tree produced by SIONParser#si_int.
    def visitSi_int(self, ctx: SIONParser.Si_intContext):
        """Return the unsigned integer for a 0b / 0o / decimal / 0x literal.

        Raises:
            ValueError: if the context holds none of the four integer tokens.
        """
        candidates = (
            (ctx.SI_bin(), 2),
            (ctx.SI_oct(), 8),
            (ctx.SI_decimal(), 10),
            (ctx.SI_hex(), 16),
        )
        for token, base in candidates:
            if token:
                # int() accepts the 0b/0o/0x prefix when the base matches.
                return int(num_rem_under(token.getText()), base)
        raise ValueError('si_int context contains no integer token')

    # Visit a parse tree produced by SIONParser#si_data.
    def visitSi_data(self, ctx: SIONParser.Si_dataContext):
        """Return the text between '.Data("' and '")' as ASCII bytes.

        The base64 text is returned as-is; it is not decoded.
        """
        # 7 == len('.Data("'), 2 == len('")')
        return ctx.SI_data().getText()[7:-2].encode('ascii')

    # Visit a parse tree produced by SIONParser#si_date.
    def visitSi_date(self, ctx: SIONParser.Si_dateContext):
        """Return the date as a naive, local-time datetime.datetime.

        The numeric argument is passed to datetime.fromtimestamp() as a
        POSIX timestamp.
        """
        number = ctx.si_doubles() or ctx.si_ints()
        return datetime.datetime.fromtimestamp(self.visit(number))

    # Visit a parse tree produced by SIONParser#si_string.
    def visitSi_string(self, ctx: SIONParser.Si_stringContext):
        """Return the string value with quotes removed and escapes decoded."""
        raw = ctx.SI_string_literal().getText()[1:-1]
        if '\\' not in raw:
            return raw
        decoded = []
        chars = iter(raw)
        for ch in chars:
            if ch == '\\':
                # The lexer only accepts a backslash followed by an SI_esc
                # character; anything unexpected is kept verbatim.
                following = next(chars, '')
                decoded.append(self._ESCAPES.get(following, '\\' + following))
            else:
                decoded.append(ch)
        return ''.join(decoded)
# Remove the SIONParser name from this module's namespace; it was only needed
# above for the ctx annotations on the visit methods.
del SIONParser
sion.py
#!/usr/bin/env python3
# Created by kamimura on 2018/07/21.
# Copyright © 2018 kamimura. All rights reserved.
import sys
from antlr4 import *
from SIONLexer import SIONLexer
from SIONParser import SIONParser
from SIONVisitor import SIONVisitor
import datetime
def load(filename: str, encoding='utf-8') -> object:
    """Parse the SION file *filename* and return it as Python objects.

    Args:
        filename: path of the SION text file to read.
        encoding: text encoding of the file (previously accepted but
            ignored; it is now passed on to the file stream).

    Returns:
        Whatever SIONVisitor produces for the top-level si_self rule.
    """
    stream = FileStream(filename, encoding=encoding)
    lexer = SIONLexer(stream)
    tokens = CommonTokenStream(lexer)
    parser = SIONParser(tokens)
    tree = parser.si_self()
    return SIONVisitor().visit(tree)
# Characters that must be backslash-escaped inside a SION string literal.
_STR_ESC_TABLE = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
})


def str_esc(s):
    """Return *s* escaped for use between the quotes of a SION string literal.

    Backslash, double quote, line feed and carriage return are replaced by
    their backslash escape sequences. The replacement is done in a single
    pass: the earlier chained str.replace() calls handled the backslash
    last and therefore re-escaped the backslashes they had just inserted
    (a double quote came out as two backslashes plus the quote).
    """
    return s.translate(_STR_ESC_TABLE)
def dump_file(obj, file):
    """Write *obj* to the text stream *file* as SION, with no trailing newline.

    Supported values: None, bool, int, float, str, bytes, datetime.datetime,
    list/tuple and dict (including subclasses), nested to any depth.

    Args:
        obj: the value to serialise.
        file: an object with a write(str) method.

    Raises:
        TypeError: if *obj* or a nested value has an unsupported type
            (previously such values were silently skipped, leaving an
            incomplete document).
    """
    if obj is None:
        file.write('nil')
    elif isinstance(obj, bool):
        # bool is tested before int because bool is a subclass of int.
        file.write('true' if obj else 'false')
    elif isinstance(obj, (int, float)):
        file.write(str(obj))
    elif isinstance(obj, str):
        file.write(f'"{str_esc(obj)}"')
    elif isinstance(obj, bytes):
        # str(b'...')[2:-1] strips the b'...' wrapper of the bytes repr.
        file.write(f'.Data("{str(obj)[2:-1]}")')
    elif isinstance(obj, datetime.datetime):
        file.write(f'.Date({obj.timestamp()})')
    elif isinstance(obj, (list, tuple)):
        file.write('[')
        for index, item in enumerate(obj):
            if index:
                file.write(',')
            dump_file(item, file)
        file.write(']')
    elif isinstance(obj, dict):
        file.write('[')
        if not obj:
            # An empty dictionary is written as [:] to tell it from [].
            file.write(':')
        for index, (key, value) in enumerate(obj.items()):
            if index:
                file.write(',')
            dump_file(key, file)
            file.write(':')
            dump_file(value, file)
        file.write(']')
    else:
        raise TypeError(
            f'Object of type {type(obj).__name__} is not SION serializable')
def dump(obj, filename, encoding='utf-8'):
    """Serialise *obj* as SION text into the file *filename*.

    Args:
        obj: the value to write; see dump_file for how it is serialised.
        filename: path of the file to create or overwrite.
        encoding: text encoding of the output. Defaults to UTF-8, the same
            default load() uses, instead of the platform's locale encoding,
            so a dumped file can be read back on any system.
    """
    with open(filename, 'w', encoding=encoding) as file:
        dump_file(obj, file)
if __name__ == '__main__':
    # Input path: first command-line argument when given, otherwise the
    # default sample path.
    filename = sys.argv[1] if len(sys.argv) > 1 else 'test/t.sion'
    # Load, show the resulting Python object, then write it back out.
    obj = load(filename)
    print(obj)
    dump(obj, 'test/output.sion')
入出力結果(Terminal, Jupyter(IPython))
$ ./sion.py
{'array': [None, True, 1, 1.0, 'one', [1], {'one': 1.0}], 'bool': True, 'data': b'R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', 'date': datetime.datetime(1970, 1, 1, 9, 0), 'dictionary': {'array': [], 'bool': False, 'double': 0.0, 'int': 0, 'nil': None, 'object': {}, 'string': ''}, 'double': 42.195, 'int': -42, 'nil': None, 'string': '漢字、カタカナ、ひらがなの入ったstring😇', 'url': 'https://github.com/dankogai/', None: 'Unlike JSON and Property Lists,', True: 'non-String keys.', '[]': 'like', '{}': 'Map of ECMAScript.'}
$ cat test/t.sion
[
"array" : [
nil,
true,
1, // Int in decimal
1.0, // Double in decimal
"one",
[1],
["one" : 1.0]
],
"bool" : true,
"data" : .Data("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"),
"date" : .Date(0x0p+0),
"dictionary" : [
"array" : [],
"bool" : false,
"double" : 0x0p+0,
"int" : 0,
"nil" : nil,
"object" : [:],
"string" : ""
],
"double" : 0x1.518f5c28f5c29p+5, // Double in hexadecimal
"int" : -0x2a, // Int in hexadecimal
"nil" : nil,
"string" : "漢字、カタカナ、ひらがなの入ったstring😇",
"url" : "https://github.com/dankogai/",
nil : "Unlike JSON and Property Lists,",
true : "Yes, SION",
1 : "does accept",
1.0 : "non-String keys.",
[] : "like",
[:] : "Map of ECMAScript."
]$ cat test/output.sion
["array":[nil,ture,1,1.0,"one",[1],["one":1.0]],"bool":ture,"data":.Data("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"),"date":.Date(0.0),"dictionary":["array":[],"bool":false,"double":0.0,"int":0,"nil":nil,"object":[:],"string":""],"double":42.195,"int":-42,"nil":nil,"string":"漢字、カタカナ、ひらがなの入ったstring😇","url":"https://github.com/dankogai/",nil:"Unlike JSON and Property Lists,",ture:"non-String keys.","[]":"like","{}":"Map of ECMAScript."]$
jsonモジュールのloadメソッド、dumpメソッドのようにファイルストリームを引数に渡すのではなく、ファイル名を引数に渡すようにしたのはファイルの読み込み、書き出しの都度withステートメント、open関数、close関数等を使用しなくて済んでちょっとだけ楽になるかなぁと考えて。
0 コメント:
コメントを投稿