開発環境
- OS: macOS High Sierra - Apple
- Text Editor: Emacs
- プログラミング言語: Python3
- モジュール: sion (GitHub)
- ANTLR4(parser generator)
stream(load関数、dump関数)だけではなく、関数の引数として文字列を渡しての読み込み、文字列への書き出しも出来るようにしてみた。(loads関数、dumps関数)
test.py
#!/usr/bin/env python3
# Copyright © 2018 kamimura. All rights reserved.
"""Unit tests for how ``sion.loads`` maps container-typed dictionary keys."""

import unittest

from sion import loads


class LoadsDictTest(unittest.TestCase):
    """Check the Python keys produced for SION array and dictionary keys."""

    def test_list_keys(self):
        # Array keys are expected to load as tuples of their elements.
        expected = {(): None, (None,): None, (None, True): False}
        actual = loads('[[]:nil, [nil]: nil, [nil, true]: false]')
        self.assertEqual(actual, expected)

    def test_dict_keys(self):
        # Dictionary keys are expected to load as tuples of (key, value) pairs.
        expected = {
            (): None,
            ((True, None),): None,
            ((True, None), (False, None)): None,
        }
        actual = loads('[[:]:nil, [true:nil]:nil, [true:nil, false:nil]:nil]')
        self.assertEqual(actual, expected)


if __name__ == '__main__':
    unittest.main()
SIONVisitor.py
# Created by kamimura on 2018/07/21.
# Copyright © 2018 kamimura. All rights reserved.
# Generated from SION.g4 by ANTLR 4.7.1
from antlr4 import *
if __name__ is not None and "." in __name__:
    from .SIONParser import SIONParser
else:
    from SIONParser import SIONParser
import datetime


def num_rem_under(n):
    """Return the numeric literal text *n* with every underscore removed."""
    return n.replace('_', '')


def str_esc(s):
    """Return *s* with double quotes, newlines and carriage returns escaped.

    Backslashes themselves are left untouched by this helper.
    """
    for o, n in [('"', '\\"'), ('\n', '\\n'), ('\r', '\\r')]:
        s = s.replace(o, n)
    return s


def _freeze(value):
    """Return a hashable stand-in for *value* so it can serve as a dict key.

    Lists (and tuples) become tuples, dictionaries become tuples of
    ``(key, value)`` pairs, and the conversion is applied recursively so that
    nested containers inside a key do not raise ``TypeError: unhashable type``.
    Any other value is returned unchanged.
    """
    if isinstance(value, (list, tuple)):
        return tuple(_freeze(item) for item in value)
    if isinstance(value, dict):
        return tuple((_freeze(k), _freeze(v)) for k, v in value.items())
    return value


# This class defines a complete generic visitor for a parse tree produced by SIONParser.
class SIONVisitor(ParseTreeVisitor):
    """Visitor that turns a SIONParser parse tree into plain Python objects."""

    # Visit a parse tree produced by SIONParser#si_self.
    def visitSi_self(self, ctx: SIONParser.Si_selfContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_array.
    def visitSi_array(self, ctx: SIONParser.Si_arrayContext):
        """Return the array as a list; an array without items is ``[]``."""
        items = ctx.si_array_items()
        if items:
            return self.visit(items)
        return []

    # Visit a parse tree produced by SIONParser#si_array_items.
    def visitSi_array_items(self, ctx: SIONParser.Si_array_itemsContext):
        """Return the list of visited array elements, in source order."""
        return [self.visit(item) for item in ctx.si_self()]

    # Visit a parse tree produced by SIONParser#si_dict.
    def visitSi_dict(self, ctx: SIONParser.Si_dictContext):
        """Return the dictionary as a dict; one without pairs is ``{}``."""
        pairs = ctx.si_dict_pairs()
        if pairs:
            return self.visit(pairs)
        return {}

    # Visit a parse tree produced by SIONParser#si_dict_pairs.
    def visitSi_dict_pairs(self, ctx: SIONParser.Si_dict_pairsContext):
        """Build a dict from the visited ``(key, value)`` pairs."""
        return dict(self.visit(pair) for pair in ctx.si_dict_pair())

    # Visit a parse tree produced by SIONParser#si_dict_pair.
    def visitSi_dict_pair(self, ctx: SIONParser.Si_dict_pairContext):
        """Return one ``(key, value)`` tuple with the key made hashable.

        Array keys become tuples and dictionary keys become tuples of
        ``(key, value)`` pairs. The conversion is recursive, so keys that
        contain nested arrays or dictionaries are usable as Python dict keys.
        """
        key, value = [self.visit(node) for node in ctx.si_self()]
        return (_freeze(key), value)

    # Visit a parse tree produced by SIONParser#si_literal.
    def visitSi_literal(self, ctx: SIONParser.Si_literalContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_ints.
    def visitSi_ints(self, ctx: SIONParser.Si_intsContext):
        """Return the integer value, negated when a minus token is present."""
        sign = -1 if ctx.SI_minus() else 1
        # NOTE(review): relies on visitChildren yielding the magnitude of the
        # integer child; confirm against the grammar / runtime aggregation.
        return sign * self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_doubles.
    def visitSi_doubles(self, ctx: SIONParser.Si_doublesContext):
        """Return the float value, negated when a minus token is present."""
        sign = -1 if ctx.SI_minus() else 1
        text = num_rem_under(ctx.SI_double().getText())
        # Literals starting with "0x" are parsed as hexadecimal floats.
        if text[:2] == '0x':
            magnitude = float.fromhex(text)
        else:
            magnitude = float(text)
        return sign * magnitude

    # Visit a parse tree produced by SIONParser#si_bool.
    def visitSi_bool(self, ctx: SIONParser.Si_boolContext):
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_true.
    def visitSi_true(self, ctx: SIONParser.Si_trueContext):
        return True

    # Visit a parse tree produced by SIONParser#si_false.
    def visitSi_false(self, ctx: SIONParser.Si_falseContext):
        return False

    # Visit a parse tree produced by SIONParser#si_nil.
    def visitSi_nil(self, ctx: SIONParser.Si_nilContext):
        return None

    # Visit a parse tree produced by SIONParser#si_int.
    def visitSi_int(self, ctx: SIONParser.Si_intContext):
        """Return the unsigned integer for a bin/oct/decimal/hex literal.

        Raises:
            ValueError: if the context carries none of the four token kinds.
        """
        candidates = (
            (ctx.SI_bin, 2),
            (ctx.SI_oct, 8),
            (ctx.SI_decimal, 10),
            (ctx.SI_hex, 16),
        )
        for accessor, base in candidates:
            token = accessor()
            if token:
                return int(num_rem_under(token.getText()), base)
        raise ValueError(f'unrecognized integer literal: {ctx.getText()!r}')

    # Visit a parse tree produced by SIONParser#si_data.
    def visitSi_data(self, ctx: SIONParser.Si_dataContext):
        """Return the payload text of the data literal as ASCII bytes."""
        # Drops the first 7 and last 2 characters of the token text,
        # presumably the `.Data("` prefix and `")` suffix.
        return ctx.SI_data().getText()[7:-2].encode('ascii')

    # Visit a parse tree produced by SIONParser#si_date.
    def visitSi_date(self, ctx: SIONParser.Si_dateContext):
        """Return a ``datetime`` built from the literal's numeric timestamp."""
        if ctx.si_doubles():
            timestamp = self.visit(ctx.si_doubles())
        else:
            timestamp = self.visit(ctx.si_ints())
        return datetime.datetime.fromtimestamp(timestamp)

    # Visit a parse tree produced by SIONParser#si_string.
    def visitSi_string(self, ctx: SIONParser.Si_stringContext):
        """Return the string literal's text without its first and last character."""
        return ctx.SI_string_literal().getText()[1:-1]


del SIONParser
sion.py
# Created by kamimura on 2018/07/21.
# Copyright © 2018 kamimura. All rights reserved.
import sys
import datetime
from antlr4 import *
from SIONLexer import SIONLexer
from SIONParser import SIONParser
from SIONVisitor import SIONVisitor


def _parse(text: str) -> object:
    """Lex, parse and visit SION source *text*, returning the Python object."""
    stream = InputStream(text)
    lexer = SIONLexer(stream)
    tokens = CommonTokenStream(lexer)
    parser = SIONParser(tokens)
    tree = parser.si_self()
    return SIONVisitor().visit(tree)


def load(file, encoding: str='utf-8', errors: str='strict') -> object:
    """Read SION from the file object *file* and return the Python object.

    Args:
        file: object with a ``read()`` method returning ``str`` or ``bytes``.
        encoding: codec used to decode the data when ``read()`` yields bytes.
        errors: decoding error policy used together with *encoding*.
    """
    data = file.read()
    if isinstance(data, bytes):
        data = data.decode(encoding, errors)
    return _parse(data)


def loads(s):
    """Parse the SION text *s* and return the Python object.

    *s* may be ``str`` or ``bytes``; bytes are decoded with the default codec.
    """
    if isinstance(s, bytes):
        s = s.decode()
    return _parse(s)


def str_esc(s):
    """Return *s* escaped for use inside a double-quoted SION string.

    The backslash is escaped first; doing it last would double the
    backslashes that the other replacements have just inserted.
    """
    for o, n in [('\\', '\\\\'), ('"', '\\"'), ('\n', '\\n'), ('\r', '\\r')]:
        s = s.replace(o, n)
    return s


def dump(obj, file):
    """Write the SION representation of *obj* to *file*, with no trailing newline.

    The text is produced by :func:`dumps`, so both functions always agree and
    nothing is written when *obj* is not serializable.

    Raises:
        TypeError: if *obj* (or something it contains) is of an unsupported type.
    """
    print(dumps(obj), file=file, end='')


def dumps(obj: object) -> str:
    """Return the SION representation of *obj* as a string.

    Supported types are ``None``, ``bool``, ``int``, ``float``, ``str``,
    ``bytes``, ``datetime.datetime``, ``list``, ``tuple`` and ``dict``
    (matched by exact type, containers handled recursively).

    Raises:
        TypeError: if *obj* (or something it contains) is of an unsupported type.
    """
    t = type(obj)
    if obj is None:
        return 'nil'
    if t == bool:
        return 'true' if obj else 'false'
    if t in {int, float}:
        return str(obj)
    if t == str:
        return f'"{str_esc(obj)}"'
    if t == bytes:
        # Strip the leading b and quote plus the trailing quote from the repr.
        return f'.Data("{repr(obj)[2:-1]}")'
    if t == datetime.datetime:
        return f'.Date({obj.timestamp()})'
    if t in {list, tuple}:
        return '[' + ','.join(dumps(o) for o in obj) + ']'
    if t == dict:
        if not obj:
            # An empty dictionary needs the colon to differ from an empty array.
            return '[:]'
        # Both keys and values go through dumps so every value is valid SION.
        pairs = (dumps(k) + ':' + dumps(v) for k, v in obj.items())
        return '[' + ','.join(pairs) + ']'
    raise TypeError(
        f"Object of type '{obj.__class__.__name__}' is not SION serializable")


if __name__ == '__main__':
    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        filename = '../test/t.sion'
    with open(filename) as f:
        obj = load(f)
    print(obj)
    with open('../test/output.sion', 'w') as f:
        dump(obj, f)
入出力結果(Terminal, Jupyter(IPython))
$ ./test.py .. ---------------------------------------------------------------------- Ran 2 tests in 0.013s OK $
SIONのディクショナリのキーがunhashable typeの場合、とりあえず文字列に変換してたのを、よりデータ型が近い(?)タプル(リストの場合)、タプルのタプル(dictionaryの場合)に変更。
タプルで済ませるのではなく、SION classを作成、抽象化して、情報を失わないようにするか迷ったり。今のところ、情報が失われる事を許容してタプルで済ませた方が、Pythonのオブジェクトとして気軽に扱えるからいいかなぁと考えてたり。
0 コメント:
コメントを投稿