開発環境
- OS: macOS High Sierra - Apple
- Text Editor: Emacs
- プログラミング言語: Python3
- モジュール: sion (GitHub)
- ANTLR4(parser generator)
stream(load関数、dump関数)だけではなく、文字列を引数に渡しての読み込み(loads関数)と、文字列としての書き出し(dumps関数)も出来るようにしてみた。
test.py
#!/usr/bin/env python3
# Copyright © 2018 kamimura. All rights reserved.
import unittest
from sion import loads
class LoadsDictTest(unittest.TestCase):
    """Check how ``loads`` represents SION dictionaries whose keys are
    arrays or dictionaries."""

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def test_list_keys(self):
        # Array keys are expected to come back as tuples.
        expected = {(): None, (None,): None, (None, True): False}
        actual = loads('[[]:nil, [nil]: nil, [nil, true]: false]')
        self.assertEqual(actual, expected)

    def test_dict_keys(self):
        # Dictionary keys are expected to come back as tuples of
        # (key, value) pairs.
        expected = {
            (): None,
            ((True, None),): None,
            ((True, None), (False, None)): None,
        }
        actual = loads('[[:]:nil, [true:nil]:nil, [true:nil, false:nil]:nil]')
        self.assertEqual(actual, expected)
# Run the unittest command-line runner when this file is executed as a script.
if __name__ == '__main__':
unittest.main()
SIONVisitor.py
# Created by kamimura on 2018/07/21.
# Copyright © 2018 kamimura. All rights reserved.
# Generated from SION.g4 by ANTLR 4.7.1
from antlr4 import *
if __name__ is not None and "." in __name__:
from .SIONParser import SIONParser
else:
from SIONParser import SIONParser
import datetime
def num_rem_under(n):
    """Return the string *n* with every underscore character removed."""
    pieces = n.split('_')
    return ''.join(pieces)
# Maps every character that must be escaped to its backslash escape.
# The backslash itself is included so that escaped output stays unambiguous.
_STR_ESCAPES = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
})


def str_esc(s):
    """Return *s* with backslash, double quote, LF and CR backslash-escaped.

    The replacement is done in a single pass, so an inserted backslash is
    never escaped a second time.
    """
    return s.translate(_STR_ESCAPES)
# This class defines a complete generic visitor for a parse tree produced by SIONParser.
class SIONVisitor(ParseTreeVisitor):
    """Visitor that turns a SION parse tree into plain Python objects.

    Each ``visitSi_*`` method handles the parser rule of the same name and
    returns the Python value for that node: lists for arrays, dicts for
    dictionaries, ``None``/``True``/``False`` for nil and booleans, ``int``
    and ``float`` for numbers, ``bytes`` for data, ``datetime.datetime`` for
    dates and ``str`` for strings.
    """

    @staticmethod
    def _hashable_key(value):
        """Return a hashable stand-in for a dictionary key.

        Lists become tuples and dicts become tuples of ``(key, value)``
        pairs.  The conversion is applied recursively so that arrays or
        dictionaries nested inside a key are made hashable as well; any
        other value is returned unchanged.
        """
        if isinstance(value, list):
            return tuple(SIONVisitor._hashable_key(item) for item in value)
        if isinstance(value, dict):
            # Keys of an already-built dict are hashable; only the values
            # may still contain lists or dicts.
            return tuple(
                (key, SIONVisitor._hashable_key(item))
                for key, item in value.items()
            )
        return value

    # Visit a parse tree produced by SIONParser#si_self.
    def visitSi_self(self, ctx: SIONParser.Si_selfContext):
        """Return whatever visiting the children of *ctx* produces."""
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_array.
    def visitSi_array(self, ctx: SIONParser.Si_arrayContext):
        """Return the array as a list; an array without items is ``[]``."""
        items = ctx.si_array_items()
        return self.visit(items) if items else []

    # Visit a parse tree produced by SIONParser#si_array_items.
    def visitSi_array_items(self, ctx: SIONParser.Si_array_itemsContext):
        """Return a list with the visited value of every ``si_self`` child."""
        return [self.visit(child) for child in ctx.si_self()]

    # Visit a parse tree produced by SIONParser#si_dict.
    def visitSi_dict(self, ctx: SIONParser.Si_dictContext):
        """Return the dictionary as a dict; one without pairs is ``{}``."""
        pairs = ctx.si_dict_pairs()
        return self.visit(pairs) if pairs else {}

    # Visit a parse tree produced by SIONParser#si_dict_pairs.
    def visitSi_dict_pairs(self, ctx: SIONParser.Si_dict_pairsContext):
        """Build a dict from the ``(key, value)`` tuple of every pair."""
        return dict(self.visit(pair) for pair in ctx.si_dict_pair())

    # Visit a parse tree produced by SIONParser#si_dict_pair.
    def visitSi_dict_pair(self, ctx: SIONParser.Si_dict_pairContext):
        """Return one pair as ``(key, value)`` with a hashable key.

        Array and dictionary keys are converted (recursively) to tuples so
        they can be used as Python dict keys.
        """
        key, value = [self.visit(child) for child in ctx.si_self()]
        return (self._hashable_key(key), value)

    # Visit a parse tree produced by SIONParser#si_literal.
    def visitSi_literal(self, ctx: SIONParser.Si_literalContext):
        """Return whatever visiting the children of *ctx* produces."""
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_ints.
    def visitSi_ints(self, ctx: SIONParser.Si_intsContext):
        """Return the integer value, negated when a minus token is present."""
        sign = -1 if ctx.SI_minus() else 1
        # NOTE(review): relies on visitChildren returning the visited
        # integer child's value -- confirm against the grammar.
        return sign * self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_doubles.
    def visitSi_doubles(self, ctx: SIONParser.Si_doublesContext):
        """Return the float value, negated when a minus token is present.

        Underscores are dropped from the token text; text starting with
        ``0x`` is parsed with ``float.fromhex``, anything else with
        ``float``.
        """
        sign = -1 if ctx.SI_minus() else 1
        text = num_rem_under(ctx.SI_double().getText())
        if text.startswith('0x'):
            magnitude = float.fromhex(text)
        else:
            magnitude = float(text)
        return sign * magnitude

    # Visit a parse tree produced by SIONParser#si_bool.
    def visitSi_bool(self, ctx: SIONParser.Si_boolContext):
        """Return whatever visiting the children of *ctx* produces."""
        return self.visitChildren(ctx)

    # Visit a parse tree produced by SIONParser#si_true.
    def visitSi_true(self, ctx: SIONParser.Si_trueContext):
        """Return ``True``."""
        return True

    # Visit a parse tree produced by SIONParser#si_false.
    def visitSi_false(self, ctx: SIONParser.Si_falseContext):
        """Return ``False``."""
        return False

    # Visit a parse tree produced by SIONParser#si_nil.
    def visitSi_nil(self, ctx: SIONParser.Si_nilContext):
        """Return ``None``."""
        return None

    # Visit a parse tree produced by SIONParser#si_int.
    def visitSi_int(self, ctx: SIONParser.Si_intContext):
        """Return the unsigned integer for a bin/oct/decimal/hex token.

        Underscores are dropped from the token text before conversion.

        Raises:
            ValueError: if none of the four integer tokens is present.
        """
        candidates = (
            (ctx.SI_bin, 2),
            (ctx.SI_oct, 8),
            (ctx.SI_decimal, 10),
            (ctx.SI_hex, 16),
        )
        for get_token, base in candidates:
            token = get_token()
            if token:
                return int(num_rem_under(token.getText()), base)
        raise ValueError(f'not a SION integer literal: {ctx.getText()!r}')

    # Visit a parse tree produced by SIONParser#si_data.
    def visitSi_data(self, ctx: SIONParser.Si_dataContext):
        """Return the text between the data wrapper as ASCII-encoded bytes.

        The slice drops the first 7 and the last 2 characters of the token
        text (assumes the form ``.Data("...")``).
        """
        # NOTE(review): the payload is not base64-decoded here -- confirm
        # that returning the raw text as bytes is intended.
        return ctx.SI_data().getText()[7:-2].encode('ascii')

    # Visit a parse tree produced by SIONParser#si_date.
    def visitSi_date(self, ctx: SIONParser.Si_dateContext):
        """Return a ``datetime.datetime`` built from the numeric timestamp.

        The timestamp comes from the doubles child when present, otherwise
        from the ints child, and is passed to
        ``datetime.datetime.fromtimestamp``.
        """
        if ctx.si_doubles():
            timestamp = self.visit(ctx.si_doubles())
        else:
            timestamp = self.visit(ctx.si_ints())
        return datetime.datetime.fromtimestamp(timestamp)

    # Visit a parse tree produced by SIONParser#si_string.
    def visitSi_string(self, ctx: SIONParser.Si_stringContext):
        """Return the string literal's text without its first and last
        character (the surrounding quotes)."""
        # NOTE(review): escape sequences inside the literal are returned
        # as written, not decoded -- confirm whether that is intended.
        return ctx.SI_string_literal().getText()[1:-1]
# Drop the SIONParser name from this module's namespace; it is only needed
# above for the parameter annotations of the visitor methods.
del SIONParser
sion.py
# Created by kamimura on 2018/07/21.
# Copyright © 2018 kamimura. All rights reserved.
import sys
import datetime
from antlr4 import *
from SIONLexer import SIONLexer
from SIONParser import SIONParser
from SIONVisitor import SIONVisitor
def load(file, encoding: str='utf-8', errors: str='strict') -> object:
    """Read SION text from *file* and return the parsed Python object.

    Args:
        file: object with a ``read()`` method returning ``str`` or
            bytes-like data.
        encoding: codec used to decode the data when ``read()`` returns
            bytes.
        errors: error handler passed to ``decode`` in that case.

    Returns:
        The value produced by parsing the text (see ``loads``).
    """
    data = file.read()
    # isinstance also covers bytearray and bytes subclasses, which an exact
    # type comparison would pass on undecoded.
    if isinstance(data, (bytes, bytearray)):
        data = data.decode(encoding, errors)
    # Share the single parse pipeline instead of duplicating it here.
    return loads(data)
def loads(s, encoding: str='utf-8', errors: str='strict'):
    """Parse the SION document *s* and return the resulting Python object.

    Args:
        s: SION text as ``str``, or as bytes-like data to be decoded first.
        encoding: codec used when *s* is bytes (default ``'utf-8'``).
        errors: error handler passed to ``decode`` when *s* is bytes.

    Returns:
        The value produced by ``SIONVisitor`` for the parsed ``si_self``
        rule.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode(encoding, errors)
    # Lexer -> token stream -> parser -> visitor.
    lexer = SIONLexer(InputStream(s))
    parser = SIONParser(CommonTokenStream(lexer))
    tree = parser.si_self()
    return SIONVisitor().visit(tree)
# Maps every character that must be escaped to its backslash escape.
_STR_ESCAPES = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
})


def str_esc(s):
    """Return *s* with backslash, double quote, LF and CR backslash-escaped.

    The replacement is done in a single pass.  Chained ``replace`` calls
    that handle the backslash last would escape the backslashes already
    inserted for the other characters a second time.
    """
    return s.translate(_STR_ESCAPES)
def dump(obj, file):
    """Write *obj* to *file* as SION text.

    Supported values are ``None``, ``bool``, ``int``, ``float``, ``str``,
    ``bytes``, ``datetime.datetime``, ``list``/``tuple`` (written as an
    array) and ``dict`` (written as a dictionary; the empty dict is
    ``[:]``).  Types are matched exactly, so a ``bool`` is never treated as
    an ``int``.

    Args:
        obj: the value to serialize.
        file: destination passed as the ``file`` argument of ``print``.

    Raises:
        TypeError: if *obj* (or a nested value) has an unsupported type.
    """
    def emit(text):
        # No trailing newline: pieces are concatenated into one document.
        print(text, file=file, end='')

    t = type(obj)
    if obj is None:
        emit('nil')
    elif t == bool:
        emit('true' if obj else 'false')
    elif t in {int, float}:
        emit(obj)
    elif t == str:
        emit(f'"{str_esc(obj)}"')
    elif t == bytes:
        # Payload is the bytes repr with the leading b' / b" and the
        # closing quote stripped.
        emit(f'.Data("{str(obj)[2:-1]}")')
    elif t == datetime.datetime:
        emit(f'.Date({obj.timestamp()})')
    elif t in {list, tuple}:
        emit('[')
        for index, item in enumerate(obj):
            if index:
                emit(',')
            dump(item, file)
        emit(']')
    elif t == dict:
        emit('[')
        if not obj:
            # A lone colon distinguishes the empty dictionary from the
            # empty array.
            emit(':')
        for index, (key, value) in enumerate(obj.items()):
            if index:
                emit(',')
            dump(key, file)
            emit(':')
            dump(value, file)
        emit(']')
    else:
        raise TypeError(
            f"Object of type '{obj.__class__.__name__}' is not SION serializable")
def dumps(obj: object):
    """Return *obj* serialized as a SION string.

    Supported values are ``None``, ``bool``, ``int``, ``float``, ``str``,
    ``bytes``, ``datetime.datetime``, ``list``/``tuple`` (rendered as an
    array) and ``dict`` (rendered as a dictionary; the empty dict is
    ``[:]``).  Types are matched exactly, so a ``bool`` is never treated as
    an ``int``.

    Raises:
        TypeError: if *obj* (or a nested value) has an unsupported type.
    """
    t = type(obj)
    if obj is None:
        return 'nil'
    if t == bool:
        return 'true' if obj else 'false'
    if t in {int, float}:
        return str(obj)
    if t == str:
        return f'"{str_esc(obj)}"'
    if t == bytes:
        # Payload is the bytes repr with the leading b' / b" and the
        # closing quote stripped.
        return f'.Data("{str(obj)[2:-1]}")'
    if t == datetime.datetime:
        return f'.Date({obj.timestamp()})'
    if t in {list, tuple}:
        return '[' + ','.join(dumps(item) for item in obj) + ']'
    if t == dict:
        if not obj:
            # A lone colon distinguishes the empty dictionary from the
            # empty array.
            return '[:]'
        # Every value goes through dumps so nested values are rendered as
        # SION rather than as their Python str().
        pairs = (f'{dumps(key)}:{dumps(value)}' for key, value in obj.items())
        return '[' + ','.join(pairs) + ']'
    raise TypeError(
        f"Object of type '{obj.__class__.__name__}' is not SION serializable")
if __name__ == '__main__':
    # Input path: first command-line argument if given, else the default.
    filename = sys.argv[1] if len(sys.argv) > 1 else '../test/t.sion'
    with open(filename) as f:
        obj = load(f)
    print(obj)
    # Write the parsed object back out as SION text.
    with open('../test/output.sion', 'w') as f:
        dump(obj, f)
入出力結果(Terminal, Jupyter(IPython))
$ ./test.py .. ---------------------------------------------------------------------- Ran 2 tests in 0.013s OK $
SIONのディクショナリのキーがunhashable typeの場合、とりあえず文字列に変換してたのを、よりデータ型が近い(?)タプル(リストの場合)、タプルのタプル(dictionaryの場合)に変更。
タプルで済ませるのではなく、SION classを作成、抽象化して、情報を失わないようにするか迷ったり。今のところ、情報が失われる事を許容してタプルで済ませてた方が、Pythonのオブジェクトとして気軽に扱えるからいいかなぁと考えてたり。
0 コメント:
コメントを投稿