開発環境
- OS X El Capitan - Apple (OS)
- Emacs(Text Editor)
- Java (実行環境)
コンピュータシステムの理論と実装 (Noam Nisan (著)、Shimon Schocken (著)、斎藤 康毅(翻訳)、オライリージャパン)の10章(コンパイラ#1:構文解析)、10.5(プロジェクト)、10.5.2(第1段階: トークナイザー)に取り組んでみる。
10.5(プロジェクト)、10.5.2(第1段階: トークナイザー)
コード(Emacs)
JackAnalyzer.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import glob
import os
import re
import sys
from xml.sax.saxutils import escape
# Maps each tokenizer token type to the XML element name that wraps a token
# of that type in the generated token listing.
types = dict(
    KEYWORD='keyword',
    SYMBOL='symbol',
    IDENTIFIER='identifier',
    INT_CONST='integerConstant',
    STRING_CONST='stringConstant',
)
class JackTokenizer:
    """Split Jack source text into tokens with one token of look-ahead.

    The tokenizer reads the wrapped file object one character at a time and
    always keeps the *next* token pre-fetched, so ``has_more_tokens()`` can
    answer without consuming input.  Call ``advance()`` to make the
    pre-fetched token current, then inspect it with ``token_type()`` and the
    matching accessor (``keyword()``, ``symbol()``, ``identifier()``,
    ``int_val()`` or ``string_val()``).

    Token types are the strings ``'KEYWORD'``, ``'SYMBOL'``,
    ``'IDENTIFIER'``, ``'INT_CONST'`` and ``'STRING_CONST'``.
    """

    _KEYWORDS = frozenset([
        'class', 'constructor', 'function', 'method', 'field',
        'static', 'var', 'int', 'char', 'boolean', 'void',
        'true', 'false', 'null', 'this', 'let', 'do', 'if',
        'else', 'while', 'return',
    ])
    # Compiled once; every pattern is applied to a single character (or to
    # '' at end of input, which none of them match).
    _WHITESPACE = re.compile(r'\s')
    _SYMBOL = re.compile(r'[-{}()\[\].,;+*/&|<>=~]')
    _DIGIT = re.compile(r'\d')
    _WORD_CHAR = re.compile(r'[a-zA-Z0-9_]')

    def __init__(self, file):
        """Wrap *file* and pre-fetch its first token.

        Args:
            file: Text-mode file object open for reading.  Only ``read(1)``
                and ``readline()`` are called on it.

        Raises:
            ValueError: If the input starts with an unterminated block
                comment or string constant.
        """
        self.file = file
        # One character of push-back: the character that ended the previous
        # token and has not been interpreted yet ('' when there is none).
        self.next_ch = ''
        self.cur_token_type = ''
        self.cur_token = ''
        self.next_token_type = ''
        self.next_token = self.get_next_token()

    def get_next_token(self) -> str:
        """Read the next token from the file and return its text.

        Whitespace, ``// ...`` line comments and ``/* ... */`` block comments
        are skipped.  ``next_token_type`` is set to the type of the returned
        token, or to ``''`` when the input is exhausted.

        Returns:
            The token text (for a string constant, the text between the
            quotes).  ``''`` is returned at end of input; because an empty
            string constant also has the text ``''``, use ``next_token_type``
            (or ``has_more_tokens()``) to tell the two apart.

        Raises:
            ValueError: If a block comment or string constant is still open
                when the input ends.
        """
        c = self._next_significant_char()
        if c == '':
            self.next_token_type = ''
            return ''
        if self._SYMBOL.match(c):
            self.next_token_type = 'SYMBOL'
            return c
        if self._DIGIT.match(c):
            token = c + self._read_while(self._DIGIT)
            self.next_token_type = 'INT_CONST'
            return token
        if c == '"':
            token = self._read_string_body()
            self.next_token_type = 'STRING_CONST'
            return token
        # Any other character starts a keyword or identifier; the first
        # character itself is accepted without being checked.
        token = c + self._read_while(self._WORD_CHAR)
        if token in self._KEYWORDS:
            self.next_token_type = 'KEYWORD'
        else:
            self.next_token_type = 'IDENTIFIER'
        return token

    def _next_significant_char(self) -> str:
        """Return the first character of the next token ('' at end of input).

        Skips whitespace and comments.  Iterative rather than recursive so a
        long run of comments cannot exhaust the recursion limit.
        """
        read = self.file.read
        while True:
            # Resume from the pushed-back character when there is one.
            c, self.next_ch = self.next_ch, ''
            if c == '':
                c = read(1)
            while self._WHITESPACE.match(c):
                c = read(1)
            if c != '/':
                return c
            following = read(1)
            if following == '/':
                self.file.readline()  # drop the rest of the line comment
            elif following == '*':
                self._skip_block_comment()
            else:
                # A lone '/' is the division symbol; keep what follows it.
                self.next_ch = following
                return '/'

    def _skip_block_comment(self) -> None:
        """Consume input up to and including the closing ``*/``."""
        prev = ''
        while True:
            c = self.file.read(1)
            if c == '':
                raise ValueError('unterminated block comment')
            # Tracking the previous character lets '**/' close the comment.
            if prev == '*' and c == '/':
                return
            prev = c

    def _read_string_body(self) -> str:
        """Return the characters up to (not including) the closing quote."""
        chars = []
        while True:
            c = self.file.read(1)
            if c == '"':
                return ''.join(chars)
            if c == '':
                raise ValueError('unterminated string constant')
            chars.append(c)

    def _read_while(self, pattern) -> str:
        """Read characters while *pattern* matches; push back the first miss."""
        chars = []
        while True:
            c = self.file.read(1)
            if not pattern.match(c):
                self.next_ch = c
                return ''.join(chars)
            chars.append(c)

    def has_more_tokens(self) -> bool:
        """Return True while a pre-fetched token is waiting for advance()."""
        # The type, not the text, marks end of input: an empty string
        # constant is a real token whose text is ''.
        return self.next_token_type != ''

    def advance(self) -> None:
        """Make the pre-fetched token current and pre-fetch the next one."""
        self.cur_token = self.next_token
        self.cur_token_type = self.next_token_type
        self.next_token = self.get_next_token()

    def token_type(self) -> str:
        """Return the type of the current token."""
        return self.cur_token_type

    def keyword(self) -> str:
        """Return the current token's text in upper case."""
        return self.cur_token.upper()

    def symbol(self) -> str:
        """Return the current symbol with '&', '<' and '>' XML-escaped."""
        return escape(self.cur_token)

    def identifier(self) -> str:
        """Return the current token's text unchanged."""
        return self.cur_token

    def int_val(self) -> int:
        """Return the current token converted to an int."""
        return int(self.cur_token)

    def string_val(self) -> str:
        """Return the current string constant's text (without the quotes)."""
        return self.cur_token
def xml_output_path(filename):
    """Return the path of the XML file written for the source *filename*.

    Only the final extension is replaced, so a '.jack' that appears in a
    directory name or in the middle of the file name is left alone.

    Raises:
        ValueError: If *filename* already ends in '.xml'; the output would
            overwrite the input while it is being read.
    """
    root, ext = os.path.splitext(filename)
    if ext == '.xml':
        raise ValueError(
            '{0}: output would overwrite the input file'.format(filename))
    return root + '.xml'


def write_token_xml(filename):
    """Tokenize *filename* and write its tokens to the matching XML file.

    Each token becomes one ``<tag> text </tag>`` line, where the tag is
    looked up in ``types``; the lines are wrapped in a ``<tokens>`` element.
    """
    # Resolve the output path first so a refusal happens before any file is
    # opened for writing.
    out_path = xml_output_path(filename)
    with open(filename) as inf, open(out_path, 'w') as outf:
        tokenizer = JackTokenizer(inf)
        print('<tokens>', file=outf)
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            kind = tokenizer.token_type()
            if kind == 'KEYWORD':
                text = tokenizer.keyword().lower()
            elif kind == 'SYMBOL':
                text = tokenizer.symbol()
            elif kind == 'IDENTIFIER':
                text = tokenizer.identifier()
            elif kind == 'INT_CONST':
                text = tokenizer.int_val()
            elif kind == 'STRING_CONST':
                # String text is arbitrary, so it must be escaped to keep
                # the XML well-formed.
                text = escape(tokenizer.string_val())
            else:
                text = ''
            print('<{0}> {1} </{0}>'.format(types[kind], text), file=outf)
        print('</tokens>', file=outf)


def main(argv):
    """Tokenize the file, or every ``*.jack`` file in the directory, argv[1].

    Exits with a message when the argument is missing or names neither a
    file nor a directory.
    """
    if len(argv) < 2:
        sys.exit('usage: {0} <file.jack | directory>'.format(argv[0]))
    source = argv[1]
    if os.path.isfile(source):
        filenames = [source]
    elif os.path.isdir(source):
        filenames = glob.glob(os.path.join(source, '*.jack'))
    else:
        sys.exit('{0}: no such file or directory'.format(source))
    for filename in filenames:
        write_token_xml(filename)


if __name__ == '__main__':
    main(sys.argv)
入出力結果(Terminal, IPython)
$ make
rm -f Square/*.xml
./JackAnalyzer.py Square
./JackAnalyzer.py ArrayTest
cat Square/Main.xml
<tokens>
<keyword> class </keyword>
<identifier> Main </identifier>
<symbol> { </symbol>
<keyword> function </keyword>
<keyword> void </keyword>
<identifier> main </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<symbol> { </symbol>
<keyword> var </keyword>
<identifier> SquareGame </identifier>
<identifier> game </identifier>
<symbol> ; </symbol>
<keyword> let </keyword>
<identifier> game </identifier>
<symbol> = </symbol>
<identifier> SquareGame </identifier>
<symbol> . </symbol>
<identifier> new </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<symbol> ; </symbol>
<keyword> do </keyword>
<identifier> game </identifier>
<symbol> . </symbol>
<identifier> run </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<symbol> ; </symbol>
<keyword> do </keyword>
<identifier> game </identifier>
<symbol> . </symbol>
<identifier> dispose </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<symbol> ; </symbol>
<keyword> return </keyword>
<symbol> ; </symbol>
<symbol> } </symbol>
<symbol> } </symbol>
</tokens>
./TextComparer Square/Main.xml test_xml/Square/MainT.xml
Comparison ended successfully
./TextComparer Square/Square.xml test_xml/Square/SquareT.xml
Comparison ended successfully
./TextComparer Square/SquareGame.xml test_xml/Square/SquareGameT.xml
Comparison ended successfully
rm -f ArrayTest/*.xml
./JackAnalyzer.py ArrayTest
./TextComparer ArrayTest/Main.xml test_xml/ArrayTest/MainT.xml
Comparison ended successfully
$
0 コメント:
コメントを投稿