-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScanner.py
More file actions
134 lines (116 loc) · 4.46 KB
/
Scanner.py
File metadata and controls
134 lines (116 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from enum import Enum
from typing import Iterator

from Token import Token
from Token import TokenType
# enum for the states
class State(Enum):
    """DFA states driving the scanner's token recognition."""
    START = 1      # dispatch on the first character of a token
    INCOMMENT = 2  # inside a { ... } comment
    INNUM = 3      # collecting the digits of a number
    INID = 4       # collecting the letters of an identifier
    INASSIGN = 5   # saw ':', expecting '=' to complete ':='
    DONE = 6       # current token is complete
# reserved words of the TINY language; identifiers matching these are
# reclassified as RESWORD after scanning
reserved_words = {'if', 'then', 'else', 'end', 'repeat', 'until', 'read', 'write'}
# one- and two-character special symbols; ':=' is the only two-character
# symbol and is assembled via the INASSIGN state
special_symbols = {'+', '-', '*', '/', '=', '<', '(', ')', ';', ':='}
class Scanner:
    """DFA-based lexical scanner for the TINY language.

    Reads characters from an input file, groups them into tokens
    (reserved words, identifiers, numbers, special symbols), appends each
    token to an output file, and yields them as Token objects via run().
    """

    # human-readable type names used when writing tokens to the output
    # file; built once at class level instead of on every write_token call
    _TOKEN_TYPE_NAMES = {
        TokenType.RESWORD: 'reserved word',
        TokenType.SPSYMB: 'special symbol',
        TokenType.ID: 'identifier',
        TokenType.NUM: 'number',
    }

    def __init__(self, input_path='', output_path=''):
        # remember the input/output paths and start the DFA in START state
        self._input_path = input_path
        self._output_path = output_path
        self._current_state = State.START

    def set_files(self, input_path, output_path):
        # change the input/output file paths after construction
        self._input_path = input_path
        self._output_path = output_path

    def read_file(self):
        """Yield the input file's characters one at a time.

        Reads in 4 KB chunks (instead of one char per read call) for
        efficiency, but still hands single characters to the DFA.
        """
        with open(self._input_path) as file:
            while True:
                chunk = file.read(4096)
                if not chunk:
                    break
                yield from chunk

    def write_token(self, token, t_type):
        """Append one 'token : type-name' line to the output file.

        NOTE(review): the file is opened in append mode, so repeated runs
        accumulate output; truncate the file beforehand if that matters.
        """
        with open(self._output_path, 'a') as file:
            file.write(token + ' : ' + self._TOKEN_TYPE_NAMES[t_type] + '\n')

    def run(self) -> Iterator[Token]:
        """Tokenize the input file, yielding one Token per lexeme.

        Implements the TINY scanner DFA: '{...}' comments are skipped,
        digits accumulate into NUM, letters into ID (promoted to RESWORD
        when the lexeme is reserved), ':' opens a possible ':=', and any
        other special symbol is a one-character token. Whitespace and
        unrecognized characters are silently skipped.

        (Fix: this is a generator, so the return annotation is
        Iterator[Token], not Token.)
        """
        characters = self.read_file()
        end_of_file = False
        c = ''  # one-character lookahead; '' means "consume the next char"
        while not end_of_file:
            token = ''
            tokentype = None
            self._current_state = State.START
            while self._current_state != State.DONE:
                try:
                    if not c:
                        c = next(characters)
                except StopIteration:
                    # input exhausted; fall through to emit any pending token
                    end_of_file = True
                    break
                if self._current_state == State.START:
                    if c == '{':
                        self._current_state = State.INCOMMENT
                    elif c.isdigit():
                        self._current_state = State.INNUM
                        token += c
                        tokentype = TokenType.NUM
                    elif c.isalpha():
                        self._current_state = State.INID
                        token += c
                        tokentype = TokenType.ID
                    elif c == ':':
                        self._current_state = State.INASSIGN
                        token += c
                        tokentype = TokenType.SPSYMB
                    elif c in special_symbols:
                        self._current_state = State.DONE
                        token += c
                        tokentype = TokenType.SPSYMB
                    else:
                        # whitespace or unrecognized character: skip it
                        self._current_state = State.START
                    c = ''
                elif self._current_state == State.INCOMMENT:
                    # stay inside the comment until the closing '}'
                    if c == '}':
                        self._current_state = State.START
                    c = ''
                elif self._current_state == State.INNUM:
                    if c.isdigit():
                        token += c
                        c = ''
                    else:
                        # non-digit ends the number; keep c as lookahead
                        self._current_state = State.DONE
                elif self._current_state == State.INID:
                    if c.isalpha():
                        token += c
                        c = ''
                    else:
                        # non-letter ends the identifier; keep c as lookahead
                        self._current_state = State.DONE
                elif self._current_state == State.INASSIGN:
                    # ':' already consumed; '=' completes ':=', otherwise
                    # ':' alone is emitted and c stays as lookahead
                    self._current_state = State.DONE
                    if c == '=':
                        token += c
                        c = ''
            # identifiers whose lexeme is reserved become reserved words
            if tokentype == TokenType.ID and token in reserved_words:
                tokentype = TokenType.RESWORD
            if token and tokentype:
                self.write_token(token, tokentype)
                yield Token(token, tokentype)
if __name__ == "__main__":
    # Drive the scanner over the sample program; tokens are written to
    # the output file as a side effect of exhausting the generator.
    input_file = 'tiny_sample_code.txt'
    output_file = 'scanner_output.txt'
    scanner = Scanner(input_file, output_file)
    for _ in scanner.run():
        pass