Skip to content

Commit ad1f90f

Browse files
committed
add Stream to represent input data
This class primarily wraps the `str | bytes | list` that holds the data to parse, and also adds a `source` metadata field that records where the data came from (a filename, URL, etc.). Introducing this class also paves the way for eventually supporting streaming input data.
1 parent 97fc01c commit ad1f90f

File tree

1 file changed

+38
-19
lines changed

1 file changed

+38
-19
lines changed

src/parsy/__init__.py

Lines changed: 38 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,33 @@
1212
noop = lambda x: x
1313

1414

15-
def line_info_at(stream, index):
15+
@dataclass
class Stream:
    """Data to parse, possibly equipped with a name for the source it's from.

    Wraps the ``str``, ``bytes`` or ``list`` handed to a parser so that
    metadata can travel alongside it.

    Attributes:
        data: The input being parsed.
        source: Name of where ``data`` came from (e.g. a file path or URL),
            or ``None`` when no name was given.
    """

    data: str | bytes | list
    source: str | None = None

    def __len__(self):
        """Return the number of items in the wrapped data."""
        return len(self.data)

    def __getitem__(self, i):
        """Return the item at integer index ``i``, or the sub-sequence for a slice.

        For ``bytes`` data an integer index yields a length-1 ``bytes`` object
        instead of an ``int``, so items have the same type as the data.

        Raises:
            IndexError: if an integer index is out of range.
        """
        item = self.data[i]
        if isinstance(self.data, bytes) and not isinstance(i, slice):
            # Subscripting bytes with `[index]` returns an int, so re-wrap it.
            # Indexing first (rather than slicing `[i : i + 1]`) keeps the
            # IndexError for out-of-range indices -- which is what terminates
            # iteration over a Stream -- and makes negative indices work.
            return bytes((item,))
        return item
33+
34+
35+
def line_info_at(stream: Stream, index):
    """Return ``(source, line, column)`` for position ``index`` in ``stream``.

    ``line`` and ``column`` are zero-based: ``line`` is the number of
    newlines before ``index`` and ``column`` is the offset of ``index`` from
    the character following the last of those newlines.

    The newline search uses the ``str`` separator ``"\\n"`` on
    ``stream.data``, so this is meant for text input.

    Raises:
        ValueError: if ``index`` is negative or greater than ``len(stream)``.
    """
    # count()/rfind() would read a negative bound as an offset from the end
    # and yield a meaningless position, so reject it like an oversized index.
    if not 0 <= index <= len(stream):
        raise ValueError("invalid index")
    line = stream.data.count("\n", 0, index)
    last_nl = stream.data.rfind("\n", 0, index)
    # rfind() gives -1 when no newline precedes index, so col == index then.
    col = index - (last_nl + 1)
    return (stream.source, line, col)
2242

2343

2444
class ParseError(RuntimeError):
@@ -29,7 +49,11 @@ def __init__(self, expected, stream, index):
2949

3050
def line_info(self):
    """Describe where the error occurred as a string.

    Returns ``"row:col"``, prefixed with ``"source:"`` when the stream
    reports a source. If the position cannot be computed (``TypeError`` or
    ``AttributeError`` from ``line_info_at``), returns the raw index as a
    string instead.
    """
    try:
        source, row, col = line_info_at(self.stream, self.index)
        position = f"{row}:{col}"
        return position if source is None else f"{source}:{position}"
    except (TypeError, AttributeError):  # not a str
        return str(self.index)
3559

@@ -83,22 +107,22 @@ class Parser:
83107
of the failure.
84108
"""
85109

86-
def __init__(self, wrapped_fn: Callable[[str | bytes | list, int], Result]):
110+
def __init__(self, wrapped_fn: Callable[[Stream, int], Result]):
87111
"""
88112
Creates a new Parser from a function that takes a stream
89113
and returns a Result.
90114
"""
91115
self.wrapped_fn = wrapped_fn
92116

93-
def __call__(self, stream: str | bytes | list, index: int):
117+
def __call__(self, stream: Stream, index: int):
94118
return self.wrapped_fn(stream, index)
95119

96-
def parse(self, stream: str | bytes | list) -> Any:
120+
def parse(self, stream: Stream) -> Any:
97121
"""Parses a string or list of tokens and returns the result or raise a ParseError."""
98122
(result, _) = (self << eof).parse_partial(stream)
99123
return result
100124

101-
def parse_partial(self, stream: str | bytes | list) -> tuple[Any, str | bytes | list]:
125+
def parse_partial(self, stream: Stream) -> tuple[Any, Stream]:
102126
"""
103127
Parses the longest possible prefix of a given string.
104128
Returns a tuple of the result and the unparsed remainder,
@@ -343,10 +367,10 @@ def mark(self) -> Parser:
343367

344368
@generate
345369
def marked():
346-
start = yield line_info
370+
_, *start = yield line_info
347371
body = yield self
348-
end = yield line_info
349-
return (start, body, end)
372+
_, *end = yield line_info
373+
return (tuple(start), body, tuple(end))
350374

351375
return marked
352376

@@ -557,7 +581,7 @@ def regex(exp: str, flags=0, group: int | str | tuple = 0) -> Parser:
557581

558582
@Parser
559583
def regex_parser(stream, index):
560-
match = exp.match(stream, index)
584+
match = exp.match(stream.data, index)
561585
if match:
562586
return Result.success(match.end(), match.group(*group))
563587
else:
@@ -577,12 +601,7 @@ def test_item(func: Callable[..., bool], description: str) -> Parser:
577601
@Parser
578602
def test_item_parser(stream, index):
579603
if index < len(stream):
580-
if isinstance(stream, bytes):
581-
# Subscripting bytes with `[index]` instead of
582-
# `[index:index + 1]` returns an int
583-
item = stream[index : index + 1]
584-
else:
585-
item = stream[index]
604+
item = stream[index]
586605
if func(item):
587606
return Result.success(index + 1, item)
588607
return Result.failure(index, description)

0 commit comments

Comments
 (0)