Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 49 additions & 28 deletions beangulp/importers/csvbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,26 @@ def __init__(self):
# warnings.warn('skiplines is deprecated, use header instead', DeprecationWarning)
self.header = self.skiplines

def open(self, filepath):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def open(self, filepath):
def open(self, filepath, encoding):

"""Open the CSV file for reading.

This method can be overridden in subclasses to customize raw file reading,
for example to pre-process text lines before import. Note that to skip
a fixed number of lines at the file beginning or end, setting the class
members "header" or "footer" is the easier approach.

This method uses the class member 'encoding'. Overriding this method causes
that member to be ignored unless the overriding method explicitly uses it.
Comment on lines +280 to +288
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""Open the CSV file for reading.
This method can be overridden in subclasses to customize raw file reading,
for example to pre-process text lines before import. Note that to skip
a fixed number of lines at the file beginning or end, setting the class
members "header" or "footer" is the easier approach.
This method uses the class member 'encoding'. Overriding this method causes
that member to be ignored unless the overriding method explicitly uses it.
"""Open filepath and return an iterable yielding lines of CSV-formatted text.
This method can be overridden in subclasses to customize file reading, for
example to pre-process text lines before import. To simply skip a fixed
number of lines at the beginning or end, consider setting the ``header``
or ``footer`` class variables instead.


Args:
filepath: Filesystem path to the input file.

Returns:
An iterable providing lines of CSV-formatted text.
"""
with open(filepath, encoding = self.encoding) as fd:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
with open(filepath, encoding = self.encoding) as fd:
with open(filepath, encoding=encoding) as fd:

yield from fd

def read(self, filepath):
"""Read CSV file according to class defined columns specification.

Expand All @@ -292,34 +312,35 @@ def read(self, filepath):

"""

with open(filepath, encoding=self.encoding) as fd:
# Skip header and footer lines.
lines = _chomp(fd, self.header, self.footer)

# Filter out comment lines.
if self.comments:
lines = filter(lambda x: not x.startswith(self.comments), lines)

reader = csv.reader(lines, dialect=self.dialect)

# Map column names to column indices.
names = None
if self.names:
headers = next(reader, None)
if headers is None:
raise IndexError("The input file does not contain an header line")
names = {name.strip(): index for index, name in enumerate(headers)}

# Construct a class with attribute accessors for the
# configured columns that works similarly to a namedtuple.
attrs = {}
for name, column in self.columns.items():
attrs[name] = property(column.getter(names))
row = type("Row", (tuple,), attrs)

# Return data rows.
for x in reader:
yield row(x)
lines = self.open(filepath)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
lines = self.open(filepath)
lines = self.open(filepath, self.encoding)


# Skip header and footer lines.
lines = _chomp(lines, self.header, self.footer)

# Filter out comment lines.
if self.comments:
lines = filter(lambda x: not x.startswith(self.comments), lines)

reader = csv.reader(lines, dialect=self.dialect)

# Map column names to column indices.
names = None
if self.names:
headers = next(reader, None)
if headers is None:
raise IndexError("The input file does not contain an header line")
names = {name.strip(): index for index, name in enumerate(headers)}

# Construct a class with attribute accessors for the
# configured columns that works similarly to a namedtuple.
attrs = {}
for name, column in self.columns.items():
attrs[name] = property(column.getter(names))
row = type("Row", (tuple,), attrs)

# Return data rows.
for x in reader:
yield row(x)


class Importer(beangulp.Importer, CSVReader):
Expand Down
26 changes: 26 additions & 0 deletions beangulp/importers/csvbase_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import decimal
import re
import unittest
from itertools import dropwhile

from beancount.core import data
from beancount.parser import cmptest
Expand Down Expand Up @@ -451,6 +452,31 @@ class Reader(CSVReader):
self.assertEqual(len(rows), 1)
self.assertEqual(rows[0][0], "a")

# Checks that a CSVReader subclass can override open() to pre-filter the raw
# text lines before they reach the CSV parsing done by read().
# NOTE(review): presumably @docfile materializes the docstring below as a
# file and passes its path in as `filename` -- confirm against the decorator.
@docfile
def test_custom_open(self, filename):
"""\
Skip this line
Skip this too
First, Second
a, b
c, d
"""

# This Reader declares only two columns and an open() override; it sets no
# header/footer of its own, so the two leading junk lines of the input have
# to be removed by the override for the assertions below to hold.
class Reader(CSVReader):
first = Column("First")
second = Column("Second")

# NOTE(review): the override takes only `filepath`. If the base open()
# signature changes (e.g. gains an `encoding` parameter), this override and
# its super().open() call need to be updated to match.
def open(self, filepath):
"""Skip lines until we find the column headers."""
lines = super().open(filepath)
# Lazily discard every leading line that does not contain "First"; the
# first line that does, and everything after it, is passed through.
return dropwhile(lambda line: "First" not in line, lines)

reader = Reader()
# read() is consumed into a list so the row count can be checked.
rows = list(reader.read(filename))
# Two data rows are expected: ("a", "b") and ("c", "d").
self.assertEqual(len(rows), 2)
self.assertEqual(rows[0].first, "a")
self.assertEqual(rows[1].second, "d")


class Base(Importer):
def identify(self, filepath):
Expand Down