beancount · mlell · Jan 4, 2026 · dnicolodi · Jan 7, 2026 · dnicolodi
diff --git a/beangulp/importers/csvbase.py b/beangulp/importers/csvbase.py
@@ -276,6 +276,26 @@ def __init__(self):
             # warnings.warn('skiplines is deprecated, use header instead', DeprecationWarning)
             self.header = self.skiplines
 
+    def open(self, filepath):
-    def open(self, filepath):
+    def open(self, filepath, encoding):
-    def open(self, filepath):
+    def open(self, filepath, encoding):
+        """Open the CSV file for reading.
+
+        This method can be overridden in subclasses to customize raw file reading,
+        for example to pre-proceess text lines before import. Note that to skip
+        a fixed number of lines at the file beginning or end, setting the class
+        members "header" or "footer" is the easier approach.
+
+        This method uses the class member 'encoding'. Overriding this method causes
+        that member to be ignored unless the overriding method explicitly uses it.
-        """Open the CSV file for reading.
-
-        This method can be overridden in subclasses to customize raw file reading,
-        for example to pre-proceess text lines before import. Note that to skip
-        a fixed number of lines at the file beginning or end, setting the class
-        members "header" or "footer" is the easier approach.
-
-        This method uses the class member 'encoding'. Overriding this method causes
-        that member to be ignored unless the overriding method explicitly uses it.
+        """Open filepath and return an iterable yielding lines of CSV-formatted text.
+        
+        This method can be overridden in subclasses to customize file reading, for 
+        example to pre-process text lines before import. To simply skip a fixed
+        number of lines at the beginning or end, consider setting the ``header`` 
+        or ``footer`` class variables instead.
-        """Open the CSV file for reading.
-
-        This method can be overridden in subclasses to customize raw file reading,
-        for example to pre-proceess text lines before import. Note that to skip
-        a fixed number of lines at the file beginning or end, setting the class
-        members "header" or "footer" is the easier approach.
-
-        This method uses the class member 'encoding'. Overriding this method causes
-        that member to be ignored unless the overriding method explicitly uses it.
+        """Open filepath and return an iterable yielding lines of CSV-formatted text.
+        
+        This method can be overridden in subclasses to customize file reading, for 
+        example to pre-process text lines before import. To simply skip a fixed
+        number of lines at the beginning or end, consider setting the ``header`` 
+        or ``footer`` class variables instead.
+
+        Args:
+          filepath: Filesystem path to the input file.
+          encoding: Text encoding passed to the builtin open() to decode the file.
+
+        Returns:
+          An iterable providing lines of CSV-formatted text.
+        """
+        with open(filepath, encoding = self.encoding) as fd:
-        with open(filepath, encoding = self.encoding) as fd:
+        with open(filepath, encoding=encoding) as fd:
-        with open(filepath, encoding = self.encoding) as fd:
+        with open(filepath, encoding=encoding) as fd:
+            yield from fd
+
     def read(self, filepath):
         """Read CSV file according to class defined columns specification.
 
@@ -292,34 +312,35 @@ def read(self, filepath):
 
         """
 
-        with open(filepath, encoding=self.encoding) as fd:
-            # Skip header and footer lines.
-            lines = _chomp(fd, self.header, self.footer)
-
-            # Filter out comment lines.
-            if self.comments:
-                lines = filter(lambda x: not x.startswith(self.comments), lines)
-
-            reader = csv.reader(lines, dialect=self.dialect)
-
-            # Map column names to column indices.
-            names = None
-            if self.names:
-                headers = next(reader, None)
-                if headers is None:
-                    raise IndexError("The input file does not contain an header line")
-                names = {name.strip(): index for index, name in enumerate(headers)}
-
-            # Construct a class with attribute accessors for the
-            # configured columns that works similarly to a namedtuple.
-            attrs = {}
-            for name, column in self.columns.items():
-                attrs[name] = property(column.getter(names))
-            row = type("Row", (tuple,), attrs)
-
-            # Return data rows.
-            for x in reader:
-                yield row(x)
+        lines = self.open(filepath)
-        lines = self.open(filepath)
+        lines = self.open(filepath, self.encoding)
-        lines = self.open(filepath)
+        lines = self.open(filepath, self.encoding)
+
+        # Skip header and footer lines.
+        lines = _chomp(lines, self.header, self.footer)
+
+        # Filter out comment lines.
+        if self.comments:
+            lines = filter(lambda x: not x.startswith(self.comments), lines)
+
+        reader = csv.reader(lines, dialect=self.dialect)
+
+        # Map column names to column indices.
+        names = None
+        if self.names:
+            headers = next(reader, None)
+            if headers is None:
+                raise IndexError("The input file does not contain an header line")
+            names = {name.strip(): index for index, name in enumerate(headers)}
+
+        # Construct a class with attribute accessors for the
+        # configured columns that works similarly to a namedtuple.
+        attrs = {}
+        for name, column in self.columns.items():
+            attrs[name] = property(column.getter(names))
+        row = type("Row", (tuple,), attrs)
+
+        # Return data rows.
+        for x in reader:
+            yield row(x)
 
 
 class Importer(beangulp.Importer, CSVReader):

diff --git a/beangulp/importers/csvbase_test.py b/beangulp/importers/csvbase_test.py
@@ -2,6 +2,7 @@
 import decimal
 import re
 import unittest
+from itertools import dropwhile
 
 from beancount.core import data
 from beancount.parser import cmptest
@@ -451,6 +452,31 @@ class Reader(CSVReader):
         self.assertEqual(len(rows), 1)
         self.assertEqual(rows[0][0], "a")
 
+    @docfile
+    def test_custom_open(self, filename):
+        """\
+        Skip this line
+        Skip this too
+        First, Second
+        a, b
+        c, d
+        """
+
+        class Reader(CSVReader):
+            first = Column("First")
+            second = Column("Second")
+
+            def open(self, filepath, encoding):
+                """Skip lines until we find the column headers.
+
+                The signature mirrors ``CSVReader.open()``: ``read()`` calls
+                ``self.open(filepath, self.encoding)``, so the override must
+                accept the encoding and forward it to the base implementation.
+                """
+                lines = super().open(filepath, encoding)
+                return dropwhile(lambda line: "First" not in line, lines)
+
+        reader = Reader()
+        rows = list(reader.read(filename))
+        self.assertEqual(len(rows), 2)
+        self.assertEqual(rows[0].first, "a")
+        self.assertEqual(rows[1].second, "d")
+
 
 class Base(Importer):
     def identify(self, filepath):