From 04fa0d20a54c6527d3ac7412f40b91aa185de78a Mon Sep 17 00:00:00 2001 From: Imran Haque Date: Mon, 4 Nov 2013 12:09:08 -0800 Subject: [PATCH 1/5] Replace \n\r line endings with \n line endings --- LICENSE.txt | 18 ++-- MANIFEST.in | 6 +- setup.py | 34 +++---- sheets/__init__.py | 6 +- sheets/base.py | 234 ++++++++++++++++++++++----------------------- sheets/columns.py | 216 ++++++++++++++++++++--------------------- sheets/options.py | 24 ++--- sheets/tests.py | 44 ++++----- tests/columns.py | 172 ++++++++++++++++----------------- 9 files changed, 377 insertions(+), 377 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index 7d09c3f..723aecf 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,10 +1,10 @@ -Copyright (c) 2010, Marty Alchin -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of the author nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. - +Copyright (c) 2010, Marty Alchin +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of the author nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index dcc355d..8808262 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ -include LICENSE.txt -include contents.csv -recursive-include tests *.py +include LICENSE.txt +include contents.csv +recursive-include tests *.py diff --git a/setup.py b/setup.py index 5da1dca..250fdd3 100644 --- a/setup.py +++ b/setup.py @@ -1,18 +1,18 @@ -from distutils.core import setup - -setup(name='Sheets', - version='0.6', - author='Marty Alchin', - author_email='marty@martyalchin.com', - url='https://github.com/gulopine/sheets/', - packages=['sheets'], - license='BSD', - description='A declarative class framework for working with CSV files', - classifiers=[ - 'License :: OSI Approved :: BSD License', - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Topic :: Software Development :: Libraries :: Application Frameworks', - 'Programming Language :: Python :: 3.1', - ] +from distutils.core import setup + +setup(name='Sheets', + version='0.6', + author='Marty Alchin', + author_email='marty@martyalchin.com', + url='https://github.com/gulopine/sheets/', + packages=['sheets'], + license='BSD', + description='A declarative class framework for working with CSV files', + classifiers=[ + 'License :: OSI Approved :: BSD License', + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Libraries :: Application Frameworks', + 'Programming Language :: Python :: 3.1', + ] ) \ No newline at end of file diff --git a/sheets/__init__.py b/sheets/__init__.py index b221b6b..1a3b7d8 100644 --- a/sheets/__init__.py +++ b/sheets/__init__.py @@ -1,3 +1,3 @@ -from sheets.base import * -from sheets.options import * -from sheets.columns import * +from sheets.base import * +from sheets.options import * +from sheets.columns import * diff --git a/sheets/base.py b/sheets/base.py index c3ab343..54d8f12 100644 --- a/sheets/base.py +++ b/sheets/base.py @@ -1,117 +1,117 @@ -import csv -from collections import OrderedDict - -from sheets import options - -__all__ = ['Row', 'Reader', 'Writer'] - -class RowMeta(type): - def __init__(cls, name, bases, attrs): - if 'Dialect' in attrs: - # Filter out Python's own additions to the namespace - items = attrs.pop('Dialect').__dict__.items() - items = dict((k, v) for (k, v) in items if not k.startswith('__')) - else: - # No options were explicitly defined - items = {} - cls._dialect = options.Dialect(**items) - - for key, attr in attrs.items(): - if hasattr(attr, 'attach_to_class'): - attr.attach_to_class(cls, key, cls._dialect) - - @classmethod - def __prepare__(cls, name, bases): - return OrderedDict() - - -class Row(metaclass=RowMeta): - # Not yet written about - - def __init__(self, *args, **kwargs): - column_names = [column.name for column in self._dialect.columns] - - # First, make sure the arguments make sense - if len(args) > len(column_names): - msg = "__init__() takes at most %d arguments (%d given)" - raise TypeError(msg % (len(column_names), len(args))) - - for name in kwargs: - if name not in column_names: - raise TypeError("Got unknown keyword argument '%s'" % name) - - for i, name in enumerate(column_names[:len(args)]): - if name in kwargs: - msg = "__init__() got multiple values for keyword argument '%s'" - raise TypeError(msg % name) - kwargs[name] = args[i] - - # Now populate the actual values on the object - for column in self._dialect.columns: - try: - value = column.to_python(kwargs[column.name]) - except KeyError: - # No value was provided - value = None - setattr(self, column.name, value) - - errors = () - - def is_valid(self): - valid = True - self.errors = [] - for column in self._dialect.columns: - value = getattr(self, column.name) - try: - column.validate(value) - except ValueError as e: - self.errors.append(e) - valid = False - return valid - - @classmethod - def reader(cls, file): - return Reader(cls, file) - - @classmethod - def writer(cls, file): - return Writer(cls, file) - - -class Reader: - def __init__(self, row_cls, file): - self.row_cls = row_cls - self.csv_reader = csv.reader(file, **row_cls._dialect.csv_dialect) - self.skip_header_row = row_cls._dialect.has_header_row - - def __iter__(self): - return self - - def __next__(self): - # Skip the first row if it's a header - if self.skip_header_row: - self.csv_reader.__next__() - self.skip_header_row = False - - return self.row_cls(*self.csv_reader.__next__()) - - -class Writer: - def __init__(self, row_cls, file): - self.columns = row_cls._dialect.columns - self._writer = csv.writer(file, row_cls._dialect.csv_dialect) - self.needs_header_row = row_cls._dialect.has_header_row - - def writerow(self, row): - if self.needs_header_row: - values = [column.title.title() for column in self.columns] - self._writer.writerow(values) - self.needs_header_row = False - values = [getattr(row, column.name) for column in self.columns] - self._writer.writerow(values) - - def writerows(self, rows): - for row in rows: - self.writerow(row) - - +import csv +from collections import OrderedDict + +from sheets import options + +__all__ = ['Row', 'Reader', 'Writer'] + +class RowMeta(type): + def __init__(cls, name, bases, attrs): + if 'Dialect' in attrs: + # Filter out Python's own additions to the namespace + items = attrs.pop('Dialect').__dict__.items() + items = dict((k, v) for (k, v) in items if not k.startswith('__')) + else: + # No options were explicitly defined + items = {} + cls._dialect = options.Dialect(**items) + + for key, attr in attrs.items(): + if hasattr(attr, 'attach_to_class'): + attr.attach_to_class(cls, key, cls._dialect) + + @classmethod + def __prepare__(cls, name, bases): + return OrderedDict() + + +class Row(metaclass=RowMeta): + # Not yet written about + + def __init__(self, *args, **kwargs): + column_names = [column.name for column in self._dialect.columns] + + # First, make sure the arguments make sense + if len(args) > len(column_names): + msg = "__init__() takes at most %d arguments (%d given)" + raise TypeError(msg % (len(column_names), len(args))) + + for name in kwargs: + if name not in column_names: + raise TypeError("Got unknown keyword argument '%s'" % name) + + for i, name in enumerate(column_names[:len(args)]): + if name in kwargs: + msg = "__init__() got multiple values for keyword argument '%s'" + raise TypeError(msg % name) + kwargs[name] = args[i] + + # Now populate the actual values on the object + for column in self._dialect.columns: + try: + value = column.to_python(kwargs[column.name]) + except KeyError: + # No value was provided + value = None + setattr(self, column.name, value) + + errors = () + + def is_valid(self): + valid = True + self.errors = [] + for column in self._dialect.columns: + value = getattr(self, column.name) + try: + column.validate(value) + except ValueError as e: + self.errors.append(e) + valid = False + return valid + + @classmethod + def reader(cls, file): + return Reader(cls, file) + + @classmethod + def writer(cls, file): + return Writer(cls, file) + + +class Reader: + def __init__(self, row_cls, file): + self.row_cls = row_cls + self.csv_reader = csv.reader(file, **row_cls._dialect.csv_dialect) + self.skip_header_row = row_cls._dialect.has_header_row + + def __iter__(self): + return self + + def __next__(self): + # Skip the first row if it's a header + if self.skip_header_row: + self.csv_reader.__next__() + self.skip_header_row = False + + return self.row_cls(*self.csv_reader.__next__()) + + +class Writer: + def __init__(self, row_cls, file): + self.columns = row_cls._dialect.columns + self._writer = csv.writer(file, row_cls._dialect.csv_dialect) + self.needs_header_row = row_cls._dialect.has_header_row + + def writerow(self, row): + if self.needs_header_row: + values = [column.title.title() for column in self.columns] + self._writer.writerow(values) + self.needs_header_row = False + values = [getattr(row, column.name) for column in self.columns] + self._writer.writerow(values) + + def writerows(self, rows): + for row in rows: + self.writerow(row) + + diff --git a/sheets/columns.py b/sheets/columns.py index 25fa790..15501e3 100644 --- a/sheets/columns.py +++ b/sheets/columns.py @@ -1,108 +1,108 @@ -import functools -import datetime -import decimal - -class Column: - """ - An individual column within a CSV file. This serves as a base for attributes - and methods that are common to all types of columns. Subclasses of Column - will define behavior for more specific data types. - """ - - def __init__(self, title=None, required=True): - self.title = title - self.required = required - self._validators = [self.to_python] - - def attach_to_class(self, cls, name, dialect): - self.cls = cls - self.name = name - self.dialect = dialect - if self.title is None: - # Check for None so that an empty string will skip this behavior - self.title = name.replace('_', ' ') - dialect.add_column(self) - - def to_python(self, value): - """ - Convert the given string to a native Python object. - """ - return value - - def to_string(self, value): - """ - Convert the given Python object to a string. - """ - return value - - # Not yet written about - - def validator(self, func): - self._validators.append(functools.partial(func, self)) - return func - - def validate(self, value): - """ - Validate that the given value matches the column's requirements. - Raise a ValueError only if the given value was invalid. - """ - for validate in self._validators: - validate(value) - - -class StringColumn(Column): - pass - - -class IntegerColumn(Column): - def to_python(self, value): - return int(value) - - -class FloatColumn(Column): - def to_python(self, value): - return float(value) - - -class DecimalColumn(Column): - """ - A column that contains data in the form of decimal values, - represented in Python by decimal.Decimal. - """ - - def to_python(self, value): - try: - return decimal.Decimal(value) - except decimal.InvalidOperation as e: - raise ValueError(str(e)) - - -class DateColumn(Column): - """ - A column that contains data in the form of dates, - represented in Python by datetime.date. - - format - A strptime()-style format string. - See http://docs.python.org/library/datetime.html for details - """ - - def __init__(self, *args, format='%Y-%m-%d', **kwargs): - super(DateColumn, self).__init__(*args, **kwargs) - self.format = format - - def to_python(self, value): - """ - Parse a string value according to self.format - and return only the date portion. - """ - if isinstance(value, datetime.date): - return value - return datetime.datetime.strptime(value, self.format).date() - - def to_string(self, value): - """ - Format a date according to self.format and return that as a string. - """ - return value.strftime(self.format) - +import functools +import datetime +import decimal + +class Column: + """ + An individual column within a CSV file. This serves as a base for attributes + and methods that are common to all types of columns. Subclasses of Column + will define behavior for more specific data types. + """ + + def __init__(self, title=None, required=True): + self.title = title + self.required = required + self._validators = [self.to_python] + + def attach_to_class(self, cls, name, dialect): + self.cls = cls + self.name = name + self.dialect = dialect + if self.title is None: + # Check for None so that an empty string will skip this behavior + self.title = name.replace('_', ' ') + dialect.add_column(self) + + def to_python(self, value): + """ + Convert the given string to a native Python object. + """ + return value + + def to_string(self, value): + """ + Convert the given Python object to a string. + """ + return value + + # Not yet written about + + def validator(self, func): + self._validators.append(functools.partial(func, self)) + return func + + def validate(self, value): + """ + Validate that the given value matches the column's requirements. + Raise a ValueError only if the given value was invalid. + """ + for validate in self._validators: + validate(value) + + +class StringColumn(Column): + pass + + +class IntegerColumn(Column): + def to_python(self, value): + return int(value) + + +class FloatColumn(Column): + def to_python(self, value): + return float(value) + + +class DecimalColumn(Column): + """ + A column that contains data in the form of decimal values, + represented in Python by decimal.Decimal. + """ + + def to_python(self, value): + try: + return decimal.Decimal(value) + except decimal.InvalidOperation as e: + raise ValueError(str(e)) + + +class DateColumn(Column): + """ + A column that contains data in the form of dates, + represented in Python by datetime.date. + + format + A strptime()-style format string. + See http://docs.python.org/library/datetime.html for details + """ + + def __init__(self, *args, format='%Y-%m-%d', **kwargs): + super(DateColumn, self).__init__(*args, **kwargs) + self.format = format + + def to_python(self, value): + """ + Parse a string value according to self.format + and return only the date portion. + """ + if isinstance(value, datetime.date): + return value + return datetime.datetime.strptime(value, self.format).date() + + def to_string(self, value): + """ + Format a date according to self.format and return that as a string. + """ + return value.strftime(self.format) + diff --git a/sheets/options.py b/sheets/options.py index cefe517..b083703 100644 --- a/sheets/options.py +++ b/sheets/options.py @@ -1,12 +1,12 @@ -class Dialect: - def __init__(self, has_header_row=False, **kwargs): - self.has_header_row = has_header_row - self.csv_dialect = kwargs - self.columns = [] - - def add_column(self, column): - self.columns.append(column) - - def finalize(self): - self.columns.sort(key=lambda column: column.counter) - +class Dialect: + def __init__(self, has_header_row=False, **kwargs): + self.has_header_row = has_header_row + self.csv_dialect = kwargs + self.columns = [] + + def add_column(self, column): + self.columns.append(column) + + def finalize(self): + self.columns.sort(key=lambda column: column.counter) + diff --git a/sheets/tests.py b/sheets/tests.py index 2879d42..418eeb5 100644 --- a/sheets/tests.py +++ b/sheets/tests.py @@ -1,22 +1,22 @@ -import unittest -times2 = lambda value: value * 2 - -class MultiplicationTests(unittest.TestCase): - - def setUp(self): - self.factor = 2 - - def testNumber(self): - self.assertEqual(times2(5), 42) - - def testString(self): - self.assertTrue(times2(5) == 10) - - def testTuple(self): - self.assertTrue(times2(5) == 10) - - -if __name__ == '__main__': - unittest.main() - - +import unittest +times2 = lambda value: value * 2 + +class MultiplicationTests(unittest.TestCase): + + def setUp(self): + self.factor = 2 + + def testNumber(self): + self.assertEqual(times2(5), 42) + + def testString(self): + self.assertTrue(times2(5) == 10) + + def testTuple(self): + self.assertTrue(times2(5) == 10) + + +if __name__ == '__main__': + unittest.main() + + diff --git a/tests/columns.py b/tests/columns.py index 032df05..fe48163 100644 --- a/tests/columns.py +++ b/tests/columns.py @@ -1,86 +1,86 @@ -import unittest -import datetime -import decimal - -import sheets - - -class ColumnTests(unittest.TestCase): - invalid_values = [] - - def setUp(self): - self.column = sheets.Column() - self.string_value = 'value' - self.python_value = 'value' - - def test_validation(self): - try: - self.column.validate(self.python_value) - except ValueError as e: - self.fail(str(e)) - - def test_python_conversion(self): - python_value = self.column.to_python(self.string_value) - self.assertEqual(python_value, self.python_value) - - def test_string_conversion(self): - string_value = str(self.column.to_string(self.python_value)) - self.assertEqual(string_value, self.string_value) - - def test_invalid_value(self): - for value in self.invalid_values: - try: - value = self.column.to_python(value) - except ValueError: - # If it's caught here, there's no need to test anything else - return - self.assertRaises(ValueError, self.column.validate, value) - - -class StringColumnTests(ColumnTests): - def setUp(self): - self.column = sheets.StringColumn() - self.string_value = 'value' - self.python_value = 'value' - - -class IntegerColumnTests(ColumnTests): - def setUp(self): - self.column = sheets.IntegerColumn() - self.string_value = '1' - self.python_value = 1 - self.invalid_values = ['invalid'] - - -class FloatColumnTests(ColumnTests): - def setUp(self): - self.column = sheets.FloatColumn() - self.string_value = '1.1' - self.python_value = 1.1 - self.invalid_values = ['invalid'] - - -class DecimalColumnTests(ColumnTests): - def setUp(self): - self.column = sheets.DecimalColumn() - self.string_value = '1.1' - self.python_value = decimal.Decimal('1.1') - self.invalid_values = ['invalid'] - - -class DateColumnTests(ColumnTests): - def setUp(self): - self.column = sheets.DateColumn() - self.string_value = '2010-03-31' - self.python_value = datetime.date(2010, 3, 31) - self.invalid_values = ['invalid', '03-31-2010'] - - -class FormattedDateColumnTests(ColumnTests): - def setUp(self): - self.column = sheets.DateColumn(format='%m/%d/%Y') - self.string_value = '03/31/2010' - self.python_value = datetime.date(2010, 3, 31) - self.invalid_values = ['invalid', '03-31-2010'] - - +import unittest +import datetime +import decimal + +import sheets + + +class ColumnTests(unittest.TestCase): + invalid_values = [] + + def setUp(self): + self.column = sheets.Column() + self.string_value = 'value' + self.python_value = 'value' + + def test_validation(self): + try: + self.column.validate(self.python_value) + except ValueError as e: + self.fail(str(e)) + + def test_python_conversion(self): + python_value = self.column.to_python(self.string_value) + self.assertEqual(python_value, self.python_value) + + def test_string_conversion(self): + string_value = str(self.column.to_string(self.python_value)) + self.assertEqual(string_value, self.string_value) + + def test_invalid_value(self): + for value in self.invalid_values: + try: + value = self.column.to_python(value) + except ValueError: + # If it's caught here, there's no need to test anything else + return + self.assertRaises(ValueError, self.column.validate, value) + + +class StringColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.StringColumn() + self.string_value = 'value' + self.python_value = 'value' + + +class IntegerColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.IntegerColumn() + self.string_value = '1' + self.python_value = 1 + self.invalid_values = ['invalid'] + + +class FloatColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.FloatColumn() + self.string_value = '1.1' + self.python_value = 1.1 + self.invalid_values = ['invalid'] + + +class DecimalColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.DecimalColumn() + self.string_value = '1.1' + self.python_value = decimal.Decimal('1.1') + self.invalid_values = ['invalid'] + + +class DateColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.DateColumn() + self.string_value = '2010-03-31' + self.python_value = datetime.date(2010, 3, 31) + self.invalid_values = ['invalid', '03-31-2010'] + + +class FormattedDateColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.DateColumn(format='%m/%d/%Y') + self.string_value = '03/31/2010' + self.python_value = datetime.date(2010, 3, 31) + self.invalid_values = ['invalid', '03-31-2010'] + + From 994d031196a07ac9231c438555e2fbc5e08db283 Mon Sep 17 00:00:00 2001 From: Imran Haque Date: Mon, 4 Nov 2013 14:37:46 -0800 Subject: [PATCH 2/5] Fix compatibility for Python 2 --- sheets/base.py | 28 ++++++++++++---------------- sheets/columns.py | 22 +++++++++++++++------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/sheets/base.py b/sheets/base.py index 54d8f12..6c918e7 100644 --- a/sheets/base.py +++ b/sheets/base.py @@ -1,10 +1,10 @@ import csv -from collections import OrderedDict from sheets import options __all__ = ['Row', 'Reader', 'Writer'] + class RowMeta(type): def __init__(cls, name, bases, attrs): if 'Dialect' in attrs: @@ -15,34 +15,32 @@ def __init__(cls, name, bases, attrs): # No options were explicitly defined items = {} cls._dialect = options.Dialect(**items) - + for key, attr in attrs.items(): if hasattr(attr, 'attach_to_class'): attr.attach_to_class(cls, key, cls._dialect) - @classmethod - def __prepare__(cls, name, bases): - return OrderedDict() - -class Row(metaclass=RowMeta): +class Row(object): + __metaclass__ = RowMeta # Not yet written about def __init__(self, *args, **kwargs): column_names = [column.name for column in self._dialect.columns] - + # First, make sure the arguments make sense if len(args) > len(column_names): msg = "__init__() takes at most %d arguments (%d given)" raise TypeError(msg % (len(column_names), len(args))) - + for name in kwargs: if name not in column_names: raise TypeError("Got unknown keyword argument '%s'" % name) - + for i, name in enumerate(column_names[:len(args)]): if name in kwargs: - msg = "__init__() got multiple values for keyword argument '%s'" + msg = ("__init__() got multiple values for keyword argument " + "'%s'") raise TypeError(msg % name) kwargs[name] = args[i] @@ -78,7 +76,7 @@ def writer(cls, file): return Writer(cls, file) -class Reader: +class Reader(object): def __init__(self, row_cls, file): self.row_cls = row_cls self.csv_reader = csv.reader(file, **row_cls._dialect.csv_dialect) @@ -92,11 +90,11 @@ def __next__(self): if self.skip_header_row: self.csv_reader.__next__() self.skip_header_row = False - + return self.row_cls(*self.csv_reader.__next__()) -class Writer: +class Writer(object): def __init__(self, row_cls, file): self.columns = row_cls._dialect.columns self._writer = csv.writer(file, row_cls._dialect.csv_dialect) @@ -113,5 +111,3 @@ def writerow(self, row): def writerows(self, rows): for row in rows: self.writerow(row) - - diff --git a/sheets/columns.py b/sheets/columns.py index 15501e3..b17d436 100644 --- a/sheets/columns.py +++ b/sheets/columns.py @@ -2,18 +2,27 @@ import datetime import decimal -class Column: - """ - An individual column within a CSV file. This serves as a base for attributes - and methods that are common to all types of columns. Subclasses of Column - will define behavior for more specific data types. +from itertools import count + + +class Column(object): + """An individual column within a CSV file. + This serves as a base for attributes and methods that are common to all + types of columns. Subclasses of Column will define behavior for more + specific data types. """ + _count = count() # global counter to maintain attr order this can be + # removed in python 3.0 with metaclass.__prepare__. + def __init__(self, title=None, required=True): self.title = title self.required = required self._validators = [self.to_python] + # Hack to maintain class attribute order in Python < 3.0 + self.counter = next(self.__class__._count) + def attach_to_class(self, cls, name, dialect): self.cls = cls self.name = name @@ -87,7 +96,7 @@ class DateColumn(Column): See http://docs.python.org/library/datetime.html for details """ - def __init__(self, *args, format='%Y-%m-%d', **kwargs): + def __init__(self, format='%Y-%m-%d', *args, **kwargs): super(DateColumn, self).__init__(*args, **kwargs) self.format = format @@ -105,4 +114,3 @@ def to_string(self, value): Format a date according to self.format and return that as a string. """ return value.strftime(self.format) - From a989f1a76505c52d4a741294da58da586822269b Mon Sep 17 00:00:00 2001 From: Imran Haque Date: Mon, 4 Nov 2013 13:50:35 -0800 Subject: [PATCH 3/5] Add new column types --- sheets/columns.py | 130 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 114 insertions(+), 16 deletions(-) diff --git a/sheets/columns.py b/sheets/columns.py index b17d436..2bbb54c 100644 --- a/sheets/columns.py +++ b/sheets/columns.py @@ -1,12 +1,13 @@ -import functools import datetime import decimal +import functools from itertools import count class Column(object): """An individual column within a CSV file. + This serves as a base for attributes and methods that are common to all types of columns. Subclasses of Column will define behavior for more specific data types. @@ -63,6 +64,15 @@ class StringColumn(Column): pass +class UnicodeColumn(Column): + """A column containing Unicode data. + + Emits UTF-8 when asked to output text. + """ + def to_string(self, value): + return value.encode('utf-8') + + class IntegerColumn(Column): def to_python(self, value): return int(value) @@ -73,10 +83,63 @@ def to_python(self, value): return float(value) -class DecimalColumn(Column): +class FloatWithCommaSeparatorsColumn(Column): + """A column containing floats with comma thousands separators. + + e.g., "1,000,000.12" -> 1000000.12 + + Note that this is NOT intended to handle "Euro" floats, in which + a comma is used as the decimal separator. """ - A column that contains data in the form of decimal values, - represented in Python by decimal.Decimal. + def to_python(self, value): + if isinstance(value, basestring): + return float(value.replace(',', '')) + else: + return float(value) + + +class BooleanColumn(Column): + _default_bool_map = {'true': True, 'false': False} + + def __init__(self, bool_map=None, inverted_bool_map=None, *args, **kwargs): + """ + Parameters + ---------- + bool_map: dict-like + Mapping of string keys to bool values. Useful if for handling + non-standard boolean mappings, e.g. {'y': True, 'n': False}. + + inverted_bool_map: dict-like (default: inverse of `bool_map`) + Mapping of Boolean keys to default string values, e.g. + {True: 'yes', False: 'no}. + """ + self._bool_map = (self._default_bool_map + if bool_map is None else bool_map) + default_inverted_bool_map = {value: key for key, value + in self._bool_map.iteritems()} + self._inverted_bool_map = (default_inverted_bool_map + if inverted_bool_map is None + else inverted_bool_map) + + super(BooleanColumn, self).__init__(*args, **kwargs) + + def to_python(self, value): + bool_map = self._bool_map + str_value = str(value).lower() + if str_value not in bool_map: + raise ValueError("cannot map '%s' to boolean with map %s" % + (value, bool_map)) + + return bool_map[str_value] + + def to_string(self, value): + return self._inverted_bool_map[value] + + +class DecimalColumn(Column): + """A column that contains data in the form of decimal values. + + Represented in Python by decimal.Decimal. """ def to_python(self, value): @@ -86,31 +149,66 @@ def to_python(self, value): raise ValueError(str(e)) -class DateColumn(Column): - """ - A column that contains data in the form of dates, - represented in Python by datetime.date. +class DateTimeColumn(Column): + """A column that contains data in the form of dates with times. + + Represented in Python by datetime.datetime. format A strptime()-style format string. See http://docs.python.org/library/datetime.html for details + This string will be used as the output format for this column, + and as the default input format as well. + format_list + A list of strptime()-style format strings. + Entries in format_list will be used as alternative input formats + for this column. + timezone + A pytz.timezone object. + If non-None, datetimes parsed by this column will be localized + into the provided timezone. If None, datetimes parsed by this + column will be naive. """ - def __init__(self, format='%Y-%m-%d', *args, **kwargs): - super(DateColumn, self).__init__(*args, **kwargs) + def __init__(self, format='%Y-%m-%d', format_list=None, timezone=None, + *args, **kwargs): + super(DateTimeColumn, self).__init__(*args, **kwargs) self.format = format + self.input_formats = set([format] + (format_list or [])) + self.timezone = timezone def to_python(self, value): - """ - Parse a string value according to self.format - and return only the date portion. + """Parse a string value according to self.format. """ if isinstance(value, datetime.date): return value - return datetime.datetime.strptime(value, self.format).date() + + for format in self.input_formats: + try: + dt = datetime.datetime.strptime(value, format) + if self.timezone is not None: + dt = self.timezone.localize(dt) + return dt + except ValueError: + continue + + msg = ("time data %s does not match any of the formats: %s" % + (value, ", ".join(map(repr, self.input_formats)))) + raise ValueError(msg) def to_string(self, value): - """ - Format a date according to self.format and return that as a string. + """Format a date according to self.format and return that as a string. """ return value.strftime(self.format) + + +class DateColumn(DateTimeColumn): + """ + A column that contains data in the form of dates. + + Represented in Python by datetime.date. + + See DateTimeColumn.__init__ for parameters. + """ + def to_python(self, value): + return super(DateColumn, self).to_python(value).date() From 02365088b99a9c4532c559a2466c7110f69d111c Mon Sep 17 00:00:00 2001 From: Imran Haque Date: Mon, 4 Nov 2013 14:30:10 -0800 Subject: [PATCH 4/5] Fix datetime tests --- sheets/columns.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sheets/columns.py b/sheets/columns.py index 2bbb54c..3e18f68 100644 --- a/sheets/columns.py +++ b/sheets/columns.py @@ -180,8 +180,10 @@ def __init__(self, format='%Y-%m-%d', format_list=None, timezone=None, def to_python(self, value): """Parse a string value according to self.format. """ - if isinstance(value, datetime.date): + if isinstance(value, datetime.datetime): return value + elif isinstance(value, datetime.date): + return datetime.datetime(value.year, value.month, value.day) for format in self.input_formats: try: From 3fe08f1d26d1b6a344508f4f6c41c0500c7d54c8 Mon Sep 17 00:00:00 2001 From: Imran Haque Date: Mon, 4 Nov 2013 15:35:26 -0800 Subject: [PATCH 5/5] Add tests to new column types --- sheets/columns.py | 21 +++++++++- tests/columns.py | 105 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 101 insertions(+), 25 deletions(-) diff --git a/sheets/columns.py b/sheets/columns.py index 3e18f68..8b299e6 100644 --- a/sheets/columns.py +++ b/sheets/columns.py @@ -68,9 +68,21 @@ class UnicodeColumn(Column): """A column containing Unicode data. Emits UTF-8 when asked to output text. + + encoding + The Unicode encoding to use. Defaults to utf-8. """ + def __init__(self, encoding='utf-8', *args, **kwargs): + super(UnicodeColumn, self).__init__(*args, **kwargs) + self.encoding = encoding + + def to_python(self, value): + if isinstance(value, unicode): + return value + return value.decode(self.encoding) + def to_string(self, value): - return value.encode('utf-8') + return value.encode(self.encoding) class IntegerColumn(Column): @@ -91,6 +103,10 @@ class FloatWithCommaSeparatorsColumn(Column): Note that this is NOT intended to handle "Euro" floats, in which a comma is used as the decimal separator. """ + def to_string(self, value): + # NB - Requires Python >= 2.7 + return "{:,}".format(value) + def to_python(self, value): if isinstance(value, basestring): return float(value.replace(',', '')) @@ -124,6 +140,9 @@ def __init__(self, bool_map=None, inverted_bool_map=None, *args, **kwargs): super(BooleanColumn, self).__init__(*args, **kwargs) def to_python(self, value): + if isinstance(value, bool): + return value + bool_map = self._bool_map str_value = str(value).lower() if str_value not in bool_map: diff --git a/tests/columns.py b/tests/columns.py index fe48163..bf0904a 100644 --- a/tests/columns.py +++ b/tests/columns.py @@ -1,31 +1,35 @@ -import unittest import datetime import decimal +from itertools import izip +import unittest import sheets class ColumnTests(unittest.TestCase): invalid_values = [] - + def setUp(self): self.column = sheets.Column() - self.string_value = 'value' - self.python_value = 'value' + self.string_values = ['value'] + self.python_values = ['value'] def test_validation(self): - try: - self.column.validate(self.python_value) - except ValueError as e: - self.fail(str(e)) + for pyval in self.python_values: + try: + self.column.validate(pyval) + except ValueError as e: + self.fail(unicode(e)) def test_python_conversion(self): - python_value = self.column.to_python(self.string_value) - self.assertEqual(python_value, self.python_value) + for sval, pval in izip(self.string_values, self.python_values): + python_value = self.column.to_python(sval) + self.assertEqual(python_value, pval) def test_string_conversion(self): - string_value = str(self.column.to_string(self.python_value)) - self.assertEqual(string_value, self.string_value) + for sval, pval in izip(self.string_values, self.python_values): + string_value = str(self.column.to_string(pval)) + self.assertEqual(string_value, sval) def test_invalid_value(self): for value in self.invalid_values: @@ -40,47 +44,100 @@ def test_invalid_value(self): class StringColumnTests(ColumnTests): def setUp(self): self.column = sheets.StringColumn() - self.string_value = 'value' - self.python_value = 'value' + self.string_values = ['value'] + self.python_values = ['value'] + + +class UnicodeColumnTests_UTF8(ColumnTests): + def setUp(self): + self.column = sheets.UnicodeColumn(encoding='utf-8') + self.string_values = ['Spin\xcc\x88al Tap'] + self.python_values = [u'Spin\u0308al Tap'] + + +class UnicodeColumnTests_UTF16(ColumnTests): + def setUp(self): + self.column = sheets.UnicodeColumn(encoding='utf-16') + self.string_values = [('\xff\xfeS\x00p\x00i\x00n\x00\x08\x03' + 'a\x00l\x00 \x00T\x00a\x00p\x00')] + self.python_values = [u'Spin\u0308al Tap'] class IntegerColumnTests(ColumnTests): def setUp(self): self.column = sheets.IntegerColumn() - self.string_value = '1' - self.python_value = 1 + self.string_values = ['1'] + self.python_values = [1] self.invalid_values = ['invalid'] class FloatColumnTests(ColumnTests): def setUp(self): self.column = sheets.FloatColumn() - self.string_value = '1.1' - self.python_value = 1.1 + self.string_values = ['1.1'] + self.python_values = [1.1] self.invalid_values = ['invalid'] +class FloatWithCommaSeparatorsColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.FloatWithCommaSeparatorsColumn() + self.string_values = ['1,024.5'] + self.python_values = [1024.5] + self.invalid_values = ['in,valid', '1.2,3.4,5.6'] + + +class BooleanColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.BooleanColumn() + self.string_values = ['true', 'false'] + self.python_values = [True, False] + self.invalid_values = ['yes', 'True'] + + +class MappedBooleanColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.BooleanColumn(bool_map={'yes': True, 'no': False}) + self.string_values = ['yes', 'no'] + self.python_values = [True, False] + self.invalid_values = ['true', 'false'] + + class DecimalColumnTests(ColumnTests): def setUp(self): self.column = sheets.DecimalColumn() - self.string_value = '1.1' - self.python_value = decimal.Decimal('1.1') + self.string_values = ['1.1'] + self.python_values = [decimal.Decimal('1.1')] self.invalid_values = ['invalid'] class DateColumnTests(ColumnTests): def setUp(self): self.column = sheets.DateColumn() - self.string_value = '2010-03-31' - self.python_value = datetime.date(2010, 3, 31) + self.string_values = ['2010-03-31'] + self.python_values = [datetime.date(2010, 3, 31)] self.invalid_values = ['invalid', '03-31-2010'] class FormattedDateColumnTests(ColumnTests): def setUp(self): self.column = sheets.DateColumn(format='%m/%d/%Y') - self.string_value = '03/31/2010' - self.python_value = datetime.date(2010, 3, 31) + self.string_values = ['03/31/2010'] + self.python_values = [datetime.date(2010, 3, 31)] + self.invalid_values = ['invalid', '03-31-2010'] + + +class DateTimeColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.DateTimeColumn() + self.string_values = ['2010-03-31'] + self.python_values = [datetime.datetime(2010, 3, 31)] self.invalid_values = ['invalid', '03-31-2010'] +class FormattedDateTimeColumnTests(ColumnTests): + def setUp(self): + self.column = sheets.DateTimeColumn(format='%m/%d/%Y %H:%M:%S') + self.string_values = ['03/31/2010 11:43:12'] + self.python_values = [datetime.datetime(2010, 3, 31, 11, 43, 12)] + self.invalid_values = ['invalid', '03/31/2010']