From 5ca62fc353862cdc1164a0d8ec33382844c34226 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Wed, 26 Sep 2018 13:29:07 -0700
Subject: [PATCH 01/17] Add assign method to dataframe

---
 pygdf/dataframe.py            | 6 ++++++
 pygdf/tests/test_dataframe.py | 9 +++++++++
 2 files changed, 15 insertions(+)

diff --git a/pygdf/dataframe.py b/pygdf/dataframe.py
index bc65a6ee9b1..95cb498741d 100644
--- a/pygdf/dataframe.py
+++ b/pygdf/dataframe.py
@@ -213,6 +213,12 @@ def __len__(self):
         """
         return self._size
 
+    def assign(self, **kwargs):
+        new = self.copy()
+        for k, v in kwargs.items():
+            new[k] = v
+        return new
+
     def head(self, n=5):
         return self[:n]
 
diff --git a/pygdf/tests/test_dataframe.py b/pygdf/tests/test_dataframe.py
index 4c7e15390d3..8724ab60144 100644
--- a/pygdf/tests/test_dataframe.py
+++ b/pygdf/tests/test_dataframe.py
@@ -518,6 +518,15 @@ def test_dataframe_setitem_index_len1():
     np.testing.assert_equal(gdf.b.to_array(), [0])
 
 
+def test_assign():
+    gdf = DataFrame({'x': [1, 2, 3]})
+    gdf2 = gdf.assign(y=gdf.x + 1)
+    assert gdf.columns == ['x']
+    assert gdf2.columns == ['x', 'y']
+
+    np.testing.assert_equal(gdf2.y.to_array(), [2, 3, 4])
+
+
 @pytest.mark.parametrize('nrows', [1, 8, 100, 1000])
 def test_dataframe_hash_columns(nrows):
     gdf = DataFrame()

From a771fa9c7ae6fe4138bf74f43edcb12b82a9a2d3 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Wed, 26 Sep 2018 13:29:23 -0700
Subject: [PATCH 02/17] add pow method to series, but only for the value 2

(sorry for the hack, this was useful for std/var in dask-gdf)
---
 pygdf/series.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pygdf/series.py b/pygdf/series.py
index d5e866186d1..3d4f651f839 100644
--- a/pygdf/series.py
+++ b/pygdf/series.py
@@ -306,6 +306,12 @@ def __mul__(self, other):
     def __rmul__(self, other):
         return self._rbinaryop(other, 'mul')
 
+    def __pow__(self, other):
+        """Square the series; only the scalar exponent 2 is supported."""
+        if isinstance(other, Number) and other == 2:
+            return self * self
+        return NotImplemented
+
     def __floordiv__(self, other):
         return self._binaryop(other, 'floordiv')
 

From 8f8737b8e0d21b28138d45dab30dff76b8507545 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Wed, 26 Sep 2018 13:29:54 -0700
Subject: [PATCH 03/17] Add axis and skipna keywords to series reductions

These don't do anything currently, and are silently ignored
---
 pygdf/series.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pygdf/series.py b/pygdf/series.py
index 3d4f651f839..b47e685ff72 100644
--- a/pygdf/series.py
+++ b/pygdf/series.py
@@ -694,35 +694,35 @@ def find_last_value(self, value):
     #
     # Stats
     #
-    def count(self):
+    def count(self, axis=None, skipna=True):
         """The number of non-null values"""
         return self.valid_count
 
-    def min(self):
+    def min(self, axis=None, skipna=True):
         """Compute the min of the series
         """
         return self._column.min()
 
-    def max(self):
+    def max(self, axis=None, skipna=True):
         """Compute the max of the series
         """
         return self._column.max()
 
-    def sum(self):
+    def sum(self, axis=None, skipna=True):
         """Compute the sum of the series"""
         return self._column.sum()
 
-    def mean(self):
+    def mean(self, axis=None, skipna=True):
         """Compute the mean of the series
         """
         return self._column.mean()
 
-    def std(self, ddof=1):
+    def std(self, ddof=1, axis=None, skipna=True):
         """Compute the standard deviation of the series
         """
         return np.sqrt(self.var(ddof=ddof))
 
-    def var(self, ddof=1):
+    def var(self, ddof=1, axis=None, skipna=True):
         """Compute the variance of the series
         """
         mu, var = self.mean_var(ddof=ddof)

From 50ba8a62b564934efc0b59434e420dfea5f3f767 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Wed, 26 Sep 2018 16:24:38 -0700
Subject: [PATCH 04/17] assert axis and skipna in Series reductions

---
 pygdf/series.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pygdf/series.py b/pygdf/series.py
index b47e685ff72..8843f77bea7 100644
--- a/pygdf/series.py
+++ b/pygdf/series.py
@@ -696,35 +696,42 @@ def find_last_value(self, value):
     #
     def count(self, axis=None, skipna=True):
         """The number of non-null values"""
+        assert axis in (None, 0) and skipna is True
         return self.valid_count
 
     def min(self, axis=None, skipna=True):
         """Compute the min of the series
         """
+        assert axis in (None, 0) and skipna is True
         return self._column.min()
 
     def max(self, axis=None, skipna=True):
         """Compute the max of the series
         """
+        assert axis in (None, 0) and skipna is True
         return self._column.max()
 
     def sum(self, axis=None, skipna=True):
         """Compute the sum of the series"""
+        assert axis in (None, 0) and skipna is True
         return self._column.sum()
 
     def mean(self, axis=None, skipna=True):
         """Compute the mean of the series
         """
+        assert axis in (None, 0) and skipna is True
         return self._column.mean()
 
     def std(self, ddof=1, axis=None, skipna=True):
         """Compute the standard deviation of the series
         """
+        assert axis in (None, 0) and skipna is True
         return np.sqrt(self.var(ddof=ddof))
 
     def var(self, ddof=1, axis=None, skipna=True):
         """Compute the variance of the series
         """
+        assert axis in (None, 0) and skipna is True
         mu, var = self.mean_var(ddof=ddof)
         return var
 

From 46cb1d4baf111055d35b1b3e6c82de63ad120eae Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Wed, 26 Sep 2018 16:28:05 -0700
Subject: [PATCH 05/17] fix assign test

---
 pygdf/dataframe.py            | 2 ++
 pygdf/tests/test_dataframe.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pygdf/dataframe.py b/pygdf/dataframe.py
index 95cb498741d..194bd905c38 100644
--- a/pygdf/dataframe.py
+++ b/pygdf/dataframe.py
@@ -86,6 +86,8 @@ def __init__(self, name_series=None, index=None):
         self._cols = OrderedDict()
         # has initializer?
         if name_series is not None:
+            if isinstance(name_series, dict):
+                name_series = name_series.items()
             for k, series in name_series:
                 self.add_column(k, series, forceindex=index is not None)
 
diff --git a/pygdf/tests/test_dataframe.py b/pygdf/tests/test_dataframe.py
index 8724ab60144..fee63a700f4 100644
--- a/pygdf/tests/test_dataframe.py
+++ b/pygdf/tests/test_dataframe.py
@@ -521,8 +521,8 @@ def test_dataframe_setitem_index_len1():
 def test_assign():
     gdf = DataFrame({'x': [1, 2, 3]})
     gdf2 = gdf.assign(y=gdf.x + 1)
-    assert gdf.columns == ['x']
-    assert gdf2.columns == ['x', 'y']
+    assert list(gdf.columns) == ['x']
+    assert list(gdf2.columns) == ['x', 'y']
 
     np.testing.assert_equal(gdf2.y.to_array(), [2, 3, 4])
 

From 84b14c435ea4a2c2ba2a228e614d9e5786fef38a Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Wed, 26 Sep 2018 16:44:45 -0700
Subject: [PATCH 06/17] Add Series.name and Series.from_pandas

---
 pygdf/dataframe.py            |  1 +
 pygdf/series.py               | 22 +++++++++++++++++++++-
 pygdf/tests/test_dataframe.py | 20 ++++++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/pygdf/dataframe.py b/pygdf/dataframe.py
index 194bd905c38..213b2d634ad 100644
--- a/pygdf/dataframe.py
+++ b/pygdf/dataframe.py
@@ -423,6 +423,7 @@ def add_column(self, name, data, forceindex=False):
             raise NameError('duplicated column name {!r}'.format(name))
 
         series = self._prepare_series_for_add(data, forceindex=forceindex)
+        series.name = name
         self._cols[name] = series
 
     def drop_column(self, name):
diff --git a/pygdf/series.py b/pygdf/series.py
index 8843f77bea7..a2ccbab863f 100644
--- a/pygdf/series.py
+++ b/pygdf/series.py
@@ -6,6 +6,7 @@
 from numbers import Number
 
 import numpy as np
+import pandas as pd
 
 from . import cudautils, formatting
 from .buffer import Buffer
@@ -59,9 +60,21 @@ def from_masked_array(cls, data, mask, null_count=None):
         return cls(data=col)
 
     def __init__(self, data, index=None):
+        name = None
+        if isinstance(data, pd.Series):
+            from .dataframe import DataFrame
+            inp = data
+            name = data.name
+            data = data.to_frame()
+            data.columns = ['x']
+            data = DataFrame.from_pandas(data)
+            data = data['x']
+            data.name = name
         if isinstance(data, Series):
             index = data._index
+            name = data.name
             data = data._column
+
         if not isinstance(data, columnops.TypedColumnBase):
             data = columnops.as_column(data)
 
@@ -71,6 +84,11 @@ def __init__(self, data, index=None):
         assert isinstance(data, columnops.TypedColumnBase)
         self._column = data
         self._index = RangeIndex(len(data)) if index is None else index
+        self.name = name
+
+    @classmethod
+    def from_pandas(cls, s):
+        return cls(s)
 
     def serialize(self, serialize):
         header = {}
@@ -448,7 +466,9 @@ def to_gpu_array(self, fillna=None):
     def to_pandas(self, index=True):
         if index is True:
             index = self.index.to_pandas()
-        return self._column.to_pandas(index=index)
+        s = self._column.to_pandas(index=index)
+        s.name = self.name
+        return s
 
     @property
     def data(self):
diff --git a/pygdf/tests/test_dataframe.py b/pygdf/tests/test_dataframe.py
index fee63a700f4..ecc18e822f4 100644
--- a/pygdf/tests/test_dataframe.py
+++ b/pygdf/tests/test_dataframe.py
@@ -670,3 +670,23 @@ def do_slice(x):
     got = do_slice(gdf).to_pandas()
 
     pd.testing.assert_frame_equal(expect, got)
+
+
+def test_from_pandas():
+    df = pd.DataFrame({'x': [1, 2, 3]}, index=[4., 5., 6.])
+    gdf = gd.DataFrame.from_pandas(df)
+    assert isinstance(gdf, gd.DataFrame)
+
+    pd.testing.assert_frame_equal(df, gdf.to_pandas())
+
+    s = df.x
+    gs = gd.Series.from_pandas(s)
+    assert isinstance(gs, gd.Series)
+
+    pd.testing.assert_series_equal(s, gs.to_pandas())
+
+
+def test_series_name():
+    df = pd.DataFrame({'x': [1, 2, 3]}, index=[4., 5., 6.])
+    gdf = gd.DataFrame.from_pandas(df)
+    assert gdf['x'].name == 'x'

From d1bc16e5d09393477a203f9e1771fc58ee8eef9b Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Wed, 26 Sep 2018 16:44:45 -0700
Subject: [PATCH 07/17] Add Series.name and Series.from_pandas

---
 pygdf/dataframe.py            |  1 +
 pygdf/series.py               | 21 ++++++++++++++++++++-
 pygdf/tests/test_dataframe.py | 20 ++++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/pygdf/dataframe.py b/pygdf/dataframe.py
index 194bd905c38..213b2d634ad 100644
--- a/pygdf/dataframe.py
+++ b/pygdf/dataframe.py
@@ -423,6 +423,7 @@ def add_column(self, name, data, forceindex=False):
             raise NameError('duplicated column name {!r}'.format(name))
 
         series = self._prepare_series_for_add(data, forceindex=forceindex)
+        series.name = name
         self._cols[name] = series
 
     def drop_column(self, name):
diff --git a/pygdf/series.py b/pygdf/series.py
index 8843f77bea7..81bee059e63 100644
--- a/pygdf/series.py
+++ b/pygdf/series.py
@@ -6,6 +6,7 @@
 from numbers import Number
 
 import numpy as np
+import pandas as pd
 
 from . import cudautils, formatting
 from .buffer import Buffer
@@ -59,9 +60,20 @@ def from_masked_array(cls, data, mask, null_count=None):
         return cls(data=col)
 
     def __init__(self, data, index=None):
+        name = None
+        if isinstance(data, pd.Series):
+            from .dataframe import DataFrame
+            name = data.name
+            data = data.to_frame()
+            data.columns = ['x']
+            data = DataFrame.from_pandas(data)
+            data = data['x']
+            data.name = name
         if isinstance(data, Series):
             index = data._index
+            name = data.name
             data = data._column
+
         if not isinstance(data, columnops.TypedColumnBase):
             data = columnops.as_column(data)
 
@@ -71,6 +83,11 @@ def __init__(self, data, index=None):
         assert isinstance(data, columnops.TypedColumnBase)
         self._column = data
         self._index = RangeIndex(len(data)) if index is None else index
+        self.name = name
+
+    @classmethod
+    def from_pandas(cls, s):
+        return cls(s)
 
     def serialize(self, serialize):
         header = {}
@@ -448,7 +465,9 @@ def to_gpu_array(self, fillna=None):
     def to_pandas(self, index=True):
         if index is True:
             index = self.index.to_pandas()
-        return self._column.to_pandas(index=index)
+        s = self._column.to_pandas(index=index)
+        s.name = self.name
+        return s
 
     @property
     def data(self):
diff --git a/pygdf/tests/test_dataframe.py b/pygdf/tests/test_dataframe.py
index fee63a700f4..ecc18e822f4 100644
--- a/pygdf/tests/test_dataframe.py
+++ b/pygdf/tests/test_dataframe.py
@@ -670,3 +670,23 @@ def do_slice(x):
     got = do_slice(gdf).to_pandas()
 
     pd.testing.assert_frame_equal(expect, got)
+
+
+def test_from_pandas():
+    df = pd.DataFrame({'x': [1, 2, 3]}, index=[4., 5., 6.])
+    gdf = gd.DataFrame.from_pandas(df)
+    assert isinstance(gdf, gd.DataFrame)
+
+    pd.testing.assert_frame_equal(df, gdf.to_pandas())
+
+    s = df.x
+    gs = gd.Series.from_pandas(s)
+    assert isinstance(gs, gd.Series)
+
+    pd.testing.assert_series_equal(s, gs.to_pandas())
+
+
+def test_series_name():
+    df = pd.DataFrame({'x': [1, 2, 3]}, index=[4., 5., 6.])
+    gdf = gd.DataFrame.from_pandas(df)
+    assert gdf['x'].name == 'x'

From b4e26a32e5ffdeb218c8ff1f8f31b7f7d57427e5 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 05:01:08 -0700
Subject: [PATCH 08/17] Use normal Series/Index constructors

---
 pygdf/series.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/pygdf/series.py b/pygdf/series.py
index 81bee059e63..22a51b6a024 100644
--- a/pygdf/series.py
+++ b/pygdf/series.py
@@ -62,13 +62,8 @@ def from_masked_array(cls, data, mask, null_count=None):
     def __init__(self, data, index=None):
         name = None
         if isinstance(data, pd.Series):
-            from .dataframe import DataFrame
             name = data.name
-            data = data.to_frame()
-            data.columns = ['x']
-            data = DataFrame.from_pandas(data)
-            data = data['x']
-            data.name = name
+            index = GenericIndex(data.index)
         if isinstance(data, Series):
             index = data._index
             name = data.name

From 4ef79cc4d43fd618ce2d81b71e7a332c253224c5 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 05:01:38 -0700
Subject: [PATCH 09/17] import Index at top level

Fixes #259
---
 pygdf/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pygdf/__init__.py b/pygdf/__init__.py
index c6082cb8fa0..242ca426c85 100644
--- a/pygdf/__init__.py
+++ b/pygdf/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2018, NVIDIA CORPORATION.
 from .dataframe import DataFrame
+from .index import Index
 from .series import Series
 from .multi import concat
 

From 761f5d33da1d83954f4ed3e2208345dcdd06f1b3 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 05:03:55 -0700
Subject: [PATCH 10/17] remove __all__ from __init__.py

We were listing all locals in the file, so it doesn't seem to accomplish much.
It does however require people editing this file to make two changes rather
than one.
---
 pygdf/__init__.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/pygdf/__init__.py b/pygdf/__init__.py
index 242ca426c85..449c8a5fd17 100644
--- a/pygdf/__init__.py
+++ b/pygdf/__init__.py
@@ -11,10 +11,3 @@
 from ._version import get_versions
 __version__ = get_versions()['version']
 del get_versions
-
-__all__ = [
-    DataFrame,
-    Series,
-    concat,
-    set_options,
-]

From 7c672fb18f0e114033aa35a1ed8ae3b982e5ebdc Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 05:09:19 -0700
Subject: [PATCH 11/17] add failing test for index coercion

---
 pygdf/tests/test_dataframe.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pygdf/tests/test_dataframe.py b/pygdf/tests/test_dataframe.py
index ecc18e822f4..0323d4d0d12 100644
--- a/pygdf/tests/test_dataframe.py
+++ b/pygdf/tests/test_dataframe.py
@@ -690,3 +690,12 @@ def test_series_name():
     df = pd.DataFrame({'x': [1, 2, 3]}, index=[4., 5., 6.])
     gdf = gd.DataFrame.from_pandas(df)
     assert gdf['x'].name == 'x'
+
+
+@pytest.mark.xfail(reason="constructor does not coerce index inputs")
+def test_index_in_dataframe_constructor():
+    a = pd.DataFrame({'x': [1, 2, 3]}, index=[4., 5., 6.])
+    b = gd.DataFrame({'x': [1, 2, 3]}, index=[4., 5., 6.])
+    # assert_frame_equal returns None and raises on mismatch: call it bare
+    pd.testing.assert_frame_equal(a, b.to_pandas())
+    pd.testing.assert_frame_equal(a.loc[4:], b.loc[4:].to_pandas())

From c2675e566f9399917294209b9b6b8faf85b7a25f Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 05:33:27 -0700
Subject: [PATCH 12/17] add basic reductions to Index

---
 pygdf/index.py            |  9 +++++++++
 pygdf/tests/test_index.py | 12 ++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/pygdf/index.py b/pygdf/index.py
index 28d68275235..df633d62867 100644
--- a/pygdf/index.py
+++ b/pygdf/index.py
@@ -53,6 +53,15 @@ def to_pandas(self):
     def gpu_values(self):
         return self.as_column().to_gpu_array()
 
+    def min(self):
+        return self.as_column().min()
+
+    def max(self):
+        return self.as_column().max()
+
+    def sum(self):
+        return self.as_column().sum()
+
     def find_segments(self):
         """Return the beginning index for segments
 
diff --git a/pygdf/tests/test_index.py b/pygdf/tests/test_index.py
index 822e529d722..0baf2b5c61c 100644
--- a/pygdf/tests/test_index.py
+++ b/pygdf/tests/test_index.py
@@ -71,3 +71,15 @@ def test_index_comparision():
     assert gi == rg
     assert rg[:-1] != gi
     assert rg[:-1] == gi[:-1]
+
+
+@pytest.mark.parametrize('func', [
+    lambda x: x.min(),
+    lambda x: x.max(),
+    lambda x: x.sum(),
+])
+def test_index_find_label_range(func):
+    x = np.asarray([4, 5, 6, 10])
+    idx = GenericIndex(np.asarray([4, 5, 6, 10]))
+
+    assert func(x) == func(idx)

From 6dca2ef14951e381b9ffa4058891c8a15c4468a7 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 05:41:29 -0700
Subject: [PATCH 13/17] add name to index

---
 pygdf/index.py            | 9 ++++++---
 pygdf/tests/test_index.py | 7 +++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/pygdf/index.py b/pygdf/index.py
index df633d62867..47a642e42b5 100644
--- a/pygdf/index.py
+++ b/pygdf/index.py
@@ -109,7 +109,7 @@ def join(self, other, method, how='left', return_indexers=False):
 class RangeIndex(Index):
     """Basic start..stop
     """
-    def __init__(self, start, stop=None):
+    def __init__(self, start, stop=None, name=None):
         """RangeIndex(size), RangeIndex(start, stop)
 
         Parameters
@@ -120,6 +120,7 @@ def __init__(self, start, stop=None):
             start, stop = 0, start
         self._start = int(start)
         self._stop = int(stop)
+        self.name = name
 
     def __repr__(self):
         return "{}(start={}, stop={})".format(self.__class__.__name__,
@@ -192,7 +193,7 @@ def index_from_range(start, stop=None, step=None):
 
 
 class GenericIndex(Index):
-    def __new__(self, values):
+    def __new__(self, values, name=None):
         from .series import Series
 
         # normalize the input
@@ -209,6 +210,7 @@ def __new__(self, values):
         # Make GenericIndex object
         res = Index.__new__(GenericIndex)
         res._values = values
+        res.name = name
         return res
 
     def serialize(self, serialize):
@@ -280,7 +282,7 @@ def find_label_range(self, first, last):
 class DatetimeIndex(GenericIndex):
     # TODO this constructor should take a timezone or something to be
     # consistent with pandas
-    def __new__(self, values):
+    def __new__(self, values, name=None):
         # we should be more strict on what we accept here but
         # we'd have to go and figure out all the semantics around
         # pandas dtindex creation first which.  For now
@@ -294,6 +296,7 @@ def __new__(self, values):
         # override __new__ properly
         res = Index.__new__(DatetimeIndex)
         res._values = values
+        res.name = name
         return res
 
     @property
diff --git a/pygdf/tests/test_index.py b/pygdf/tests/test_index.py
index 0baf2b5c61c..7dfbee46fa4 100644
--- a/pygdf/tests/test_index.py
+++ b/pygdf/tests/test_index.py
@@ -83,3 +83,10 @@ def test_index_find_label_range(func):
     idx = GenericIndex(np.asarray([4, 5, 6, 10]))
 
     assert func(x) == func(idx)
+
+
+def test_name():
+    x = np.asarray([4, 5, 6, 10])
+    idx = GenericIndex(np.asarray([4, 5, 6, 10]), name='foo')
+    assert idx.name == 'foo'
+

From 68754662f0b3e893ba88b1bb4ab31a8195616269 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 06:15:55 -0700
Subject: [PATCH 14/17] improve support for name

---
 pygdf/dataframe.py |  4 +++-
 pygdf/series.py    | 11 ++++++++---
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/pygdf/dataframe.py b/pygdf/dataframe.py
index 213b2d634ad..f3a7d4691bb 100644
--- a/pygdf/dataframe.py
+++ b/pygdf/dataframe.py
@@ -178,7 +178,9 @@ def __getitem__(self, arg):
         3    3    3
         """
         if isinstance(arg, str) or isinstance(arg, int):
-            return self._cols[arg]
+            s = self._cols[arg]
+            assert s.name == arg
+            return s
         elif isinstance(arg, slice):
             df = DataFrame()
             for k, col in self._cols.items():
diff --git a/pygdf/series.py b/pygdf/series.py
index 22a51b6a024..5e06c7ee190 100644
--- a/pygdf/series.py
+++ b/pygdf/series.py
@@ -59,8 +59,7 @@ def from_masked_array(cls, data, mask, null_count=None):
         col = columnops.as_column(data).set_mask(mask, null_count=null_count)
         return cls(data=col)
 
-    def __init__(self, data, index=None):
-        name = None
+    def __init__(self, data, index=None, name=None):
         if isinstance(data, pd.Series):
             name = data.name
             index = GenericIndex(data.index)
@@ -116,6 +115,7 @@ def _copy_construct_defaults(self):
         return dict(
             data=self._column,
             index=self._index,
+            name=self.name,
         )
 
     def _copy_construct(self, **kwargs):
@@ -388,8 +388,13 @@ def _concat(cls, objs, index=True):
         if index is True:
             index = Index._concat([o.index for o in objs])
 
+        names = {obj.name for obj in objs}
+        if len(names) == 1:
+            [name] = names
+        else:
+            name = None
         col = Column._concat([o._column for o in objs])
-        return cls(data=col, index=index)
+        return cls(data=col, index=index, name=name)
 
     def append(self, arbitrary):
         """Append values from another ``Series`` or array-like object.

From 1d4e83f611d65c2f818c3b12d4a8f73642428193 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 06:17:43 -0700
Subject: [PATCH 15/17] flake8

---
 pygdf/tests/test_dataframe.py | 6 ------
 pygdf/tests/test_index.py     | 4 +---
 setup.cfg                     | 2 +-
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/pygdf/tests/test_dataframe.py b/pygdf/tests/test_dataframe.py
index 0323d4d0d12..acb440ce429 100644
--- a/pygdf/tests/test_dataframe.py
+++ b/pygdf/tests/test_dataframe.py
@@ -686,12 +686,6 @@ def test_from_pandas():
     pd.testing.assert_series_equal(s, gs.to_pandas())
 
 
-def test_series_name():
-    df = pd.DataFrame({'x': [1, 2, 3]}, index=[4., 5., 6.])
-    gdf = gd.DataFrame.from_pandas(df)
-    assert gdf['x'].name == 'x'
-
-
 @pytest.mark.xfail(reason="constructor does not coerce index inputs")
 def test_index_in_dataframe_constructor():
     a = pd.DataFrame({'x': [1, 2, 3]}, index=[4., 5., 6.])
diff --git a/pygdf/tests/test_index.py b/pygdf/tests/test_index.py
index 7dfbee46fa4..68860be6f3e 100644
--- a/pygdf/tests/test_index.py
+++ b/pygdf/tests/test_index.py
@@ -78,7 +78,7 @@ def test_index_comparision():
     lambda x: x.max(),
     lambda x: x.sum(),
 ])
-def test_index_find_label_range(func):
+def test_reductions(func):
     x = np.asarray([4, 5, 6, 10])
     idx = GenericIndex(np.asarray([4, 5, 6, 10]))
 
@@ -86,7 +86,5 @@ def test_index_find_label_range(func):
 
 
 def test_name():
-    x = np.asarray([4, 5, 6, 10])
     idx = GenericIndex(np.asarray([4, 5, 6, 10]), name='foo')
     assert idx.name == 'foo'
-
diff --git a/setup.cfg b/setup.cfg
index 60dcff452f9..82dd2d8bcd2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -13,4 +13,4 @@ tag_prefix = v
 parentdir_prefix = pygdf-
 
 [flake8]
-exclude = img,notebooks,thirdparty
+exclude = img,notebooks,thirdparty,__init__.py

From a938ba83d9e0c6bc20f357d28aed47265d128e27 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 11:21:41 -0700
Subject: [PATCH 16/17] set Series name

---
 pygdf/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pygdf/dataframe.py b/pygdf/dataframe.py
index f3a7d4691bb..1e59294efa7 100644
--- a/pygdf/dataframe.py
+++ b/pygdf/dataframe.py
@@ -179,7 +179,7 @@ def __getitem__(self, arg):
         """
         if isinstance(arg, str) or isinstance(arg, int):
             s = self._cols[arg]
-            assert s.name == arg
+            s.name = arg
             return s
         elif isinstance(arg, slice):
             df = DataFrame()

From 64abb9bff5a3c784c3859814139f3421ea383930 Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Thu, 27 Sep 2018 11:23:01 -0700
Subject: [PATCH 17/17] Use Pandas Index for columns attribute

---
 pygdf/dataframe.py            |  2 +-
 pygdf/tests/test_dataframe.py | 22 +++++++++++-----------
 pygdf/tests/test_onehot.py    |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/pygdf/dataframe.py b/pygdf/dataframe.py
index 1e59294efa7..35b60bbc427 100644
--- a/pygdf/dataframe.py
+++ b/pygdf/dataframe.py
@@ -304,7 +304,7 @@ def loc(self):
     def columns(self):
         """Returns a tuple of columns
         """
-        return tuple(self._cols)
+        return pd.Index(self._cols)
 
     @property
     def index(self):
diff --git a/pygdf/tests/test_dataframe.py b/pygdf/tests/test_dataframe.py
index acb440ce429..70441529358 100644
--- a/pygdf/tests/test_dataframe.py
+++ b/pygdf/tests/test_dataframe.py
@@ -123,7 +123,7 @@ def test_dataframe_basic():
     df['vals'] = rnd_vals
     np.testing.assert_equal(df['vals'].to_array(), rnd_vals)
     assert len(df) == 10
-    assert df.columns == ('keys', 'vals')
+    assert tuple(df.columns) == ('keys', 'vals')
 
     # Make another dataframe
     df2 = DataFrame()
@@ -177,13 +177,13 @@ def test_dataframe_column_add_drop():
     data = np.asarray(range(10))
     df['a'] = data
     df['b'] = data
-    assert df.columns == ('a', 'b')
+    assert tuple(df.columns) == ('a', 'b')
     del df['a']
-    assert df.columns == ('b',)
+    assert tuple(df.columns) == ('b',)
     df['c'] = data
-    assert df.columns == ('b', 'c')
+    assert tuple(df.columns) == ('b', 'c')
     df['a'] = data
-    assert df.columns == ('b', 'c', 'a')
+    assert tuple(df.columns) == ('b', 'c', 'a')
 
 
 @pytest.mark.parametrize('nelem', [0, 3, 100, 1000])
@@ -210,7 +210,7 @@ def test_dataframe_slicing():
     # Row slice first 10
     first_10 = df[:10]
     assert len(first_10) == 10
-    assert first_10.columns == tuple(['a', 'b', 'c', 'd'])
+    assert tuple(first_10.columns) == ('a', 'b', 'c', 'd')
     np.testing.assert_equal(first_10['a'].to_array(), ha[:10])
     np.testing.assert_equal(first_10['b'].to_array(), hb[:10])
     np.testing.assert_equal(first_10['c'].to_array(), hc[:10])
@@ -220,7 +220,7 @@ def test_dataframe_slicing():
     # Row slice last 10
     last_10 = df[-10:]
     assert len(last_10) == 10
-    assert last_10.columns == tuple(['a', 'b', 'c', 'd'])
+    assert tuple(last_10.columns) == ('a', 'b', 'c', 'd')
     np.testing.assert_equal(last_10['a'].to_array(), ha[-10:])
     np.testing.assert_equal(last_10['b'].to_array(), hb[-10:])
     np.testing.assert_equal(last_10['c'].to_array(), hc[-10:])
@@ -232,7 +232,7 @@ def test_dataframe_slicing():
     end = 121
     subrange = df[begin:end]
     assert len(subrange) == end - begin
-    assert subrange.columns == tuple(['a', 'b', 'c', 'd'])
+    assert tuple(subrange.columns) == ('a', 'b', 'c', 'd')
     np.testing.assert_equal(subrange['a'].to_array(), ha[begin:end])
     np.testing.assert_equal(subrange['b'].to_array(), hb[begin:end])
     np.testing.assert_equal(subrange['c'].to_array(), hc[begin:end])
@@ -252,14 +252,14 @@ def test_dataframe_loc():
 
     # Full slice
     full = df.loc[:, ['c']]
-    assert full.columns == tuple(['c'])
+    assert tuple(full.columns) == ('c',)
     np.testing.assert_equal(full['c'].to_array(), hc)
 
     begin = 117
     end = 122
     fewer = df.loc[begin:end, ['c', 'd', 'a']]
     assert len(fewer) == end - begin + 1
-    assert fewer.columns == tuple(['c', 'd', 'a'])
+    assert tuple(fewer.columns) == ('c', 'd', 'a')
     np.testing.assert_equal(fewer['a'].to_array(), ha[begin:end + 1])
     np.testing.assert_equal(fewer['c'].to_array(), hc[begin:end + 1])
     np.testing.assert_equal(fewer['d'].to_array(), hd[begin:end + 1])
@@ -272,7 +272,7 @@ def test_dataframe_loc():
     end = 122
     fewer = df2.loc[begin:end, ['c', 'd', 'a']]
     assert len(fewer) == end - begin + 1
-    assert fewer.columns == tuple(['c', 'd', 'a'])
+    assert tuple(fewer.columns) == ('c', 'd', 'a')
     np.testing.assert_equal(fewer['a'].to_array(), ha[begin:end + 1])
     np.testing.assert_equal(fewer['c'].to_array(), hc[begin:end + 1])
     np.testing.assert_equal(fewer['d'].to_array(), hd[begin:end + 1])
diff --git a/pygdf/tests/test_onehot.py b/pygdf/tests/test_onehot.py
index 5d99b95af8d..c73e0ca6b02 100644
--- a/pygdf/tests/test_onehot.py
+++ b/pygdf/tests/test_onehot.py
@@ -64,7 +64,7 @@ def test_onehot_masked():
     out = df.one_hot_encoding('a', cats=list(range(high)),
                               prefix='a', dtype=np.int32)
 
-    assert out.columns == tuple(['a', 'a_0', 'a_1', 'a_2', 'a_3', 'a_4'])
+    assert tuple(out.columns) == ('a', 'a_0', 'a_1', 'a_2', 'a_3', 'a_4')
     np.testing.assert_array_equal(out['a_0'] == 1, arr == 0)
     np.testing.assert_array_equal(out['a_1'] == 1, arr == 1)
     np.testing.assert_array_equal(out['a_2'] == 1, arr == 2)