From 0da66b031e0dd1c0a4e47313739976f99a53cbc5 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 09:32:02 +0000 Subject: [PATCH 01/25] Absolute imports and circular import fixes --- tinyquery/api_client.py | 2 ++ tinyquery/api_client_test.py | 8 +++++--- tinyquery/compiler.py | 37 ++++++++++++++++++------------------ tinyquery/compiler_test.py | 33 +++++++++++++++++--------------- tinyquery/context.py | 5 +++-- tinyquery/evaluator.py | 12 +++++++----- tinyquery/evaluator_test.py | 10 ++++++---- tinyquery/exceptions.py | 5 +++++ tinyquery/lexer.py | 1 + tinyquery/lexer_test.py | 4 +++- tinyquery/parser.py | 8 +++++--- tinyquery/parser_test.py | 6 ++++-- tinyquery/repeated_util.py | 3 ++- tinyquery/runtime.py | 14 ++++++++------ tinyquery/tinyquery.py | 12 +++++++----- tinyquery/tinyquery_test.py | 4 +++- tinyquery/tq_ast.py | 1 + tinyquery/tq_modes.py | 1 + tinyquery/tq_types.py | 2 ++ tinyquery/type_context.py | 14 ++++++++------ tinyquery/typed_ast.py | 6 ++++-- 21 files changed, 113 insertions(+), 75 deletions(-) create mode 100644 tinyquery/exceptions.py diff --git a/tinyquery/api_client.py b/tinyquery/api_client.py index 0a6a436..587eb3e 100644 --- a/tinyquery/api_client.py +++ b/tinyquery/api_client.py @@ -4,6 +4,8 @@ This can be used in place of the value returned by apiclient.discovery.build(). 
""" +from __future__ import absolute_import + import functools import json diff --git a/tinyquery/api_client_test.py b/tinyquery/api_client_test.py index 541e63b..469787a 100644 --- a/tinyquery/api_client_test.py +++ b/tinyquery/api_client_test.py @@ -1,8 +1,10 @@ +from __future__ import absolute_import + import unittest -import api_client -import tq_types -import tinyquery +from tinyquery import api_client +from tinyquery import tq_types +from tinyquery import tinyquery class ApiClientTest(unittest.TestCase): diff --git a/tinyquery/compiler.py b/tinyquery/compiler.py index c95ec6f..b46a7de 100644 --- a/tinyquery/compiler.py +++ b/tinyquery/compiler.py @@ -4,19 +4,18 @@ -Validate that the expression is well-typed. -Resolve all select fields to their aliases and types. """ +from __future__ import absolute_import + import collections import itertools -import parser -import runtime -import tq_ast -import typed_ast -import type_context -import tq_types - - -class CompileError(Exception): - pass +from tinyquery import exceptions +from tinyquery import parser +from tinyquery import runtime +from tinyquery import tq_ast +from tinyquery import typed_ast +from tinyquery import type_context +from tinyquery import tq_types def compile_text(text, tables_by_name): @@ -213,7 +212,7 @@ def compile_table_expr(self, table_expr): return method(table_expr) def compile_table_expr_TableId(self, table_expr): - import tinyquery # TODO(colin): fix circular import + from tinyquery import tinyquery # TODO(colin): fix circular import table = self.tables_by_name[table_expr.name] if isinstance(table, tinyquery.Table): return self.compile_table_ref(table_expr, table) @@ -294,7 +293,7 @@ def compile_joined_table(self, table_expr): elif isinstance(table_expr, tq_ast.TableId): alias = table_expr.name else: - raise CompileError('Table expression must have an alias name.') + raise exceptions.CompileError('Table expression must have an alias name.') result_ctx = 
compiled_table.type_ctx.context_with_full_alias(alias) compiled_table = compiled_table.with_type_ctx(result_ctx) return compiled_table, alias @@ -366,7 +365,7 @@ def compile_join_field(expr, join_type): left_column_id)] # Fall through to the error case if the aliases are the # same for both sides. - raise CompileError('JOIN conditions must consist of an AND of = ' + raise exceptions.CompileError('JOIN conditions must consist of an AND of = ' 'comparisons between two field on distinct ' 'tables. Got expression %s' % expr) return [compile_join_field(expr, join_type) @@ -434,7 +433,7 @@ def compile_groups(self, groups, select_fields, aliases, table_ctx): def compile_select_field(self, expr, alias, within_clause, type_ctx): if within_clause is not None and within_clause != 'RECORD' and ( expr.args[0].name.split('.')[0] != within_clause): - raise CompileError('WITHIN clause syntax error') + raise exceptions.CompileError('WITHIN clause syntax error') else: compiled_expr = self.compile_expr(expr, type_ctx) return typed_ast.SelectField(compiled_expr, alias, within_clause) @@ -485,7 +484,7 @@ def compile_UnaryOperator(self, expr, type_ctx): try: result_type = func.check_types(compiled_val.type) except TypeError: - raise CompileError('Invalid type for operator {}: {}'.format( + raise exceptions.CompileError('Invalid type for operator {}: {}'.format( expr.operator, [compiled_val.type])) return typed_ast.FunctionCall(func, [compiled_val], result_type) @@ -501,7 +500,7 @@ def compile_BinaryOperator(self, expr, type_ctx): result_type = func.check_types(compiled_left.type, compiled_right.type) except TypeError: - raise CompileError('Invalid types for operator {}: {}'.format( + raise exceptions.CompileError('Invalid types for operator {}: {}'.format( expr.operator, [arg.type for arg in [compiled_left, compiled_right]])) @@ -516,7 +515,7 @@ def compile_FunctionCall(self, expr, type_ctx): # that the evaluator knows to change the context. 
if self.is_innermost_aggregate(expr): if type_ctx.aggregate_context is None: - raise CompileError('Unexpected aggregate function.') + raise exceptions.CompileError('Unexpected aggregate function.') sub_expr_ctx = type_ctx.aggregate_context ast_type = typed_ast.AggregateFunctionCall else: @@ -530,7 +529,7 @@ def compile_FunctionCall(self, expr, type_ctx): result_type = func.check_types( *(arg.type for arg in compiled_args)) except TypeError: - raise CompileError('Invalid types for function {}: {}'.format( + raise exceptions.CompileError('Invalid types for function {}: {}'.format( expr.name, [arg.type for arg in compiled_args])) return ast_type(func, compiled_args, result_type) @@ -557,7 +556,7 @@ def get_aliases(cls, select_field_list): for alias in proposed_aliases: if alias is not None: if alias in used_aliases: - raise CompileError( + raise exceptions.CompileError( 'Ambiguous column name {}.'.format(alias)) used_aliases.add(alias) diff --git a/tinyquery/compiler_test.py b/tinyquery/compiler_test.py index f05bb8c..7aeb52c 100644 --- a/tinyquery/compiler_test.py +++ b/tinyquery/compiler_test.py @@ -1,18 +1,21 @@ # TODO(colin): fix these lint errors (http://pep8.readthedocs.io/en/release-1.7.x/intro.html#error-codes) # pep8-disable:E122 +from __future__ import absolute_import + import collections import datetime import unittest -import compiler -import context -import runtime -import tinyquery -import tq_ast -import tq_modes -import tq_types -import type_context -import typed_ast +from tinyquery import exceptions +from tinyquery import compiler +from tinyquery import context +from tinyquery import runtime +from tinyquery import tinyquery +from tinyquery import tq_ast +from tinyquery import tq_modes +from tinyquery import tq_types +from tinyquery import type_context +from tinyquery import typed_ast class CompilerTest(unittest.TestCase): @@ -124,7 +127,7 @@ def assert_compiled_select(self, text, expected_ast): self.assertEqual(expected_ast, ast) def 
assert_compile_error(self, text): - self.assertRaises(compiler.CompileError, compiler.compile_text, + self.assertRaises(exceptions.CompileError, compiler.compile_text, text, self.tables_by_name) def make_type_context(self, table_column_type_triples, @@ -178,7 +181,7 @@ def test_unary_operator(self): ) def test_mistyped_unary_operator(self): - with self.assertRaises(compiler.CompileError) as context: + with self.assertRaises(exceptions.CompileError) as context: compiler.compile_text('SELECT -strings FROM rainbow_table', self.tables_by_name) self.assertTrue('Invalid type for operator' in str(context.exception)) @@ -187,12 +190,12 @@ def test_strange_arithmetic(self): try: compiler.compile_text('SELECT times + ints + floats + bools FROM ' 'rainbow_table', self.tables_by_name) - except compiler.CompileError: + except exceptions.CompileError: self.fail('Compiler exception on arithmetic across all numeric ' 'types.') def test_mistyped_binary_operator(self): - with self.assertRaises(compiler.CompileError) as context: + with self.assertRaises(exceptions.CompileError) as context: compiler.compile_text('SELECT ints CONTAINS strings FROM ' 'rainbow_table', self.tables_by_name) @@ -241,7 +244,7 @@ def test_function_calls(self): ) def test_mistyped_function_call(self): - with self.assertRaises(compiler.CompileError) as context: + with self.assertRaises(exceptions.CompileError) as context: compiler.compile_text('SELECT SUM(strings) FROM rainbow_table', self.tables_by_name) self.assertTrue('Invalid types for function' in str(context.exception)) @@ -1001,7 +1004,7 @@ def test_within_clause(self): self.make_type_context([])))) def test_within_clause_error(self): - with self.assertRaises(compiler.CompileError) as context: + with self.assertRaises(exceptions.CompileError) as context: compiler.compile_text( 'SELECT r1.s, COUNT(r1.s) WITHIN r2 AS ' 'num_s_in_r1 FROM record_table', diff --git a/tinyquery/context.py b/tinyquery/context.py index 9a3bae5..6f03e71 100644 --- 
a/tinyquery/context.py +++ b/tinyquery/context.py @@ -2,13 +2,14 @@ It is the basic container for intermediate data when evaluating a query. """ +from __future__ import absolute_import import collections import itertools import logging -import repeated_util -import tq_modes +from tinyquery import repeated_util +from tinyquery import tq_modes class Context(object): diff --git a/tinyquery/evaluator.py b/tinyquery/evaluator.py index 6d803fc..8278034 100644 --- a/tinyquery/evaluator.py +++ b/tinyquery/evaluator.py @@ -1,12 +1,14 @@ # TODO(colin): fix these lint errors (http://pep8.readthedocs.io/en/release-1.7.x/intro.html#error-codes) # pep8-disable:E115,E128 +from __future__ import absolute_import + import collections -import context -import tq_ast -import tq_modes -import typed_ast -import tq_types +from tinyquery import context +from tinyquery import tq_ast +from tinyquery import tq_modes +from tinyquery import typed_ast +from tinyquery import tq_types class Evaluator(object): diff --git a/tinyquery/evaluator_test.py b/tinyquery/evaluator_test.py index 61d2c2c..32bc774 100644 --- a/tinyquery/evaluator_test.py +++ b/tinyquery/evaluator_test.py @@ -1,15 +1,17 @@ # TODO(colin): fix these lint errors (http://pep8.readthedocs.io/en/release-1.7.x/intro.html#error-codes) # pep8-disable:E122,E127,E128 +from __future__ import absolute_import + import collections import contextlib import datetime import mock import unittest -import context -import tinyquery -import tq_modes -import tq_types +from tinyquery import context +from tinyquery import tinyquery +from tinyquery import tq_modes +from tinyquery import tq_types # TODO(Samantha): Not all modes are nullable. 
diff --git a/tinyquery/exceptions.py b/tinyquery/exceptions.py new file mode 100644 index 0000000..412cc61 --- /dev/null +++ b/tinyquery/exceptions.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import + + +class CompileError(Exception): + pass diff --git a/tinyquery/lexer.py b/tinyquery/lexer.py index 61cccd3..3454d69 100644 --- a/tinyquery/lexer.py +++ b/tinyquery/lexer.py @@ -1,4 +1,5 @@ """The lexer turns a query string into a stream of tokens.""" +from __future__ import absolute_import from ply import lex diff --git a/tinyquery/lexer_test.py b/tinyquery/lexer_test.py index 3d1354c..3b959a4 100644 --- a/tinyquery/lexer_test.py +++ b/tinyquery/lexer_test.py @@ -1,6 +1,8 @@ +from __future__ import absolute_import + import unittest -import lexer +from tinyquery import lexer plus = ('PLUS', '+') diff --git a/tinyquery/parser.py b/tinyquery/parser.py index 7949617..867d9d0 100644 --- a/tinyquery/parser.py +++ b/tinyquery/parser.py @@ -1,10 +1,12 @@ """The parser turns a stream of tokens into an AST.""" +from __future__ import absolute_import + import os from ply import yacc -import tq_ast -import lexer +from tinyquery import tq_ast +from tinyquery import lexer tokens = lexer.tokens @@ -484,6 +486,6 @@ def parse_text(text): if should_rebuild_parser: parser = yacc.yacc() else: - import parsetab + from tinyquery import parsetab parser = yacc.yacc(debug=0, write_tables=0, tabmodule=parsetab) return parser.parse(text, lexer=lexer.get_lexer()) diff --git a/tinyquery/parser_test.py b/tinyquery/parser_test.py index 370d7c0..139107f 100644 --- a/tinyquery/parser_test.py +++ b/tinyquery/parser_test.py @@ -1,7 +1,9 @@ +from __future__ import absolute_import + import unittest -import tq_ast -import parser +from tinyquery import tq_ast +from tinyquery import parser def literal(value): diff --git a/tinyquery/repeated_util.py b/tinyquery/repeated_util.py index d88ca97..a4a8cb6 100644 --- a/tinyquery/repeated_util.py +++ b/tinyquery/repeated_util.py @@ -5,8 +5,9 @@ These 
functions allow us to flatten into non-repeated columns to apply various operations and then unflatten back into repeated columns afterwards. """ +from __future__ import absolute_import -import tq_modes +from tinyquery import tq_modes def rebuild_column_values(repetitions, values, result): diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 12d0eab..9cf0edd 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -1,4 +1,6 @@ """Implementation of the standard built-in functions.""" +from __future__ import absolute_import + import abc import datetime import functools @@ -10,11 +12,11 @@ import arrow -import compiler -import context -import repeated_util -import tq_types -import tq_modes +from tinyquery import exceptions +from tinyquery import context +from tinyquery import repeated_util +from tinyquery import tq_types +from tinyquery import tq_modes def pass_through_none(fn): @@ -1326,7 +1328,7 @@ def get_func(name): elif name in _AGGREGATE_FUNCTIONS: return _AGGREGATE_FUNCTIONS[name] else: - raise compiler.CompileError('Unknown function: {}'.format(name)) + raise exceptions.CompileError('Unknown function: {}'.format(name)) def is_aggregate_func(name): diff --git a/tinyquery/tinyquery.py b/tinyquery/tinyquery.py index f5ebf46..fbc8f72 100644 --- a/tinyquery/tinyquery.py +++ b/tinyquery/tinyquery.py @@ -1,12 +1,14 @@ """Implementation of the TinyQuery service.""" +from __future__ import absolute_import + import collections import json -import compiler -import context -import evaluator -import tq_modes -import tq_types +from tinyquery import compiler +from tinyquery import context +from tinyquery import evaluator +from tinyquery import tq_modes +from tinyquery import tq_types class TinyQueryError(Exception): diff --git a/tinyquery/tinyquery_test.py b/tinyquery/tinyquery_test.py index 40e5f74..4d27a8d 100644 --- a/tinyquery/tinyquery_test.py +++ b/tinyquery/tinyquery_test.py @@ -1,7 +1,9 @@ +from __future__ import absolute_import + import json import 
unittest -import tinyquery +from tinyquery import tinyquery class TinyQueryTest(unittest.TestCase): diff --git a/tinyquery/tq_ast.py b/tinyquery/tq_ast.py index e8c362f..0ebd502 100644 --- a/tinyquery/tq_ast.py +++ b/tinyquery/tq_ast.py @@ -3,6 +3,7 @@ This AST format is desinged to be easy to parse into. See typed_ast for the AST format that is used during the evaluation step. """ +from __future__ import absolute_import import collections diff --git a/tinyquery/tq_modes.py b/tinyquery/tq_modes.py index 91f293c..d025f8e 100644 --- a/tinyquery/tq_modes.py +++ b/tinyquery/tq_modes.py @@ -1,5 +1,6 @@ """ Defines the valid modes. Currently we just use strings to identify them. """ +from __future__ import absolute_import NULLABLE = "NULLABLE" REQUIRED = "REQUIRED" diff --git a/tinyquery/tq_types.py b/tinyquery/tq_types.py index 572778b..094e924 100644 --- a/tinyquery/tq_types.py +++ b/tinyquery/tq_types.py @@ -1,5 +1,7 @@ """Defines the valid types. Currently we just uses strings to identify them. """ +from __future__ import absolute_import + import arrow # TODO(Samantha): Structs. diff --git a/tinyquery/type_context.py b/tinyquery/type_context.py index 9c2a7e8..8f8a1f4 100644 --- a/tinyquery/type_context.py +++ b/tinyquery/type_context.py @@ -1,9 +1,11 @@ +from __future__ import absolute_import + import collections import re -import compiler -import tq_types -import typed_ast +from tinyquery import exceptions +from tinyquery import tq_types +from tinyquery import typed_ast # TODO(Samantha): Should checking modes go here? @@ -93,7 +95,7 @@ def union_contexts(cls, contexts): if full_column in result_columns: if result_columns[full_column] == col_type: continue - raise compiler.CompileError( + raise exceptions.CompileError( 'Incompatible types when performing union on field ' '{}: {} vs. 
{}'.format(full_column, result_columns[full_column], @@ -134,12 +136,12 @@ def column_ref_for_name(self, name): if len(possible_results) == 1: return possible_results[0] elif len(possible_results) > 1: - raise compiler.CompileError('Ambiguous field: {}'.format(name)) + raise exceptions.CompileError('Ambiguous field: {}'.format(name)) else: if self.implicit_column_context is not None: return self.implicit_column_context.column_ref_for_name(name) else: - raise compiler.CompileError('Field not found: {}'.format(name)) + raise exceptions.CompileError('Field not found: {}'.format(name)) def context_with_subquery_alias(self, subquery_alias): """Handle the case where a subquery has an alias. diff --git a/tinyquery/typed_ast.py b/tinyquery/typed_ast.py index dbc0f43..e9a4bca 100644 --- a/tinyquery/typed_ast.py +++ b/tinyquery/typed_ast.py @@ -1,8 +1,9 @@ """A set of AST classes with types and aliases filled in.""" +from __future__ import absolute_import import collections -import type_context -import tq_modes + +from tinyquery import tq_modes class Select(collections.namedtuple( @@ -74,6 +75,7 @@ def __init__(self, *_, **__): class NoTable(collections.namedtuple('NoTable', []), TableExpression): @property def type_ctx(self): + from tinyquery import type_context # To avoid circular import return type_context.TypeContext.from_full_columns( collections.OrderedDict()) From c9ff3bd50d3328bc46138ea1f960bb4c4c81850c Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 09:35:04 +0000 Subject: [PATCH 02/25] Removing iterkeys --- tinyquery/api_client.py | 2 +- tinyquery/compiler.py | 2 +- tinyquery/context.py | 6 +++--- tinyquery/tinyquery.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tinyquery/api_client.py b/tinyquery/api_client.py index 0a6a436..1d203c9 100644 --- a/tinyquery/api_client.py +++ b/tinyquery/api_client.py @@ -153,7 +153,7 @@ def insert(self, projectId, body): create_disposition, write_disposition) else: assert False, 'Unknown job 
type: {}'.format( - body['configuration'].keys()) + list(body['configuration'].keys())) @staticmethod def _get_config_table(config, key): diff --git a/tinyquery/compiler.py b/tinyquery/compiler.py index c95ec6f..53942bc 100644 --- a/tinyquery/compiler.py +++ b/tinyquery/compiler.py @@ -93,7 +93,7 @@ def expand_select_fields(self, select_fields, table_expr): """ table_ctx = table_expr.type_ctx star_select_fields = [] - for table_name, col_name in table_ctx.columns.iterkeys(): + for table_name, col_name in table_ctx.columns: if table_name is not None: col_ref = table_name + '.' + col_name else: diff --git a/tinyquery/context.py b/tinyquery/context.py index 9a3bae5..78d9665 100644 --- a/tinyquery/context.py +++ b/tinyquery/context.py @@ -72,7 +72,7 @@ def context_from_table(table, type_context): any_column = table.columns.itervalues().next() new_columns = collections.OrderedDict([ (column_name, column) - for (column_name, column) in zip(type_context.columns.iterkeys(), + for (column_name, column) in zip(type_context.columns, table.columns.itervalues()) ]) return Context(len(any_column.values), new_columns, None) @@ -83,7 +83,7 @@ def context_with_overlayed_type_context(context, type_context): any_column = context.columns.itervalues().next() new_columns = collections.OrderedDict([ (column_name, column) - for (column_name, column) in zip(type_context.columns.iterkeys(), + for (column_name, column) in zip(type_context.columns, context.columns.itervalues()) ]) return Context(len(any_column.values), new_columns, None) @@ -193,7 +193,7 @@ def mask_context(context, mask): for col, values in zip(context.columns.itervalues(), orig_column_values)] new_columns = collections.OrderedDict([ - (name, col) for name, col in zip(context.columns.iterkeys(), + (name, col) for name, col in zip(context.columns, new_values)]) return Context( diff --git a/tinyquery/tinyquery.py b/tinyquery/tinyquery.py index f5ebf46..9e93c72 100644 --- a/tinyquery/tinyquery.py +++ b/tinyquery/tinyquery.py @@ 
-199,7 +199,7 @@ def get_all_tables(self): def get_table_names_for_dataset(self, dataset): # TODO(alan): Improve this to use a more first-class dataset structure. return [full_table[len(dataset + '.'):] - for full_table in self.tables_by_name.iterkeys() + for full_table in self.tables_by_name if full_table.startswith(dataset + '.')] def get_all_table_info_in_dataset(self, project_id, dataset): From 3f4c436a60d5a9b214599a0d8640b8feed2a121c Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 09:44:35 +0000 Subject: [PATCH 03/25] Removing filters --- tinyquery/compiler.py | 6 +++--- tinyquery/context.py | 7 +++---- tinyquery/runtime.py | 17 +++++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tinyquery/compiler.py b/tinyquery/compiler.py index 53942bc..46d36f3 100644 --- a/tinyquery/compiler.py +++ b/tinyquery/compiler.py @@ -112,9 +112,9 @@ def expand_select_fields(self, select_fields, table_expr): elif (field.expr and isinstance(field.expr, tq_ast.ColumnId) and field.expr.name.endswith('.*')): prefix = field.expr.name[:-len('.*')] - record_star_fields = filter( - lambda f: f.alias.startswith(prefix), - star_select_fields) + record_star_fields = [f + for f in star_select_fields + if f.alias.startswith(prefix)] result_fields.extend(record_star_fields) else: result_fields.append(field) diff --git a/tinyquery/context.py b/tinyquery/context.py index 78d9665..08f72dd 100644 --- a/tinyquery/context.py +++ b/tinyquery/context.py @@ -115,9 +115,8 @@ def mask_context(context, mask): # behavior as function evaluation on repeated fields. Fix. 
if mask.mode == tq_modes.REPEATED: num_rows = len( - filter( - None, - (len(filter(None, row)) for row in mask.values))) + [r for r in (any(row) for row in mask.values) if r] + ) new_columns = collections.OrderedDict() for col_name, col in context.columns.iteritems(): if col.mode == tq_modes.REPEATED: @@ -184,7 +183,7 @@ def mask_context(context, mask): orig_column_values = [ col.values for col in context.columns.itervalues()] mask_values = mask.values - num_rows = len(filter(None, mask.values)) + num_rows = len([v for v in mask.values if v]) new_values = [ Column( type=col.type, diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 12d0eab..551a28f 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -362,9 +362,9 @@ def _evaluate(self, num_rows, *cols): rows = zip(*[col.values for col in cols]) def first_nonnull(row): - result = filter(lambda x: x is not None, row) - if result: - return result[0] + for x in row: + if x is not None: + return x return None values = map(first_nonnull, rows) return context.Column(type=result_type, mode=tq_modes.NULLABLE, @@ -617,8 +617,7 @@ def check_types(self, arg): def _evaluate(self, num_rows, column): return context.Column(type=self.check_types(column.type), mode=tq_modes.NULLABLE, - values=[self.func(filter(lambda x: x is not None, - column.values))]) + values=[self.func([x for x in column.values if x is not None])]) class SumFunction(AggregateFunction): @@ -1020,9 +1019,11 @@ def _parse_property_name(self, json_path): raise ValueError( 'Invalid json path expression. 
Cannot end in ".".') prop_name_plus = json_path[1:] - next_separator_positions = filter( - lambda pos: pos != -1, - [prop_name_plus.find('.'), prop_name_plus.find('[')]) + next_separator_positions = [ + pos + for pos in [prop_name_plus.find('.'), prop_name_plus.find('[')] + if pos != -1 + ] if next_separator_positions: end_idx = min(next_separator_positions) From 94e36c63f642953925cfc55f915715535f7aeb20 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 10:47:03 +0000 Subject: [PATCH 04/25] Removing map() --- tinyquery/compiler.py | 3 +- tinyquery/evaluator.py | 6 +- tinyquery/evaluator_test.py | 2 +- tinyquery/runtime.py | 112 ++++++++++++++++-------------------- tinyquery/tinyquery.py | 2 +- 5 files changed, 57 insertions(+), 68 deletions(-) diff --git a/tinyquery/compiler.py b/tinyquery/compiler.py index 46d36f3..3f33e0e 100644 --- a/tinyquery/compiler.py +++ b/tinyquery/compiler.py @@ -265,8 +265,7 @@ def compile_table_expr_Join(self, table_expr): [table_expr.base], (join_part.table_expr for join_part in table_expr.join_parts) ) - compiled_result = map(self.compile_joined_table, - table_expressions) + compiled_result = [self.compile_joined_table(x) for x in table_expressions] compiled_table_exprs, compiled_aliases = zip(*compiled_result) type_contexts = [compiled_table.type_ctx for compiled_table in compiled_table_exprs] diff --git a/tinyquery/evaluator.py b/tinyquery/evaluator.py index 6d803fc..588eb0d 100644 --- a/tinyquery/evaluator.py +++ b/tinyquery/evaluator.py @@ -165,11 +165,11 @@ def evaluate_orderings(self, overall_context, select_context, reversed_sort_by_indexes = collections.OrderedDict( reversed(list(sort_by_indexes.items()))) - t_all_values = map(list, zip(*all_values)) + t_all_values = [list(z) for z in zip(*all_values)] for index, is_ascending in reversed_sort_by_indexes.iteritems(): t_all_values.sort(key=lambda x: (x[index]), reverse=not is_ascending) - ordered_values = map(list, zip(*t_all_values)) + ordered_values = [list(z) for 
z in zip(*t_all_values)] # If we started evaluating an ordering over 0 rows, # all_values was originally [[], [], [], ...], i.e. the empty list for # each column, but now ordered_values is just the empty list, since @@ -364,7 +364,7 @@ def eval_table_TableUnion(self, table_expr): def eval_table_Join(self, table_expr): base_context = self.evaluate_table_expr(table_expr.base) rhs_tables, join_types = zip(*table_expr.tables) - other_contexts = map(self.evaluate_table_expr, rhs_tables) + other_contexts = [self.evaluate_table_expr(x) for x in rhs_tables] lhs_context = base_context diff --git a/tinyquery/evaluator_test.py b/tinyquery/evaluator_test.py index 61d2c2c..98f69cb 100644 --- a/tinyquery/evaluator_test.py +++ b/tinyquery/evaluator_test.py @@ -831,7 +831,7 @@ def test_hash(self): 'SELECT HASH(floats) FROM rainbow_table', self.make_context([ ('f0_', tq_types.INT, - map(hash, [1.41, 2.72, float('infinity')]))])) + [hash(x) for x in [1.41, 2.72, float('infinity')]])])) def test_null_hash(self): self.assert_query_result( diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 551a28f..87a3d71 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -158,9 +158,8 @@ def check_types(self, type1, type2): return tq_types.INT def _evaluate(self, num_rows, column1, column2): - values = map(lambda (x, y): - None if None in (x, y) else self.func(x, y), - zip(column1.values, column2.values)) + values = [None if None in (x, y) else self.func(x, y) + for x, y in zip(column1.values, column2.values)] # TODO(Samantha): Code smell incoming t = self.check_types(column1.type, column2.type) return context.Column(type=t, mode=tq_modes.NULLABLE, values=values) @@ -210,8 +209,8 @@ def _evaluate(self, num_rows, column1, column2): if other_column.type == tq_types.STRING: # Convert that string to datetime if we can. 
 try: - converted = map(lambda x: arrow.get(x).to('UTC').naive, - other_column.values) + converted = [arrow.get(x).to('UTC').naive + for x in other_column.values] except: raise TypeError('Invalid comparison on timestamp, ' 'expected numeric type or ISO8601 ' @@ -219,10 +218,8 @@ def _evaluate(self, num_rows, column1, column2): elif other_column.type in tq_types.NUMERIC_TYPE_SET: # Cast that numeric to a float accounting for microseconds and # then to a datetime. - converted = map( - pass_through_none( - lambda x: arrow.get(float(x) / 1E6).to('UTC').naive), - other_column.values) + convert = pass_through_none(lambda x: arrow.get(float(x) / 1E6).to('UTC').naive) + converted = [convert(x) for x in other_column.values] else: # No other way to compare a timestamp with anything other than - @@ -237,9 +234,8 @@ def _evaluate(self, num_rows, column1, column2): mode=other_column.mode, values=converted) - values = map(lambda (x, y): - None if None in (x, y) else self.func(x, y), - zip(column1.values, column2.values)) + values = [None if None in (x, y) else self.func(x, y) + for x, y in zip(column1.values, column2.values)] return context.Column(type=tq_types.BOOL, mode=tq_modes.NULLABLE, values=values) @@ -254,9 +250,8 @@ def check_types(self, type1, type2): return tq_types.BOOL def _evaluate(self, num_rows, column1, column2): - values = map(lambda (x, y): - None if None in (x, y) else self.func(x, y), - zip(column1.values, column2.values)) + values = [None if None in (x, y) else self.func(x, y) + for x, y in zip(column1.values, column2.values)] return context.Column(type=tq_types.BOOL, mode=tq_modes.NULLABLE, values=values) @@ -271,7 +266,7 @@ def check_types(self, arg): return tq_types.INT def _evaluate(self, num_rows, column): - values = map(self.func, column.values) + values = [self.func(x) for x in column.values] return context.Column(type=tq_types.INT, mode=tq_modes.NULLABLE, values=values) @@ -284,7 +279,7 @@ def check_types(self, arg): return tq_types.BOOL def 
_evaluate(self, num_rows, column): - values = map(self.func, column.values) + values = [self.func(x) for x in column.values] return context.Column(type=tq_types.BOOL, mode=tq_modes.NULLABLE, values=values) @@ -302,7 +297,7 @@ def check_types(self, arg): return tq_types.FLOAT def _evaluate(self, num_rows, column): - values = map(self.func, column.values) + values = [self.func(x) for x in column.values] return context.Column(type=tq_types.FLOAT, mode=tq_modes.NULLABLE, values=values) @@ -341,8 +336,8 @@ def check_types(self, arg1, arg2): def _evaluate(self, num_rows, column1, column2): t = self.check_types(column1.type, column2.type) - values = map(lambda (x, y): x if x is not None else y, - zip(column1.values, column2.values)) + values = [x if x is not None else y + for x, y in zip(column1.values, column2.values)] return context.Column(type=t, mode=tq_modes.NULLABLE, values=values) @@ -366,7 +361,7 @@ def first_nonnull(row): if x is not None: return x return None - values = map(first_nonnull, rows) + values = [first_nonnull(r) for r in rows] return context.Column(type=result_type, mode=tq_modes.NULLABLE, values=values) @@ -377,7 +372,8 @@ def check_types(self, arg): def _evaluate(self, num_rows, column): # TODO: Use CityHash. 
- values = map(pass_through_none(hash), column.values) + hash_fn = pass_through_none(hash) + values = [hash_fn(x) for x in column.values] return context.Column(type=tq_types.INT, mode=tq_modes.NULLABLE, values=values) @@ -389,8 +385,8 @@ def check_types(self, arg): return tq_types.FLOAT def _evaluate(self, num_rows, column): - values = map(pass_through_none(math.floor), - column.values) + floor = pass_through_none(math.floor) + values = [floor(x) for x in column.values] return context.Column(type=tq_types.FLOAT, mode=tq_modes.NULLABLE, values=values) @@ -412,7 +408,7 @@ def string_converter(arg): converter = string_converter elif column.type == tq_types.TIMESTAMP: return timestamp_to_usec.evaluate(num_rows, column) - values = map(converter, column.values) + values = [converter(x) for x in column.values] return context.Column(type=tq_types.INT, mode=tq_modes.NULLABLE, values=values) @@ -578,8 +574,7 @@ def check_types(self, arg1, *arg_types): def _evaluate(self, num_rows, arg1, *other_args): values = [val1 in val_list for val1, val_list in zip(arg1.values, - zip(*(map(lambda x: x.values, - other_args))))] + zip(*[x.values for x in other_args]))] return context.Column(type=tq_types.BOOL, mode=tq_modes.NULLABLE, values=values) @@ -591,8 +586,8 @@ def check_types(self, *arg_types): return tq_types.STRING def _evaluate(self, num_rows, *columns): - values = map(lambda strs: None if None in strs else ''.join(strs), - zip(*map(lambda x: x.values, columns))) + values = [None if None in strs else ''.join(strs) + for strs in zip(*[x.values for x in columns])] return context.Column(tq_types.STRING, tq_modes.NULLABLE, values=values) @@ -602,7 +597,8 @@ def check_types(self, arg_type): return tq_types.STRING def _evaluate(self, num_rows, column): - values = map(pass_through_none(str), column.values) + pass_through_none_str = pass_through_none(str) + values = [pass_through_none_str(x) for x in column.values] return context.Column(type=tq_types.STRING, mode=tq_modes.NULLABLE, 
values=values) @@ -724,9 +720,8 @@ def check_types(self, type1, type2): def _evaluate(self, num_rows, column1, column2): if len(column1.values) == len(column2.values): - values = map(lambda (v1, v2): None if None in (v1, v2) else - v2 in v1, - zip(column1.values, column2.values)) + values = [None if None in (v1, v2) else v2 in v1 + for v1, v2 in zip(column1.values, column2.values)] return context.Column(type=tq_types.BOOL, mode=tq_modes.NULLABLE, values=values) @@ -749,13 +744,12 @@ def _evaluate(self, num_rows, column): # epoch here, whereas arrow wants a unix timestamp, with possible # decimal part representing microseconds. converter = lambda ts: float(ts) / 1E6 + convert_fn = pass_through_none( + # arrow.get parses ISO8601 strings and int/float unix + # timestamps without a format parameter + lambda ts: arrow.get(converter(ts)).to('UTC').naive) try: - values = map( - pass_through_none( - # arrow.get parses ISO8601 strings and int/float unix - # timestamps without a format parameter - lambda ts: arrow.get(converter(ts)).to('UTC').naive), - column.values) + values = [convert_fn(x) for x in column.values] except: raise TypeError( 'TIMESTAMP requires an ISO8601 string or unix timestamp in ' @@ -775,7 +769,7 @@ def check_types(self, type1): return self.type def _evaluate(self, num_rows, column1): - values = map(self.extractor, column1.values) + values = [self.extractor(x) for x in column1.values] return context.Column(type=self.type, mode=tq_modes.NULLABLE, values=values) @@ -807,20 +801,19 @@ def adder(ts): year = ts.year + (ts.month - 1 + num_intervals) // 12 month = 1 + (ts.month - 1 + num_intervals) % 12 return ts.replace(year=year, month=month) - values = map(adder, timestamps.values) + values = [adder(x) for x in timestamps.values] elif interval_type == 'YEAR': - values = map( - pass_through_none( - lambda ts: ts.replace(year=(ts.year + num_intervals))), - timestamps.values) + convert_fn = pass_through_none( + lambda ts: ts.replace(year=(ts.year + 
num_intervals))) + values = [convert_fn(x) for x in timestamps.values] else: # All of the other valid options for bigquery are also valid # keyword arguments to datetime.timedelta, when lowercased and # pluralized. python_interval_name = interval_type.lower() + 's' delta = datetime.timedelta(**{python_interval_name: num_intervals}) - values = map(pass_through_none(lambda ts: ts + delta), - timestamps.values) + convert_fn = pass_through_none(lambda ts: ts + delta) + values = [convert_fn(x) for x in timestamps.values] return context.Column(type=tq_types.TIMESTAMP, mode=tq_modes.NULLABLE, values=values) @@ -834,9 +827,8 @@ def check_types(self, type1, type2): return tq_types.INT def _evaluate(self, num_rows, lhs_ts, rhs_ts): - values = map(lambda (lhs, rhs): None if None in (lhs, rhs) else - int(round((lhs - rhs).total_seconds() / 24 / 3600)), - zip(lhs_ts.values, rhs_ts.values)) + values = [None if None in (lhs, rhs) else int(round((lhs - rhs).total_seconds() / 24 / 3600)) + for lhs, rhs in zip(lhs_ts.values, rhs_ts.values)] return context.Column(type=tq_types.INT, mode=tq_modes.NULLABLE, values=values) @@ -892,7 +884,7 @@ def _year_truncate(self, ts): def _evaluate(self, num_rows, timestamps): truncate_fn = pass_through_none( getattr(self, '_%s_truncate' % self.interval)) - values = map(truncate_fn, timestamps.values) + values = [truncate_fn(x) for x in timestamps.values] return context.Column(type=tq_types.TIMESTAMP, mode=tq_modes.NULLABLE, values=values) @@ -919,11 +911,10 @@ def _evaluate(self, num_rows, unix_timestamps, weekdays): timestamps = TimestampFunction().evaluate(num_rows, unix_timestamps) truncated = TimestampShiftFunction('day').evaluate( num_rows, timestamps) - values = map( - pass_through_none( + convert = pass_through_none( lambda ts: ts + datetime.timedelta( - days=(weekday - self._weekday_from_ts(ts)))), - truncated.values) + days=(weekday - self._weekday_from_ts(ts)))) + values = [convert(x) for x in truncated.values] ts_result = context.Column( 
type=tq_types.TIMESTAMP, mode=tq_modes.NULLABLE, values=values) return timestamp_to_usec.evaluate(num_rows, ts_result) @@ -945,9 +936,8 @@ def check_types(self, type1, type2): def _evaluate(self, num_rows, unix_timestamps, formats): format_str = _ensure_literal(formats.values) timestamps = TimestampFunction().evaluate(num_rows, unix_timestamps) - values = map( - pass_through_none(lambda ts: ts.strftime(format_str)), - timestamps.values) + convert = pass_through_none(lambda ts: ts.strftime(format_str)) + values = [convert(x) for x in timestamps.values] return context.Column(type=tq_types.STRING, mode=tq_modes.NULLABLE, values=values) @@ -1084,9 +1074,9 @@ def _extract_by_json_path(self, parsed_json_expr, json_path): def _evaluate(self, num_rows, json_expressions, json_paths): json_path = _ensure_literal(json_paths.values) - parsed_json = map( - pass_through_none(json.loads), - json_expressions.values) + json_load = pass_through_none(json.loads) + parsed_json = [json_load(x) + for x in json_expressions.values] if not json_path.startswith('$'): raise ValueError( 'Invalid json path expression. 
Must start with $.') diff --git a/tinyquery/tinyquery.py b/tinyquery/tinyquery.py index 9e93c72..ae91889 100644 --- a/tinyquery/tinyquery.py +++ b/tinyquery/tinyquery.py @@ -89,7 +89,7 @@ def run_cast_function(key, mode, value): if value is None: return None elif mode == tq_modes.REPEATED: - return map(cast_function, value) + return [cast_function(x) for x in value] else: if isinstance(value, str): return cast_function(value.decode('utf-8')) From 8d56839f0afdd0ed2b782c4fae0d41f875269789 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 11:03:02 +0000 Subject: [PATCH 05/25] Python 3 string handling --- tinyquery/compiler.py | 2 +- tinyquery/tinyquery.py | 4 ++-- tinyquery/tq_types.py | 9 +++++++-- tinyquery/type_context.py | 4 ++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tinyquery/compiler.py b/tinyquery/compiler.py index c95ec6f..95794df 100644 --- a/tinyquery/compiler.py +++ b/tinyquery/compiler.py @@ -467,7 +467,7 @@ def compile_Literal(self, expr, type_ctx): return typed_ast.Literal(expr.value, tq_types.INT) if isinstance(expr.value, float): return typed_ast.Literal(expr.value, tq_types.FLOAT) - elif isinstance(expr.value, basestring): + elif isinstance(expr.value, tq_types.STRING_TYPE): return typed_ast.Literal(expr.value, tq_types.STRING) elif expr.value is None: return typed_ast.Literal(expr.value, tq_types.NONETYPE) diff --git a/tinyquery/tinyquery.py b/tinyquery/tinyquery.py index f5ebf46..b8ee3cd 100644 --- a/tinyquery/tinyquery.py +++ b/tinyquery/tinyquery.py @@ -91,7 +91,7 @@ def run_cast_function(key, mode, value): elif mode == tq_modes.REPEATED: return map(cast_function, value) else: - if isinstance(value, str): + if isinstance(value, tq_types.BINARY_TYPE): return cast_function(value.decode('utf-8')) else: return cast_function(value) @@ -379,7 +379,7 @@ class Table(object): def __init__(self, name, num_rows, columns): assert isinstance(columns, collections.OrderedDict) for col_name, column in columns.iteritems(): - 
assert isinstance(col_name, basestring) + assert isinstance(col_name, tq_types.STRING_TYPE) assert len(column.values) == num_rows, ( 'Column %s had %s rows, expected %s.' % ( col_name, len(column.values), num_rows)) diff --git a/tinyquery/tq_types.py b/tinyquery/tq_types.py index 572778b..a3d9f8f 100644 --- a/tinyquery/tq_types.py +++ b/tinyquery/tq_types.py @@ -1,7 +1,11 @@ """Defines the valid types. Currently we just uses strings to identify them. """ +import sys + import arrow +PY3 = sys.version_info[0] == 3 + # TODO(Samantha): Structs. INT = 'INTEGER' @@ -20,11 +24,12 @@ INT: int, FLOAT: float, BOOL: bool, - STRING: unicode, + STRING: str if PY3 else unicode, TIMESTAMP: lambda val: arrow.get(val).to('UTC').naive, NONETYPE: lambda _: None, 'null': lambda _: None } DATETIME_TYPE_SET = set([INT, STRING, TIMESTAMP]) -TYPE_TYPE = basestring +BINARY_TYPE = bytes if PY3 else str +TYPE_TYPE = STRING_TYPE = str if PY3 else basestring diff --git a/tinyquery/type_context.py b/tinyquery/type_context.py index 9c2a7e8..20053fd 100644 --- a/tinyquery/type_context.py +++ b/tinyquery/type_context.py @@ -56,8 +56,8 @@ def from_full_columns(cls, full_columns, implicit_column_context=None, """Given just the columns field, fill in alias information.""" for (table_name, col_name), col_type in full_columns.iteritems(): if table_name is not None: - cls.assert_type(table_name, basestring) - cls.assert_type(col_name, basestring) + cls.assert_type(table_name, tq_types.STRING_TYPE) + cls.assert_type(col_name, tq_types.STRING_TYPE) cls.assert_type(col_type, tq_types.TYPE_TYPE) aliases = {} From 6fb03ab0448ff5ccddfb411114393aa413839e1a Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 11:52:09 +0000 Subject: [PATCH 06/25] iteritems and itervalues replaced with items and values --- tinyquery/api_client.py | 4 ++-- tinyquery/compiler.py | 2 +- tinyquery/context.py | 40 +++++++++++++++++++-------------------- tinyquery/evaluator.py | 8 ++++---- tinyquery/tinyquery.py | 16 
++++++++-------- tinyquery/type_context.py | 10 +++++----- 6 files changed, 40 insertions(+), 40 deletions(-) diff --git a/tinyquery/api_client.py b/tinyquery/api_client.py index 1d203c9..1c5803a 100644 --- a/tinyquery/api_client.py +++ b/tinyquery/api_client.py @@ -225,7 +225,7 @@ def schema_from_table(table): """Given a tinyquery.Table, build an API-compatible schema.""" return {'fields': [ {'name': name, 'type': col.type} - for name, col in table.columns.iteritems() + for name, col in table.columns.items() ]} @@ -234,7 +234,7 @@ def rows_from_table(table): result_rows = [] for i in xrange(table.num_rows): field_values = [{'v': str(col.values[i])} - for col in table.columns.itervalues()] + for col in table.columns.values()] result_rows.append({ 'f': field_values }) diff --git a/tinyquery/compiler.py b/tinyquery/compiler.py index 3f33e0e..59dd4ed 100644 --- a/tinyquery/compiler.py +++ b/tinyquery/compiler.py @@ -225,7 +225,7 @@ def compile_table_expr_TableId(self, table_expr): def compile_table_ref(self, table_expr, table): alias = table_expr.alias or table_expr.name columns = collections.OrderedDict([ - (name, column.type) for name, column in table.columns.iteritems() + (name, column.type) for name, column in table.columns.items() ]) type_ctx = type_context.TypeContext.from_table_and_columns( alias, columns, None) diff --git a/tinyquery/context.py b/tinyquery/context.py index 08f72dd..9bafea1 100644 --- a/tinyquery/context.py +++ b/tinyquery/context.py @@ -25,7 +25,7 @@ class Context(object): """ def __init__(self, num_rows, columns, aggregate_context): assert isinstance(columns, collections.OrderedDict) - for (table_name, col_name), column in columns.iteritems(): + for (table_name, col_name), column in columns.items(): assert len(column.values) == num_rows, ( 'Column %s had %s rows, expected %s.' 
% ( (table_name, col_name), len(column.values), num_rows)) @@ -69,22 +69,22 @@ def context_from_table(table, type_context): The order of the columns in the type context must match the order of the columns in the table. """ - any_column = table.columns.itervalues().next() + any_column = table.columns.values().next() new_columns = collections.OrderedDict([ (column_name, column) for (column_name, column) in zip(type_context.columns, - table.columns.itervalues()) + table.columns.values()) ]) return Context(len(any_column.values), new_columns, None) def context_with_overlayed_type_context(context, type_context): """Given a context, use the given type context for all column names.""" - any_column = context.columns.itervalues().next() + any_column = context.columns.values().next() new_columns = collections.OrderedDict([ (column_name, column) for (column_name, column) in zip(type_context.columns, - context.columns.itervalues()) + context.columns.values()) ]) return Context(len(any_column.values), new_columns, None) @@ -94,7 +94,7 @@ def empty_context_from_type_context(type_context): result_columns = collections.OrderedDict( # TODO(Samantha): Fix this. 
Mode is not always nullable (col_name, Column(type=col_type, mode=tq_modes.NULLABLE, values=[])) - for col_name, col_type in type_context.columns.iteritems() + for col_name, col_type in type_context.columns.items() ) return Context(0, result_columns, None) @@ -118,7 +118,7 @@ def mask_context(context, mask): [r for r in (any(row) for row in mask.values) if r] ) new_columns = collections.OrderedDict() - for col_name, col in context.columns.iteritems(): + for col_name, col in context.columns.items(): if col.mode == tq_modes.REPEATED: allowable = True new_values = [] @@ -181,7 +181,7 @@ def mask_context(context, mask): values=new_values) else: orig_column_values = [ - col.values for col in context.columns.itervalues()] + col.values for col in context.columns.values()] mask_values = mask.values num_rows = len([v for v in mask.values if v]) new_values = [ @@ -189,7 +189,7 @@ def mask_context(context, mask): type=col.type, mode=col.mode, values=list(itertools.compress(values, mask_values))) - for col, values in zip(context.columns.itervalues(), + for col, values in zip(context.columns.values(), orig_column_values)] new_columns = collections.OrderedDict([ (name, col) for name, col in zip(context.columns, @@ -207,7 +207,7 @@ def empty_context_from_template(context): num_rows=0, columns=collections.OrderedDict( (name, empty_column_from_template(column)) - for name, column in context.columns.iteritems() + for name, column in context.columns.items() ), aggregate_context=None) @@ -223,7 +223,7 @@ def append_row_to_context(src_context, index, dest_context): The schemas of the two contexts must match. """ dest_context.num_rows += 1 - for name, column in dest_context.columns.iteritems(): + for name, column in dest_context.columns.items(): column.values.append(src_context.columns[name].values[index]) @@ -240,9 +240,9 @@ def append_partial_context_to_context(src_context, dest_context): # Ignore fully-qualified names for this operation. 
short_named_src_column_values = { col_name: column.values - for (_, col_name), column in src_context.columns.iteritems()} + for (_, col_name), column in src_context.columns.items()} - for (_, col_name), dest_column in dest_context.columns.iteritems(): + for (_, col_name), dest_column in dest_context.columns.items(): src_column_values = short_named_src_column_values.get(col_name) if src_column_values is None: dest_column.values.extend([None] * src_context.num_rows) @@ -257,7 +257,7 @@ def append_context_to_context(src_context, dest_context): account. """ dest_context.num_rows += src_context.num_rows - for dest_column_key, dest_column in dest_context.columns.iteritems(): + for dest_column_key, dest_column in dest_context.columns.items(): src_column = src_context.columns.get(dest_column_key) if src_column is None: dest_column.values.extend([None] * src_context.num_rows) @@ -271,7 +271,7 @@ def row_context_from_context(src_context, index): columns = collections.OrderedDict( (col_name, Column(type=col.type, mode=col.mode, values=[col.values[index]])) - for col_name, col in src_context.columns.iteritems() + for col_name, col in src_context.columns.items() ) return Context(1, columns, None) @@ -281,15 +281,15 @@ def cross_join_contexts(context1, context2): assert context2.aggregate_context is None result_columns = collections.OrderedDict( [(col_name, Column(type=col.type, mode=col.mode, values=[])) - for col_name, col in context1.columns.iteritems()] + + for col_name, col in context1.columns.items()] + [(col_name, Column(type=col.type, mode=col.mode, values=[])) - for col_name, col in context2.columns.iteritems()]) + for col_name, col in context2.columns.items()]) for index1 in xrange(context1.num_rows): for index2 in xrange(context2.num_rows): - for col_name, column in context1.columns.iteritems(): + for col_name, column in context1.columns.items(): result_columns[col_name].values.append(column.values[index1]) - for col_name, column in context2.columns.iteritems(): + for 
col_name, column in context2.columns.items(): result_columns[col_name].values.append(column.values[index2]) return Context(context1.num_rows * context2.num_rows, result_columns, None) @@ -303,5 +303,5 @@ def truncate_context(context, limit): return context.num_rows = limit - for column in context.columns.itervalues(): + for column in context.columns.values(): column.values[limit:] = [] diff --git a/tinyquery/evaluator.py b/tinyquery/evaluator.py index 588eb0d..fc240e4 100644 --- a/tinyquery/evaluator.py +++ b/tinyquery/evaluator.py @@ -119,7 +119,7 @@ def evaluate_groups(self, select_fields, group_set, select_context): result_context = self.empty_context_from_select_fields(select_fields) result_col_names = [field.alias for field in select_fields] - for context_key, group_context in group_contexts.iteritems(): + for context_key, group_context in group_contexts.items(): group_eval_context = context.Context( 1, context_key.columns, group_context) group_aggregate_result_context = self.evaluate_select_fields( @@ -153,12 +153,12 @@ def evaluate_orderings(self, overall_context, select_context, all_values = [] sort_by_indexes = collections.OrderedDict() - for ((_, column_name), column) in overall_context.columns.iteritems(): + for ((_, column_name), column) in overall_context.columns.items(): all_values.append(column.values) for order_by_column in ordering_col: for count, ((_, column_name), column) in enumerate( - overall_context.columns.iteritems()): + overall_context.columns.items()): if order_by_column.column_id.name == column_name: sort_by_indexes[count] = order_by_column.is_ascending break @@ -166,7 +166,7 @@ def evaluate_orderings(self, overall_context, select_context, reversed(list(sort_by_indexes.items()))) t_all_values = [list(z) for z in zip(*all_values)] - for index, is_ascending in reversed_sort_by_indexes.iteritems(): + for index, is_ascending in reversed_sort_by_indexes.items(): t_all_values.sort(key=lambda x: (x[index]), reverse=not is_ascending) 
ordered_values = [list(z) for z in zip(*t_all_values)] diff --git a/tinyquery/tinyquery.py b/tinyquery/tinyquery.py index ae91889..9e1673c 100644 --- a/tinyquery/tinyquery.py +++ b/tinyquery/tinyquery.py @@ -35,7 +35,7 @@ def load_table_from_csv(self, table_name, raw_schema, filename): 'Expected {} tokens on line {}, but got {}'.format( len(result_table.columns), line, len(tokens))) for token, column in zip(tokens, - result_table.columns.itervalues()): + result_table.columns.values()): # Run a casting function over the value we are given. # CSV doesn't have a null value, so the string 'null' is # used as the null value. @@ -143,7 +143,7 @@ def flatten_row(output, row, schema, prefix='', ever_repeated=False): return output def process_row(row): - for (key, value) in row.iteritems(): + for (key, value) in row.items(): mode = result_table.columns[key].mode token = run_cast_function(key, mode, value) if not tq_modes.check_mode(token, mode): @@ -226,7 +226,7 @@ def get_table_info(self, project, dataset, table_name): table = self.tables_by_name[dataset + '.' + table_name] schema_fields = [] # TODO(colin): record fields should appear grouped. - for col_name, column in table.columns.iteritems(): + for col_name, column in table.columns.items(): schema_fields.append({ 'name': col_name, 'type': column.type, @@ -293,7 +293,7 @@ def run_query_job(self, project_id, query, dest_dataset, dest_table_name, def table_from_context(table_name, ctx): return Table(table_name, ctx.num_rows, collections.OrderedDict( (col_name, column) - for (_, col_name), column in ctx.columns.iteritems() + for (_, col_name), column in ctx.columns.items() )) def run_copy_job(self, project_id, src_dataset, src_table_name, @@ -337,7 +337,7 @@ def load_empty_table_from_template(self, table_name, template_table): # TODO(Samantha): This shouldn't just be nullable. 
(col_name, context.Column(type=col.type, mode=tq_modes.NULLABLE, values=[])) - for col_name, col in template_table.columns.iteritems() + for col_name, col in template_table.columns.items() ) table = Table(table_name, 0, columns) self.load_table_or_view(table) @@ -345,13 +345,13 @@ def load_empty_table_from_template(self, table_name, template_table): @staticmethod def clear_table(table): table.num_rows = 0 - for column in table.columns.itervalues(): + for column in table.columns.values(): column.values[:] = [] @staticmethod def append_to_table(src_table, dest_table): dest_table.num_rows += src_table.num_rows - for col_name, column in dest_table.columns.iteritems(): + for col_name, column in dest_table.columns.items(): if col_name in src_table.columns: column.values.extend(src_table.columns[col_name].values) else: @@ -378,7 +378,7 @@ class Table(object): """ def __init__(self, name, num_rows, columns): assert isinstance(columns, collections.OrderedDict) - for col_name, column in columns.iteritems(): + for col_name, column in columns.items(): assert isinstance(col_name, basestring) assert len(column.values) == num_rows, ( 'Column %s had %s rows, expected %s.' 
% ( diff --git a/tinyquery/type_context.py b/tinyquery/type_context.py index 9c2a7e8..c3a43e3 100644 --- a/tinyquery/type_context.py +++ b/tinyquery/type_context.py @@ -41,7 +41,7 @@ def from_table_and_columns(cls, table_name, columns_without_table, collections.OrderedDict( ((table_name, column_name), col_type) for column_name, col_type - in columns_without_table.iteritems()), + in columns_without_table.items()), implicit_column_context, aggregate_context) @staticmethod @@ -54,7 +54,7 @@ def assert_type(value, expected_type): def from_full_columns(cls, full_columns, implicit_column_context=None, aggregate_context=None): """Given just the columns field, fill in alias information.""" - for (table_name, col_name), col_type in full_columns.iteritems(): + for (table_name, col_name), col_type in full_columns.items(): if table_name is not None: cls.assert_type(table_name, basestring) cls.assert_type(col_name, basestring) @@ -88,7 +88,7 @@ def union_contexts(cls, contexts): for context in contexts: assert context.aggregate_context is None - for (_, column_name), col_type in context.columns.iteritems(): + for (_, column_name), col_type in context.columns.items(): full_column = (None, column_name) if full_column in result_columns: if result_columns[full_column] == col_type: @@ -153,7 +153,7 @@ def context_with_subquery_alias(self, subquery_alias): collections.OrderedDict( ((subquery_alias, col_name), col_type) for (_, col_name), col_type - in self.implicit_column_context.columns.iteritems() + in self.implicit_column_context.columns.items() ) ) return TypeContext(self.columns, self.aliases, self.ambig_aliases, @@ -163,7 +163,7 @@ def context_with_full_alias(self, alias): assert self.aggregate_context is None new_columns = collections.OrderedDict( ((alias, col_name), col_type) - for (_, col_name), col_type in self.columns.iteritems() + for (_, col_name), col_type in self.columns.items() ) if self.implicit_column_context: new_implicit_column_context = ( From 
6020010ab6026096dbfd0a9ec5156b312b000564 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 11:52:55 +0000 Subject: [PATCH 07/25] Fixing .values --- tinyquery/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinyquery/lexer.py b/tinyquery/lexer.py index 61cccd3..26394d7 100644 --- a/tinyquery/lexer.py +++ b/tinyquery/lexer.py @@ -63,7 +63,7 @@ 'FLOAT', 'ID', 'STRING' -] + reserved_words.values() +] + list(reserved_words.values()) # wrapping with list() to support python 3 t_PLUS = r'\+' From 0130c9e8f8b83e5be84fe3010b5803ef2d3bd810 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 12:10:19 +0000 Subject: [PATCH 08/25] Python 3 zip support --- tinyquery/compiler.py | 8 +++++--- tinyquery/repeated_util.py | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tinyquery/compiler.py b/tinyquery/compiler.py index 59dd4ed..30347dc 100644 --- a/tinyquery/compiler.py +++ b/tinyquery/compiler.py @@ -279,9 +279,11 @@ def compile_table_expr_Join(self, table_expr): type_contexts) return typed_ast.Join( base=compiled_table_exprs[0], - tables=zip(compiled_table_exprs[1:], - (join_part.join_type - for join_part in table_expr.join_parts)), + # wrapping in list() for python 3 support (shouldn't be a large number + # of items so performance impact should be minimal) + tables=list(zip(compiled_table_exprs[1:], + (join_part.join_type + for join_part in table_expr.join_parts))), conditions=result_fields, type_ctx=result_type_ctx) diff --git a/tinyquery/repeated_util.py b/tinyquery/repeated_util.py index d88ca97..a5d8faf 100644 --- a/tinyquery/repeated_util.py +++ b/tinyquery/repeated_util.py @@ -80,7 +80,8 @@ def flatten_column_values(repeated_column_indices, column_values): values. The list for each column will not contain nested lists. 
""" - rows = zip(*column_values) + # wrapping in list for python 3 support + rows = list(zip(*column_values)) repetition_counts = [ max(max(len(row[idx]) for idx in repeated_column_indices), 1) for row in rows From c98561c82a3b99c068269b0a0df7a9d73696e4f0 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 12:18:15 +0000 Subject: [PATCH 09/25] six for xrange --- setup.py | 2 +- tinyquery/api_client.py | 4 +++- tinyquery/api_client_test.py | 2 +- tinyquery/context.py | 6 ++++-- tinyquery/evaluator.py | 12 +++++++----- tinyquery/runtime.py | 7 ++++--- 6 files changed, 20 insertions(+), 13 deletions(-) diff --git a/setup.py b/setup.py index 645af58..908996e 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ url='https://github.com/Khan/tinyquery', keywords=['bigquery'], packages=['tinyquery'], - install_requires=['arrow==0.12.1', 'ply==3.10'], + install_requires=['arrow==0.12.1', 'ply==3.10', 'six==1.11.0'], classifiers=[ 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 2', diff --git a/tinyquery/api_client.py b/tinyquery/api_client.py index 1c5803a..51bd193 100644 --- a/tinyquery/api_client.py +++ b/tinyquery/api_client.py @@ -7,6 +7,8 @@ import functools import json +import six + class TinyQueryApiClient(object): def __init__(self, tq_service): @@ -232,7 +234,7 @@ def schema_from_table(table): def rows_from_table(table): """Given a tinyquery.Table, build an API-compatible rows object.""" result_rows = [] - for i in xrange(table.num_rows): + for i in six.moves.xrange(table.num_rows): field_values = [{'v': str(col.values[i])} for col in table.columns.values()] result_rows.append({ diff --git a/tinyquery/api_client_test.py b/tinyquery/api_client_test.py index 541e63b..7d4b818 100644 --- a/tinyquery/api_client_test.py +++ b/tinyquery/api_client_test.py @@ -144,7 +144,7 @@ def test_table_copy(self): } ).execute() - for _ in xrange(5): + for _ in range(5): self.tq_service.jobs().insert( projectId='test_project', body={ diff 
--git a/tinyquery/context.py b/tinyquery/context.py index 9bafea1..6805ffe 100644 --- a/tinyquery/context.py +++ b/tinyquery/context.py @@ -7,6 +7,8 @@ import itertools import logging +import six + import repeated_util import tq_modes @@ -285,8 +287,8 @@ def cross_join_contexts(context1, context2): [(col_name, Column(type=col.type, mode=col.mode, values=[])) for col_name, col in context2.columns.items()]) - for index1 in xrange(context1.num_rows): - for index2 in xrange(context2.num_rows): + for index1 in six.moves.xrange(context1.num_rows): + for index2 in six.moves.xrange(context2.num_rows): for col_name, column in context1.columns.items(): result_columns[col_name].values.append(column.values[index1]) for col_name, column in context2.columns.items(): diff --git a/tinyquery/evaluator.py b/tinyquery/evaluator.py index fc240e4..23e5156 100644 --- a/tinyquery/evaluator.py +++ b/tinyquery/evaluator.py @@ -2,6 +2,8 @@ # pep8-disable:E115,E128 import collections +import six + import context import tq_ast import tq_modes @@ -105,7 +107,7 @@ def evaluate_groups(self, select_fields, group_set, select_context): # TODO: Seems pretty ugly and wasteful to use a whole context as a # group key. 
- for i in xrange(select_context.num_rows): + for i in six.moves.xrange(select_context.num_rows): key = self.get_group_key( field_groups, alias_group_list, select_context, alias_group_result_context, i) @@ -280,7 +282,7 @@ def evaluate_within(self, select_fields, group_set, ctx, context.append_context_to_context(ctx, ctx_with_primary_key) (table_name, _), _ = ctx_with_primary_key.columns.items()[0] - row_nums = range(1, ctx_with_primary_key.num_rows + 1) + row_nums = list(six.moves.xrange(1, ctx_with_primary_key.num_rows + 1)) row_nums_col = context.Column( type=tq_types.INT, mode=tq_modes.NULLABLE, values=row_nums) ctx_with_primary_key.columns[(table_name, @@ -382,7 +384,7 @@ def eval_table_Join(self, table_expr): lhs_key_refs = [cond.column1 for cond in conditions] rhs_key_refs = [cond.column2 for cond in conditions] rhs_key_contexts = {} - for i in xrange(rhs_context.num_rows): + for i in six.moves.xrange(rhs_context.num_rows): rhs_key = self.get_join_key(rhs_context, rhs_key_refs, i) if rhs_key not in rhs_key_contexts: rhs_key_contexts[rhs_key] = ( @@ -395,7 +397,7 @@ def eval_table_Join(self, table_expr): context.empty_context_from_template(lhs_context), context.empty_context_from_template(rhs_context)) - for i in xrange(lhs_context.num_rows): + for i in six.moves.xrange(lhs_context.num_rows): lhs_key = self.get_join_key(lhs_context, lhs_key_refs, i) lhs_row_context = context.row_context_from_context( lhs_context, i) @@ -468,7 +470,7 @@ def evaluate_AggregateFunctionCall(self, func_call, context): return func_call.func.evaluate(context.num_rows, *arg_results) def evaluate_Literal(self, literal, context_object): - values = [literal.value for _ in xrange(context_object.num_rows)] + values = [literal.value for _ in six.moves.xrange(context_object.num_rows)] return context.Column(type=literal.type, mode=tq_modes.NULLABLE, values=values) diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 87a3d71..2328887 100644 --- a/tinyquery/runtime.py +++ 
b/tinyquery/runtime.py @@ -9,6 +9,7 @@ import time import arrow +import six import compiler import context @@ -418,7 +419,7 @@ def check_types(self): return tq_types.FLOAT def _evaluate(self, num_rows): - values = [random.random() for _ in xrange(num_rows)] + values = [random.random() for _ in six.moves.xrange(num_rows)] # TODO(Samantha): Should this be required? return context.Column(type=tq_types.FLOAT, mode=tq_modes.NULLABLE, values=values) @@ -564,7 +565,7 @@ def check_types(self): def _evaluate(self, num_rows): return context.Column(type=self.type, mode=tq_modes.NULLABLE, - values=[self.func() for _ in xrange(num_rows)]) + values=[self.func() for _ in six.moves.xrange(num_rows)]) class InFunction(ScalarFunction): @@ -706,7 +707,7 @@ def _evaluate(self, num_rows, column, num_quantiles_list): sorted_args[ min(len(sorted_args) * i / (num_quantiles - 1), len(sorted_args) - 1) - ] for i in xrange(num_quantiles) + ] for i in six.moves.xrange(num_quantiles) ]] return context.Column(type=tq_types.INT, mode=tq_modes.REPEATED, values=values) From 9e96b657b25e5ca11c7de59a1b3c729f9c542351 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 12:25:24 +0000 Subject: [PATCH 10/25] Fixing uses of next --- tinyquery/context.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tinyquery/context.py b/tinyquery/context.py index 9bafea1..89cb754 100644 --- a/tinyquery/context.py +++ b/tinyquery/context.py @@ -69,7 +69,7 @@ def context_from_table(table, type_context): The order of the columns in the type context must match the order of the columns in the table. 
""" - any_column = table.columns.values().next() + any_column = table.columns[next(iter(table.columns))] new_columns = collections.OrderedDict([ (column_name, column) for (column_name, column) in zip(type_context.columns, @@ -80,7 +80,7 @@ def context_from_table(table, type_context): def context_with_overlayed_type_context(context, type_context): """Given a context, use the given type context for all column names.""" - any_column = context.columns.values().next() + any_column = context.columns[next(iter(context.columns))] new_columns = collections.OrderedDict([ (column_name, column) for (column_name, column) in zip(type_context.columns, From b86aad835f186ca5a6a50a4820c60d7382d27bc9 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 12:28:34 +0000 Subject: [PATCH 11/25] Using six instead --- tinyquery/compiler.py | 4 +++- tinyquery/tinyquery.py | 6 ++++-- tinyquery/tq_types.py | 10 +++------- tinyquery/type_context.py | 6 ++++-- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tinyquery/compiler.py b/tinyquery/compiler.py index 95794df..9b2a7e4 100644 --- a/tinyquery/compiler.py +++ b/tinyquery/compiler.py @@ -7,6 +7,8 @@ import collections import itertools +import six + import parser import runtime import tq_ast @@ -467,7 +469,7 @@ def compile_Literal(self, expr, type_ctx): return typed_ast.Literal(expr.value, tq_types.INT) if isinstance(expr.value, float): return typed_ast.Literal(expr.value, tq_types.FLOAT) - elif isinstance(expr.value, tq_types.STRING_TYPE): + elif isinstance(expr.value, six.text_type): return typed_ast.Literal(expr.value, tq_types.STRING) elif expr.value is None: return typed_ast.Literal(expr.value, tq_types.NONETYPE) diff --git a/tinyquery/tinyquery.py b/tinyquery/tinyquery.py index b8ee3cd..3435793 100644 --- a/tinyquery/tinyquery.py +++ b/tinyquery/tinyquery.py @@ -2,6 +2,8 @@ import collections import json +import six + import compiler import context import evaluator @@ -91,7 +93,7 @@ def run_cast_function(key, 
mode, value): elif mode == tq_modes.REPEATED: return map(cast_function, value) else: - if isinstance(value, tq_types.BINARY_TYPE): + if isinstance(value, six.binary_type): return cast_function(value.decode('utf-8')) else: return cast_function(value) @@ -379,7 +381,7 @@ class Table(object): def __init__(self, name, num_rows, columns): assert isinstance(columns, collections.OrderedDict) for col_name, column in columns.iteritems(): - assert isinstance(col_name, tq_types.STRING_TYPE) + assert isinstance(col_name, six.text_type) assert len(column.values) == num_rows, ( 'Column %s had %s rows, expected %s.' % ( col_name, len(column.values), num_rows)) diff --git a/tinyquery/tq_types.py b/tinyquery/tq_types.py index a3d9f8f..43ba052 100644 --- a/tinyquery/tq_types.py +++ b/tinyquery/tq_types.py @@ -1,10 +1,7 @@ """Defines the valid types. Currently we just uses strings to identify them. """ -import sys - import arrow - -PY3 = sys.version_info[0] == 3 +import six # TODO(Samantha): Structs. @@ -24,12 +21,11 @@ INT: int, FLOAT: float, BOOL: bool, - STRING: str if PY3 else unicode, + STRING: six.text_type, TIMESTAMP: lambda val: arrow.get(val).to('UTC').naive, NONETYPE: lambda _: None, 'null': lambda _: None } DATETIME_TYPE_SET = set([INT, STRING, TIMESTAMP]) -BINARY_TYPE = bytes if PY3 else str -TYPE_TYPE = STRING_TYPE = str if PY3 else basestring +TYPE_TYPE = six.string_types diff --git a/tinyquery/type_context.py b/tinyquery/type_context.py index 20053fd..03c3398 100644 --- a/tinyquery/type_context.py +++ b/tinyquery/type_context.py @@ -1,6 +1,8 @@ import collections import re +import six + import compiler import tq_types import typed_ast @@ -56,8 +58,8 @@ def from_full_columns(cls, full_columns, implicit_column_context=None, """Given just the columns field, fill in alias information.""" for (table_name, col_name), col_type in full_columns.iteritems(): if table_name is not None: - cls.assert_type(table_name, tq_types.STRING_TYPE) - cls.assert_type(col_name, 
tq_types.STRING_TYPE) + cls.assert_type(table_name, six.text_type) + cls.assert_type(col_name, six.text_type) cls.assert_type(col_type, tq_types.TYPE_TYPE) aliases = {} From 2c4038d86086aba70c2231cc900d802b871e4a37 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 12:30:04 +0000 Subject: [PATCH 12/25] Correct types --- tinyquery/compiler.py | 2 +- tinyquery/tinyquery.py | 2 +- tinyquery/type_context.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tinyquery/compiler.py b/tinyquery/compiler.py index 9b2a7e4..985c16c 100644 --- a/tinyquery/compiler.py +++ b/tinyquery/compiler.py @@ -469,7 +469,7 @@ def compile_Literal(self, expr, type_ctx): return typed_ast.Literal(expr.value, tq_types.INT) if isinstance(expr.value, float): return typed_ast.Literal(expr.value, tq_types.FLOAT) - elif isinstance(expr.value, six.text_type): + elif isinstance(expr.value, six.string_types): return typed_ast.Literal(expr.value, tq_types.STRING) elif expr.value is None: return typed_ast.Literal(expr.value, tq_types.NONETYPE) diff --git a/tinyquery/tinyquery.py b/tinyquery/tinyquery.py index 3435793..4d79850 100644 --- a/tinyquery/tinyquery.py +++ b/tinyquery/tinyquery.py @@ -381,7 +381,7 @@ class Table(object): def __init__(self, name, num_rows, columns): assert isinstance(columns, collections.OrderedDict) for col_name, column in columns.iteritems(): - assert isinstance(col_name, six.text_type) + assert isinstance(col_name, six.string_types) assert len(column.values) == num_rows, ( 'Column %s had %s rows, expected %s.' 
% ( col_name, len(column.values), num_rows)) diff --git a/tinyquery/type_context.py b/tinyquery/type_context.py index 03c3398..10106e9 100644 --- a/tinyquery/type_context.py +++ b/tinyquery/type_context.py @@ -58,8 +58,8 @@ def from_full_columns(cls, full_columns, implicit_column_context=None, """Given just the columns field, fill in alias information.""" for (table_name, col_name), col_type in full_columns.iteritems(): if table_name is not None: - cls.assert_type(table_name, six.text_type) - cls.assert_type(col_name, six.text_type) + cls.assert_type(table_name, six.string_types) + cls.assert_type(col_name, six.string_types) cls.assert_type(col_type, tq_types.TYPE_TYPE) aliases = {} From 1f2a71d263ad1b60aa643cab568b973530444990 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 13:13:07 +0000 Subject: [PATCH 13/25] Fixing python 3 issues --- tinyquery/evaluator.py | 2 +- tinyquery/runtime.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tinyquery/evaluator.py b/tinyquery/evaluator.py index fc240e4..2ef4577 100644 --- a/tinyquery/evaluator.py +++ b/tinyquery/evaluator.py @@ -279,7 +279,7 @@ def evaluate_within(self, select_fields, group_set, ctx, ctx_with_primary_key = context.empty_context_from_template(ctx) context.append_context_to_context(ctx, ctx_with_primary_key) - (table_name, _), _ = ctx_with_primary_key.columns.items()[0] + table_name = next(iter(ctx_with_primary_key.columns)) row_nums = range(1, ctx_with_primary_key.num_rows + 1) row_nums_col = context.Column( type=tq_types.INT, mode=tq_modes.NULLABLE, values=row_nums) diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 87a3d71..99467dd 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -704,7 +704,7 @@ def _evaluate(self, num_rows, column, num_quantiles_list): # quantile, so we need one more set of brackets than you might expect. 
values = [[ sorted_args[ - min(len(sorted_args) * i / (num_quantiles - 1), + min(len(sorted_args) * i // (num_quantiles - 1), len(sorted_args) - 1) ] for i in xrange(num_quantiles) ]] From 8a1793b758f1cecd7954395c1a89c5cbda11f0a1 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Wed, 31 Jan 2018 13:18:42 +0000 Subject: [PATCH 14/25] functools.reduce instead of reduce --- tinyquery/runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 99467dd..7b87fe8 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -963,7 +963,7 @@ def apply(*args): # is usually to return NULL if any arguments are NULL. if any(arg is None for arg in args): return None - return reduce(self.reducer, args) + return functools.reduce(self.reducer, args) values = [apply(*vals) for vals in zip(*[col.values for col in columns])] From 8ed1a52c250bedd870b9f97b5c897f7b7690e285 Mon Sep 17 00:00:00 2001 From: Sam Cooke Date: Tue, 13 Feb 2018 11:39:06 +0000 Subject: [PATCH 15/25] Adding support for LAST --- tinyquery/evaluator_test.py | 16 ++++++++++++++++ tinyquery/runtime.py | 21 ++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/tinyquery/evaluator_test.py b/tinyquery/evaluator_test.py index 61d2c2c..d49fe7b 100644 --- a/tinyquery/evaluator_test.py +++ b/tinyquery/evaluator_test.py @@ -1270,6 +1270,22 @@ def test_first(self): ]) ) + def test_last(self): + # Test over the equivalent of a GROUP BY + self.assert_query_result( + 'SELECT LAST(val1) FROM test_table', + self.make_context([ + ('f0_', tq_types.INT, [2]) + ]) + ) + # Test over something repeated + self.assert_query_result( + 'SELECT LAST(QUANTILES(val1, 3)) FROM test_table', + self.make_context([ + ('f0_', tq_types.INT, [8]) + ]) + ) + # TODO(colin): test behavior on empty list in both cases def test_left(self): diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 12d0eab..2c33266 100644 --- a/tinyquery/runtime.py +++ 
b/tinyquery/runtime.py @@ -558,6 +558,24 @@ def _evaluate(self, num_rows, column): values=values) +class LastFunction(AggregateFunction): + def check_types(self, rep_list_type): + return rep_list_type + + def _evaluate(self, num_rows, column): + values = [] + if len(column.values) == 0: + values = [None] + + if column.mode == tq_modes.REPEATED: + values = [repeated_row[-1] if len(repeated_row) > 0 else None + for repeated_row in column.values] + else: + values = [column.values[-1]] + return context.Column(type=column.type, mode=tq_modes.NULLABLE, + values=values) + + class NoArgFunction(ScalarFunction): def __init__(self, func, return_type=tq_types.INT): self.func = func @@ -1304,7 +1322,8 @@ def _evaluate(self, num_rows, json_expressions, json_paths): 'count_distinct': CountDistinctFunction(), 'stddev_samp': StddevSampFunction(), 'quantiles': QuantilesFunction(), - 'first': FirstFunction() + 'first': FirstFunction(), + 'last': LastFunction(), } From 45e8902a29d189b573e0c5732a67034f3d7a451b Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Wed, 14 Feb 2018 11:32:57 +0000 Subject: [PATCH 16/25] Fix num_rows when loading table from JSON --- tinyquery/tinyquery.py | 1 + tinyquery/tinyquery_test.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/tinyquery/tinyquery.py b/tinyquery/tinyquery.py index f5ebf46..0898401 100644 --- a/tinyquery/tinyquery.py +++ b/tinyquery/tinyquery.py @@ -157,6 +157,7 @@ def process_row(row): row = json.loads(line) flattened_row = flatten_row({}, row, fake_raw_schema) process_row(flattened_row) + result_table.num_rows += 1 self.load_table_or_view(result_table) diff --git a/tinyquery/tinyquery_test.py b/tinyquery/tinyquery_test.py index 40e5f74..a7e182c 100644 --- a/tinyquery/tinyquery_test.py +++ b/tinyquery/tinyquery_test.py @@ -66,6 +66,7 @@ def test_make_empty_table(self): table = tinyquery.TinyQuery.make_empty_table( 'test_table', self.record_schema) self.assertIn('r.r2.d2', table.columns) + 
self.assertEqual(table.num_rows, 0) def test_load_table_from_newline_delimited_json(self): record_json = json.dumps({ @@ -86,6 +87,7 @@ def test_load_table_from_newline_delimited_json(self): table = tq.tables_by_name['test_table'] self.assertIn('r.r2.d2', table.columns) self.assertIn(3, table.columns['r.r2.d2'].values) + self.assertEqual(table.num_rows, 1) def test_load_json_with_null_records(self): record_json = json.dumps({ @@ -135,3 +137,21 @@ def test_load_json_with_repeated_records(self): ['a', 'b', 'c', 'd', 'e']) self.assertEqual(table.columns['r.inner_repeated'].values[0], ['l', 'm', 'n']) + + def test_load_table_multiple_rows_count(self): + record_json = json.dumps({ + 'i': 1, + 'r': { + 's': 'hello!', + 'r2': { + 'd2': 3, + }, + }, + }) + tq = tinyquery.TinyQuery() + tq.load_table_from_newline_delimited_json( + 'test_table', + json.dumps(self.record_schema['fields']), + [record_json, record_json, record_json, record_json]) + table = tq.tables_by_name['test_table'] + self.assertEqual(table.num_rows, 4) From 514979ee7d3520ef95f4ad426a234ab8b024558d Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Wed, 14 Mar 2018 10:52:01 +0000 Subject: [PATCH 17/25] Add string_agg --- tinyquery/evaluator_test.py | 14 +++++++------- tinyquery/runtime.py | 5 +++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tinyquery/evaluator_test.py b/tinyquery/evaluator_test.py index f635efa..0801b8f 100644 --- a/tinyquery/evaluator_test.py +++ b/tinyquery/evaluator_test.py @@ -1004,28 +1004,28 @@ def test_null_count_distinct(self): 'SELECT COUNT(DISTINCT val1) FROM some_nulls_table', self.make_context([('f0_', tq_types.INT, [2])])) - def test_group_concat_unquoted(self): + def test_string_agg(self): self.assert_query_result( - 'SELECT GROUP_CONCAT_UNQUOTED(str) FROM string_table', + 'SELECT STRING_AGG(str) FROM string_table', self.make_context([ ('f0_', tq_types.STRING, ['hello,world']) ])) self.assert_query_result( - 'SELECT GROUP_CONCAT_UNQUOTED(children.name) FROM 
record_table_2', + 'SELECT STRING_AGG(children.name) FROM record_table_2', self.make_context([ ('f0_', tq_types.STRING, ['Jane,John,Earl,Sam,Kit']) ])) - def test_null_group_concat_unquoted(self): + def test_null_string_agg(self): self.assert_query_result( - 'SELECT GROUP_CONCAT_UNQUOTED(str) FROM string_table_with_null', + 'SELECT STRING_AGG(str) FROM string_table_with_null', self.make_context([ ('f0_', tq_types.STRING, ['hello,world']) ])) - def test_group_concat_unquoted_separator(self): + def test_string_agg_separator(self): self.assert_query_result( - 'SELECT GROUP_CONCAT_UNQUOTED(str, \' || \') FROM string_table', + 'SELECT STRING_AGG(str, \' || \') FROM string_table', self.make_context([ ('f0_', tq_types.STRING, ['hello || world']) ])) diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index bd2c35f..5dd981d 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -693,7 +693,7 @@ def _evaluate(self, num_rows, column): values=[len(set(values) - set([None]))]) -class GroupConcatUnquotedFunction(AggregateFunction): +class StringAggFunction(AggregateFunction): def check_types(self, *arg_types): return tq_types.STRING @@ -1332,7 +1332,8 @@ def _evaluate(self, num_rows, json_expressions, json_paths): 'count': CountFunction(), 'avg': AvgFunction(), 'count_distinct': CountDistinctFunction(), - 'group_concat_unquoted': GroupConcatUnquotedFunction(), + 'string_agg': StringAggFunction(), + 'group_concat_unquoted': StringAggFunction(), 'stddev_samp': StddevSampFunction(), 'quantiles': QuantilesFunction(), 'first': FirstFunction(), From 6fe4b4932f25984d11a6ef1a744845c71f9dcfd3 Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Wed, 18 Apr 2018 22:13:34 +0100 Subject: [PATCH 18/25] Fix datetime typo (#14) --- tinyquery/runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 5dd981d..5ad8922 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -212,7 +212,7 @@ def _evaluate(self, 
num_rows, column1, column2): if other_column.type == tq_types.STRING: # Convert that string to datetime if we can. try: - converted = [arrow.get(x).to('UTC').native + converted = [arrow.get(x).to('UTC').naive for x in other_column.values] except: raise TypeError('Invalid comparison on timestamp, ' From 12cc00131e8c3d12ecc14f6e1336a1dcdfe86f80 Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Tue, 29 May 2018 11:19:28 +0100 Subject: [PATCH 19/25] Add replace function --- tinyquery/evaluator_test.py | 5 +++++ tinyquery/runtime.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/tinyquery/evaluator_test.py b/tinyquery/evaluator_test.py index f635efa..38545af 100644 --- a/tinyquery/evaluator_test.py +++ b/tinyquery/evaluator_test.py @@ -1339,6 +1339,11 @@ def test_other_timestamp_functions(self): self.make_context([ ('f0_', tq_types.INT, [1262304000000000])])) + def test_replace(self): + self.assert_query_result( + "SELECT REPLACE(str, 'o', 'e') FROM string_table_with_null", + self.make_context([('f0_', tq_types.STRING, ["helle", "werld", null])])) + def test_first(self): # Test over the equivalent of a GROUP BY self.assert_query_result( diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index bd2c35f..76860b7 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -1011,6 +1011,20 @@ def apply(*args): values=values) +class ReplaceFunction(ScalarFunction): + def check_types(self, *arg_types): + if any(arg_type != tq_types.STRING for arg_type in arg_types): + raise TypeError('REPLACE only takes string arguments.') + return tq_types.STRING + + def _evaluate(self, num_rows, values, old, new): + replace_fn = pass_through_none( + lambda s: s.replace(old, new)) + values = [replace_fn(x) for x in values.values] + return context.Column(tq_types.STRING, tq_modes.NULLABLE, + values=values) + + class JSONExtractFunction(ScalarFunction): """Extract from a JSON string based on a JSONPath expression. 
From a67289bb89c7e9f60741609eda3175a4a4e8e798 Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Tue, 29 May 2018 11:28:40 +0100 Subject: [PATCH 20/25] Add to replace to dict --- tinyquery/runtime.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 76860b7..dabcff6 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -1334,6 +1334,7 @@ def _evaluate(self, num_rows, json_expressions, json_paths): lambda dt: dt.year, return_type=tq_types.INT), TimestampFunction()), + 'replace': ReplaceFunction(), 'json_extract': JSONExtractFunction(), 'json_extract_scalar': JSONExtractFunction(scalar=True), } From b58a3d9b24edf858ecb024d5f30383da2d807706 Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Tue, 29 May 2018 11:57:05 +0100 Subject: [PATCH 21/25] Fix --- tinyquery/runtime.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 88c817a..6d7fa8c 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -1018,9 +1018,10 @@ def check_types(self, *arg_types): return tq_types.STRING def _evaluate(self, num_rows, values, old, new): - replace_fn = pass_through_none( - lambda s: s.replace(old, new)) - values = [replace_fn(x) for x in values.values] + values = [value.replace(old, new) if value is not None else None + for value, old, new in zip(values.values, + old.values, + new.values)] return context.Column(tq_types.STRING, tq_modes.NULLABLE, values=values) From 4951b8572db1127ee1d6044810205179ae6afb1c Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Thu, 14 Mar 2019 12:27:18 +0000 Subject: [PATCH 22/25] Add FORMAT_TIMESTAMP --- tinyquery/runtime.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 6d7fa8c..91f2254 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -1250,6 +1250,7 @@ def _evaluate(self, num_rows, json_expressions, json_paths): lambda dt: int(dt.strftime('%j'), 10), 
return_type=tq_types.INT), TimestampFunction()), + 'format_timestamp': StrftimeFunction(), 'format_utc_usec': Compose( TimestampExtractFunction( lambda dt: dt.strftime('%Y-%m-%d %H:%M:%S.%f'), From 64a848fc088a20a17f9f37234862ca67ae4eb196 Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Thu, 14 Mar 2019 12:42:44 +0000 Subject: [PATCH 23/25] Fix test --- tinyquery/evaluator_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinyquery/evaluator_test.py b/tinyquery/evaluator_test.py index 02a86fa..bf21293 100644 --- a/tinyquery/evaluator_test.py +++ b/tinyquery/evaluator_test.py @@ -1342,7 +1342,7 @@ def test_other_timestamp_functions(self): def test_replace(self): self.assert_query_result( "SELECT REPLACE(str, 'o', 'e') FROM string_table_with_null", - self.make_context([('f0_', tq_types.STRING, ["helle", "werld", null])])) + self.make_context([('f0_', tq_types.STRING, ["helle", "werld", None])])) def test_first(self): # Test over the equivalent of a GROUP BY From a39fd3176a2fa1e3e34cab8b44b71e25a0ea0859 Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Thu, 14 Mar 2019 12:42:52 +0000 Subject: [PATCH 24/25] Add tests --- tinyquery/evaluator_test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tinyquery/evaluator_test.py b/tinyquery/evaluator_test.py index 02a86fa..65972a8 100644 --- a/tinyquery/evaluator_test.py +++ b/tinyquery/evaluator_test.py @@ -1307,6 +1307,11 @@ def test_other_timestamp_functions(self): 'SELECT STRFTIME_UTC_USEC(1274259481071200, "%Y-%m-%d")', self.make_context([ ('f0_', tq_types.STRING, ['2010-05-19'])])) + + self.assert_query_result( + 'SELECT FORMAT_TIMESTAMP(TIMESTAMP("2015-01-02 00:00:00"), "%Y-%m-%d")', + self.make_context([ + ('f0_', tq_types.STRING, ['2015-01-02'])])) self.assert_query_result( 'SELECT USEC_TO_TIMESTAMP(1349053323000000)', From 3224623d0ab1229c8ada892607a26efa9a51d7b1 Mon Sep 17 00:00:00 2001 From: Sam Millar Date: Wed, 15 May 2019 18:06:27 +0100 Subject: [PATCH 25/25] Fix arg order --- 
tinyquery/evaluator_test.py | 2 +- tinyquery/runtime.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/tinyquery/evaluator_test.py b/tinyquery/evaluator_test.py index 8ac224b..0f177b9 100644 --- a/tinyquery/evaluator_test.py +++ b/tinyquery/evaluator_test.py @@ -1309,7 +1309,7 @@ def test_other_timestamp_functions(self): ('f0_', tq_types.STRING, ['2010-05-19'])])) self.assert_query_result( - 'SELECT FORMAT_TIMESTAMP(TIMESTAMP("2015-01-02 00:00:00"), "%Y-%m-%d")', + 'SELECT FORMAT_TIMESTAMP("%Y-%m-%d", TIMESTAMP("2015-01-02 00:00:00"))', self.make_context([ ('f0_', tq_types.STRING, ['2015-01-02'])])) diff --git a/tinyquery/runtime.py b/tinyquery/runtime.py index 91f2254..5051ebb 100644 --- a/tinyquery/runtime.py +++ b/tinyquery/runtime.py @@ -981,6 +981,23 @@ def _evaluate(self, num_rows, unix_timestamps, formats): values=values) +class FormatTimestampFunction(ScalarFunction): + def check_types(self, type1, type2): + if not (type2 in tq_types.DATETIME_TYPE_SET and + type1 == tq_types.STRING): + raise TypeError('Expected a string and a date, got %s.' 
% ( + [type1, type2])) + return tq_types.STRING + + def _evaluate(self, num_rows, formats, unix_timestamps): + format_str = _ensure_literal(formats.values) + timestamps = TimestampFunction().evaluate(num_rows, unix_timestamps) + convert = pass_through_none(lambda ts: ts.strftime(format_str)) + values = [convert(x) for x in timestamps.values] + return context.Column(type=tq_types.STRING, mode=tq_modes.NULLABLE, + values=values) + + class NumericArgReduceFunction(AggregateFunction): def __init__(self, reducer): self.reducer = reducer @@ -1250,7 +1267,7 @@ def _evaluate(self, num_rows, json_expressions, json_paths): lambda dt: int(dt.strftime('%j'), 10), return_type=tq_types.INT), TimestampFunction()), - 'format_timestamp': StrftimeFunction(), + 'format_timestamp': FormatTimestampFunction(), 'format_utc_usec': Compose( TimestampExtractFunction( lambda dt: dt.strftime('%Y-%m-%d %H:%M:%S.%f'),