datastax · absurdfarce · Jun 11, 2021 · Jul 30, 2021 · Aug 16, 2021 · Aug 16, 2021
diff --git a/python/adelphi/adelphi/cql.py b/python/adelphi/adelphi/cql.py
@@ -49,5 +49,5 @@ def export_schema(self, keyspace=None):
 
 
     def each_keyspace(self, ks_fn):
-        for (ks_name, ks_tuple) in self.keyspaces.items():
+        for (_, ks_tuple) in self.keyspaces.items():
             ks_fn(ks_tuple.ks_obj, ks_tuple.ks_id)
diff --git a/python/adelphi/adelphi/export.py b/python/adelphi/adelphi/export.py
@@ -15,7 +15,7 @@
 import hashlib
 import logging
 from base64 import urlsafe_b64encode
-from collections import namedtuple
+from collections import namedtuple, OrderedDict
 from datetime import datetime, tzinfo, timedelta
 
 try:
@@ -95,7 +95,8 @@ def make_tuple(ks):
             if props['anonymize']:
                 anonymize_keyspace(ks)
             return KsTuple(ids[orig_name], ks)
-        return {t.ks_obj.name : t for t in [make_tuple(ks) for ks in keyspaces]}
+        tuples = sorted([make_tuple(ks) for ks in keyspaces], key=lambda ks: ks.ks_obj.name)
+        return OrderedDict([(t.ks_obj.name,t) for t in tuples])
 
 
     def get_cluster_metadata(self, cluster):

diff --git a/python/adelphi/adelphi/store.py b/python/adelphi/adelphi/store.py
@@ -16,6 +16,7 @@
 # Functions to facilitate interactions with the underlying data store
 
 import logging
+from collections import OrderedDict
 from itertools import tee
 
 # Account for name change in itertools as of py3k
@@ -59,6 +60,10 @@ def build_keyspace_objects(keyspaces, metadata):
     """Build a list of cassandra.metadata.KeyspaceMetadata objects from a list of strings and a c.m.Metadata instance.  System keyspaces will be excluded."""
     all_keyspace_objs = [metadata.keyspaces[ks] for ks in keyspaces] if keyspaces is not None else metadata.keyspaces.values()
 
+    # Make sure tables are ordered (by table name)
+    for ks_obj in all_keyspace_objs:
+        ks_obj.tables = OrderedDict(sorted(ks_obj.tables.items(), key=lambda item: item[0]))
+
     # Borrowed from itertools
     def partition(pred, iterable):
         t1, t2 = tee(iterable)

diff --git a/python/adelphi/bin/adelphi b/python/adelphi/bin/adelphi
@@ -14,11 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import argparse
 import json
 import logging
 import os
-import os.path
 import sys
 from functools import partial
 

diff --git a/python/adelphi/bin/test-adelphi b/python/adelphi/bin/test-adelphi
@@ -20,7 +20,7 @@ import tox
 
 
 # Default C* versions to include in all integration tests
-DEFAULT_CASSANDRA_VERSIONS = ["2.1.22", "2.2.19", "3.0.23", "3.11.10", "4.0-rc1"]
+DEFAULT_CASSANDRA_VERSIONS = ["2.1.22", "2.2.19", "3.0.23", "3.11.10", "4.0.0"]
 
 TOX_DEPENDENCIES = """pytest
     subprocess32 ~= 3.5
@@ -33,21 +33,26 @@ def runCassandraContainer(client, version):
     return client.containers.run(name="adelphi", remove=True, detach=True, ports={9042: 9042}, image="cassandra:{}".format(version))
 
 
-def writeToxIni(version):
+def writeToxIni(version, keep_tmpdirs = False):
     config = configparser.ConfigParser()
     config["tox"] = { "envlist": "py2, py3" }
     envs = {"CASSANDRA_VERSION": version}
+    if keep_tmpdirs:
+        print("Preserving temporary directories")
+        envs["KEEP_LOGS"] = True
+    envStr = "\n    ".join(["{} = {}".format(k,v) for (k,v) in envs.items()])
     config["testenv"] = {"deps": TOX_DEPENDENCIES, \
         "commands": "pytest {posargs}", \
-        "setenv": "CASSANDRA_VERSION = {}".format(version)}
+        "setenv": envStr}
     with open(TOX_CONFIG, 'w') as configfile:
         config.write(configfile)
 
 @click.command()
 @click.option('--cassandra', '-c', multiple=True, type=str)
 @click.option('--python', '-p', multiple=True, type=click.Choice(["py2","py3"], case_sensitive = False))
 @click.option("--pytest", "-t", type=str, help="Arguments to be passed to pytest")
-def runtests(cassandra, python, pytest):
+@click.option('--keep-tmpdirs', help="Preserve temporary directories", is_flag=True)
+def runtests(cassandra, python, pytest, keep_tmpdirs):
     client = docker.from_env()
     tox_args = ["-e {}".format(py) for py in python] if python else []
     if pytest:
@@ -68,7 +73,7 @@ def runtests(cassandra, python, pytest):
         try:
             if os.path.exists(TOX_CONFIG):
                 os.remove(TOX_CONFIG)
-            writeToxIni(version)
+            writeToxIni(version, keep_tmpdirs)
 
             # cmdline() will raise SystemExit when it's done so trap that here to avoid
             # exiting all the things

diff --git a/python/adelphi/tests/integration/__init__.py b/python/adelphi/tests/integration/__init__.py
@@ -62,6 +62,17 @@ def makeTempDirs(self):
         outputDir = os.path.join(base, "outputDir")
         os.mkdir(outputDir)
         self.dirs = TempDirs(base, outputDir)
+        self.addCleanup(self.cleanUpTempDirs)
+
+
+    def cleanUpTempDirs(self):
+        # Registered via addCleanup() in makeTempDirs.  The test-adelphi runner writes KEEP_LOGS into
+        # the tox "setenv" section when invoked with --keep-tmpdirs.  Only the presence of the
+        # variable is checked here, so any value (even an empty one) preserves the directories.
+        if "KEEP_LOGS" in os.environ:
+            log.info("KEEP_LOGS env var set, preserving logs/output at {}".format(self.dirs.basePath))
+        else:
+            shutil.rmtree(self.dirs.basePath)
 
 
     def setUp(self):
@@ -73,15 +84,3 @@ def setUp(self):
         log.info("Testing Cassandra version {}".format(self.version))
 
         self.makeTempDirs()
-
-
-    def tearDown(self):
-        super(SchemaTestCase, self).tearDown()
-
-        # TODO: Note that there's no easy way to access this from test-adelphi unless we modify the
-        # ini generation code... and I'm not completely sure that's worth it.  Might want to think
-        # about just deleting this outright... or making it a CLI option that can be easily accessed.
-        if "KEEP_LOGS" in os.environ:
-            log.info("KEEP_LOGS env var set, preserving logs/output at {}".format(self.dirs.basePath))
-        else:
-            shutil.rmtree(self.dirs.basePath)
diff --git a/...ion/resources/cql-schemas/4.0-rc1-ks0.cql → ...ation/resources/cql-schemas/4.0.0-ks0.cql b/...ion/resources/cql-schemas/4.0-rc1-ks0.cql → ...ation/resources/cql-schemas/4.0.0-ks0.cql
diff --git a/...gration/resources/cql-schemas/4.0-rc1.cql → ...tegration/resources/cql-schemas/4.0.0.cql b/...gration/resources/cql-schemas/4.0-rc1.cql → ...tegration/resources/cql-schemas/4.0.0.cql
diff --git a/...gration/resources/nb-schemas/4.0-rc1.yaml → ...tegration/resources/nb-schemas/4.0.0.yaml b/...gration/resources/nb-schemas/4.0-rc1.yaml → ...tegration/resources/nb-schemas/4.0.0.yaml
diff --git a/python/adelphi/tests/integration/test_cql.py b/python/adelphi/tests/integration/test_cql.py
@@ -1,6 +1,8 @@
+import difflib
 import glob
 import logging
 import os
+import re
 import shutil
 import sys
 
@@ -15,25 +17,36 @@
     import subprocess
 
 from tests.integration import SchemaTestCase, setupSchema, getAllKeyspaces, dropNewKeyspaces
-from tests.util.schemadiff import cqlDigestGenerator
 from tests.util.schema_util import get_schema
 
 log = logging.getLogger('adelphi')
 
 CQL_REFERENCE_SCHEMA_PATH = "tests/integration/resources/cql-schemas/{}.cql"
 CQL_REFERENCE_KS0_SCHEMA_PATH = "tests/integration/resources/cql-schemas/{}-ks0.cql"
 
-def digestSet(schemaFile):
-    rv = set()
-    for (_, digest) in cqlDigestGenerator(schemaFile):
-        rv.add(digest)
-    return rv
+KEYSPACE_LINE_REGEX = re.compile(r'\s*CREATE KEYSPACE IF NOT EXISTS (\w+) ')
 
+def linesWithNewline(fpath):
+    """Return the lines of fpath, newline-terminating the last one; an empty file yields []."""
+    if not os.path.exists(fpath):
+        print("File {} does not exist".format(fpath))
+    print("Reading lines for file {}".format(fpath))
+    with open(fpath) as f:
+        rv = f.readlines()
+    if not rv:
+        print("File {} is empty".format(fpath))
+    elif not rv[-1].endswith("\n"):
+        rv[-1] += "\n"
+    return rv
 
-def logCqlDigest(schemaFile, digestSet):
-    for (cql, digest) in cqlDigestGenerator(schemaFile):
-        if digest in digestSet:
-            log.info("Digest: {}, CQL: {}".format(digest,cql))
+
+def extractKeyspaceName(schemaPath):
+    """Return the keyspace named by the first line matching KEYSPACE_LINE_REGEX, or None if no line does."""
+    with open(schemaPath) as schemaFile:
+        candidates = (KEYSPACE_LINE_REGEX.match(line) for line in schemaFile)
+        # next() stops consuming the file at the first line that matches
+        firstMatch = next((m for m in candidates if m), None)
+    return firstMatch.group(1) if firstMatch else None
 
 
 class TestCql(SchemaTestCase):
@@ -44,11 +57,7 @@ def setUp(self):
         self.origKeyspaces = getAllKeyspaces()
         log.info("Creating schema")
         setupSchema(self.buildSchema())
-
-
-    def tearDown(self):
-        super(TestCql, self).tearDown()
-        dropNewKeyspaces(self.origKeyspaces)
+        self.addCleanup(dropNewKeyspaces, self.origKeyspaces)
 
 
     # ========================== Helper functions ==========================
@@ -64,19 +73,43 @@ def buildSchema(self):
 
 
     def compareToReferenceCql(self, referencePath, comparePath):
-        referenceSet = digestSet(referencePath)
-        compareSet = digestSet(comparePath)
+        compareLines = linesWithNewline(comparePath)
+        referenceLines = linesWithNewline(referencePath)
+
+        diffGen = difflib.unified_diff(
+            compareLines,
+            referenceLines,
+            fromfile=os.path.abspath(comparePath),
+            tofile=os.path.abspath(referencePath))
+
+        diffEmpty = True
+        for line in diffGen:
+            if diffEmpty:
+                print("Diff of generated file ({}) against reference file ({})".format(
+                    os.path.basename(comparePath),
+                    os.path.basename(referencePath)))
+            diffEmpty = False
+            print(line.strip())
 
-        refOnlySet = referenceSet - compareSet
-        if len(refOnlySet) > 0:
-            log.info("Statements in reference file {} but not in compare file {}:".format(referencePath, comparePath))
-            logCqlDigest(referencePath, refOnlySet)
-        compareOnlySet = compareSet - referenceSet
-        if len(compareOnlySet) > 0:
-            log.info("Statements in compare file {} but not in reference file {}:".format(comparePath, referencePath))
-            logCqlDigest(comparePath, compareOnlySet)
+        if not diffEmpty:
+            self.fail()
 
-        self.assertEqual(referenceSet, compareSet)
+
+    def combineSchemas(self):
+        """Join every per-keyspace schema in the output dir, ordered by keyspace name, into one file; return its path."""
+        outputDirPath = self.outputDirPath(self.version)
+        allOutputPath = self.outputDirPath("{}-all".format(self.version))
+        schemaPaths = glob.glob("{}/*/schema".format(outputDirPath))
+        self.assertGreater(len(schemaPaths), 0)
+        schemas = {extractKeyspaceName(p): p for p in schemaPaths}
+        # Read through a context manager so each schema file handle is closed deterministically
+        def readSchema(path):
+            with open(path) as schemaFile:
+                return schemaFile.read()
+
+        with open(allOutputPath, "w+") as allOutputFile:
+            allOutputFile.write("\n\n".join(readSchema(schemas[ks]) for ks in sorted(schemas)))
+        return allOutputPath
 
 
     # ========================== Test functions ==========================
@@ -87,7 +120,7 @@ def test_stdout(self):
 
         self.compareToReferenceCql(
             CQL_REFERENCE_SCHEMA_PATH.format(self.version), 
-            self.stdoutPath(self.version))
+            stdoutPath)
 
 
     def test_outputdir(self):
@@ -96,24 +129,9 @@ def test_outputdir(self):
         os.mkdir(outputDirPath)
         subprocess.run("adelphi --output-dir={} export-cql --no-metadata 2>> {}".format(outputDirPath, stderrPath), shell=True)
 
-        # Basic idea here is to find all schemas written to the output dir and aggregate them into a single schema
-        # file.  We then compare this aggregated file to the reference schema.  Ordering is important here but
-        # the current keyspace names hash to something that causes individual keyspaces to be discovered in the
-        # correct order.
-        outputDirPath = self.outputDirPath(self.version)
-        allOutputFileName = "{}-all".format(self.version)
-        allOutputPath = self.outputDirPath(allOutputFileName)
-
-        outputSchemas = glob.glob("{}/*/schema".format(outputDirPath))
-        self.assertGreater(len(outputSchemas), 0)
-        with open(allOutputPath, "w+") as allOutputFile:
-            for outputSchema in outputSchemas:
-                with open(outputSchema) as outputSchemaFile:
-                    shutil.copyfileobj(outputSchemaFile, allOutputFile)
-                    allOutputFile.write("\n")
         self.compareToReferenceCql(
             CQL_REFERENCE_SCHEMA_PATH.format(self.version), 
-            allOutputPath)
+            self.combineSchemas())
 
 
     def test_some_keyspaces_stdout(self):
@@ -123,7 +141,7 @@ def test_some_keyspaces_stdout(self):
 
         self.compareToReferenceCql(
             CQL_REFERENCE_KS0_SCHEMA_PATH.format(self.version), 
-            self.stdoutPath(self.version))
+            stdoutPath)
 
 
     def test_some_keyspaces_outputdir(self):

diff --git a/python/adelphi/tests/util/schema_util.py b/python/adelphi/tests/util/schema_util.py
@@ -1,9 +1,10 @@
+from functools import partial
+
 from cassandra.metadata import Metadata,\
 	KeyspaceMetadata,\
 	TableMetadata,\
 	ColumnMetadata,\
 	IndexMetadata,\
-	SimpleStrategy, \
 	UserType
 
 # types compatible with C* 2.1+
@@ -92,21 +93,17 @@ def get_keyspace(name, durable_writes, strategy_class, strategy_options, sasi=Tr
 	return keyspace
 
 def get_schema(sasi=True):
-	# build a couple of keyspaces
-	keyspaces = []
-	for k in range(2):
-		keyspace = get_keyspace("my_ks_%s" % k, True, "SimpleStrategy", {"replication_factor": 1}, sasi=sasi)
-		keyspaces.append(keyspace)
-
 	schema = Metadata()
-	schema.keyspaces = keyspaces
+	buildKs = partial(
+		get_keyspace,
+		durable_writes=True,
+		strategy_class="SimpleStrategy",
+		strategy_options={"replication_factor": 1},
+		sasi=sasi)
+	schema.keyspaces = [buildKs("my_ks_%s" % k) for k in range(2)]
 	return schema
 
 if __name__ == "__main__":
-	"""
-	Use this to print the test schema.
-	The output can be used in the integration tests too.
-	"""
-        # As discussed elsewhere SASI support is disabled until https://github.com/datastax/adelphi/issues/105
-        # is completed
-	print("\n\n".join(ks.export_as_string() for ks in get_schema(sasi=False).keyspaces))
+    # As discussed elsewhere SASI support is disabled until https://github.com/datastax/adelphi/issues/105
+    # is completed
+    print("\n\n".join(ks.export_as_string() for ks in get_schema(sasi=False).keyspaces))
diff --git a/python/adelphi/tests/util/schemadiff.py b/python/adelphi/tests/util/schemadiff.py