From 98620b88d459ad22613f901ed77ba1e41d9414bd Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Tue, 23 Apr 2024 17:50:22 -0400 Subject: [PATCH 1/6] Updates string dialect to support multi-indexed column names --- hatchet/query/string_dialect.py | 599 +++++++++++++++++++++----------- hatchet/tests/query.py | 80 ++--- 2 files changed, 444 insertions(+), 235 deletions(-) diff --git a/hatchet/query/string_dialect.py b/hatchet/query/string_dialect.py index ed4e8714..5e0341b3 100644 --- a/hatchet/query/string_dialect.py +++ b/hatchet/query/string_dialect.py @@ -18,7 +18,7 @@ # PEG grammar for the String-based dialect -CYPHER_GRAMMAR = u""" +CYPHER_GRAMMAR = """ FullQuery: path_expr=MatchExpr(cond_expr=WhereExpr)?; MatchExpr: 'MATCH' path=PathQuery; PathQuery: '(' nodes=NodeExpr ')'('->' '(' nodes=NodeExpr ')')*; @@ -32,26 +32,27 @@ UnaryCond: NotCond | SingleCond; NotCond: 'NOT' subcond=SingleCond; SingleCond: StringCond | NumberCond | NoneCond | NotNoneCond | LeafCond | NotLeafCond; -NoneCond: name=ID '.' prop=STRING 'IS NONE'; -NotNoneCond: name=ID '.' prop=STRING 'IS NOT NONE'; +NoneCond: name=ID '.' prop=MetricId 'IS NONE'; +NotNoneCond: name=ID '.' prop=MetricId 'IS NOT NONE'; LeafCond: name=ID 'IS LEAF'; NotLeafCond: name=ID 'IS NOT LEAF'; StringCond: StringEq | StringStartsWith | StringEndsWith | StringContains | StringMatch; -StringEq: name=ID '.' prop=STRING '=' val=STRING; -StringStartsWith: name=ID '.' prop=STRING 'STARTS WITH' val=STRING; -StringEndsWith: name=ID '.' prop=STRING 'ENDS WITH' val=STRING; -StringContains: name=ID '.' prop=STRING 'CONTAINS' val=STRING; -StringMatch: name=ID '.' prop=STRING '=~' val=STRING; +StringEq: name=ID '.' prop=MetricId '=' val=STRING; +StringStartsWith: name=ID '.' prop=MetricId 'STARTS WITH' val=STRING; +StringEndsWith: name=ID '.' prop=MetricId 'ENDS WITH' val=STRING; +StringContains: name=ID '.' prop=MetricId 'CONTAINS' val=STRING; +StringMatch: name=ID '.' 
prop=MetricId '=~' val=STRING; NumberCond: NumEq | NumLt | NumGt | NumLte | NumGte | NumNan | NumNotNan | NumInf | NumNotInf; -NumEq: name=ID '.' prop=STRING '=' val=NUMBER; -NumLt: name=ID '.' prop=STRING '<' val=NUMBER; -NumGt: name=ID '.' prop=STRING '>' val=NUMBER; -NumLte: name=ID '.' prop=STRING '<=' val=NUMBER; -NumGte: name=ID '.' prop=STRING '>=' val=NUMBER; -NumNan: name=ID '.' prop=STRING 'IS NAN'; -NumNotNan: name=ID '.' prop=STRING 'IS NOT NAN'; -NumInf: name=ID '.' prop=STRING 'IS INF'; -NumNotInf: name=ID '.' prop=STRING 'IS NOT INF'; +NumEq: name=ID '.' prop=MetricId '=' val=NUMBER; +NumLt: name=ID '.' prop=MetricId '<' val=NUMBER; +NumGt: name=ID '.' prop=MetricId '>' val=NUMBER; +NumLte: name=ID '.' prop=MetricId '<=' val=NUMBER; +NumGte: name=ID '.' prop=MetricId '>=' val=NUMBER; +NumNan: name=ID '.' prop=MetricId 'IS NAN'; +NumNotNan: name=ID '.' prop=MetricId 'IS NOT NAN'; +NumInf: name=ID '.' prop=MetricId 'IS INF'; +NumNotInf: name=ID '.' prop=MetricId 'IS NOT INF'; +MetricId: '(' ids+=STRING[','] ')' | ids=STRING; """ # TextX metamodel for the String-based dialect @@ -85,8 +86,14 @@ def filter_check_types(type_check, df_row, filt_lambda): return False -class StringQuery(Query): +######################################################################## +# NOTE: the use of single and double quotes in processing string-dialect +# queries is EXTREMELY important. Inner strings (e.g., for metric +# names) MUST use single quotes. 
+######################################################################## + +class StringQuery(Query): """Class for representing and parsing queries using the String-based dialect.""" def __init__(self, cypher_query, multi_index_mode="off"): @@ -275,14 +282,14 @@ def _parse_single_cond(self, obj): def _parse_none(self, obj): """Parses 'property IS NONE'.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "df_row.name._depth is None", None, ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -292,7 +299,11 @@ def _parse_none(self, obj): return [ None, obj.name, - 'df_row["{}"] is None'.format(obj.prop), + "df_row[{}] is None".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), None, ] @@ -302,47 +313,43 @@ def _add_aggregation_call_to_multi_idx_predicate(self, predicate): return predicate + ".all()" def _parse_none_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "df_row.index.get_level_values('node')[0]._depth is None", None, ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, "df_row.index.get_level_values('node')[0]._hatchet_nid is None", None, ] - if self.multi_index_mode == "any": - return [ - None, - obj.name, - "df_row['{}'].apply(lambda elem: elem is None).any()".format(obj.prop), - None, - ] - # if self.multi_index_mode == "all": return [ None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - "df_row['{}'].apply(lambda elem: elem is None)".format(obj.prop) + "df_row[{}].apply(lambda elem: elem is None)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ) ), None, ] def _parse_not_none(self, obj): """Parses 'property IS NOT NONE'.""" - if obj.prop == "depth": + if 
len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "df_row.name._depth is not None", None, ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -352,19 +359,23 @@ def _parse_not_none(self, obj): return [ None, obj.name, - 'df_row["{}"] is not None'.format(obj.prop), + "df_row[{}] is not None".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), None, ] def _parse_not_none_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "df_row.index.get_level_values('node')[0]._depth is not None", None, ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -375,7 +386,11 @@ def _parse_not_none_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - "df_row['{}'].apply(lambda elem: elem is not None)".format(obj.prop) + "df_row[{}].apply(lambda elem: elem is not None)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ) ), None, ] @@ -465,8 +480,17 @@ def _parse_str_eq(self, obj): return [ None, obj.name, - 'df_row["{}"] == "{}"'.format(obj.prop, obj.val), - "isinstance(df_row['{}'], str)".format(obj.prop), + 'df_row[{}] == "{}"'.format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ), + "isinstance(df_row[{}], str)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_str_eq_multi_idx(self, obj): @@ -474,11 +498,18 @@ def _parse_str_eq_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: elem == "{}")'.format( - obj.prop, obj.val + 'df_row[{}].apply(lambda elem: elem == "{}")'.format( + str(tuple(obj.prop.ids)) 
+ if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, ) ), - "is_string_dtype(df_row['{}'])".format(obj.prop), + "is_string_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_str_starts_with(self, obj): @@ -486,8 +517,17 @@ def _parse_str_starts_with(self, obj): return [ None, obj.name, - 'df_row["{}"].startswith("{}")'.format(obj.prop, obj.val), - "isinstance(df_row['{}'], str)".format(obj.prop), + 'df_row[{}].startswith("{}")'.format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ), + "isinstance(df_row[{}], str)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_str_starts_with_multi_idx(self, obj): @@ -495,11 +535,18 @@ def _parse_str_starts_with_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: elem.startswith("{}"))'.format( - obj.prop, obj.val + 'df_row[{}].apply(lambda elem: elem.startswith("{}"))'.format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, ) ), - "is_string_dtype(df_row['{}'])".format(obj.prop), + "is_string_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_str_ends_with(self, obj): @@ -507,8 +554,17 @@ def _parse_str_ends_with(self, obj): return [ None, obj.name, - 'df_row["{}"].endswith("{}")'.format(obj.prop, obj.val), - "isinstance(df_row['{}'], str)".format(obj.prop), + 'df_row[{}].endswith("{}")'.format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ), + "isinstance(df_row[{}], str)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_str_ends_with_multi_idx(self, 
obj): @@ -516,11 +572,18 @@ def _parse_str_ends_with_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: elem.endswith("{}"))'.format( - obj.prop, obj.val + 'df_row[{}].apply(lambda elem: elem.endswith("{}"))'.format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, ) ), - "is_string_dtype(df_row['{}'])".format(obj.prop), + "is_string_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_str_contains(self, obj): @@ -528,8 +591,17 @@ def _parse_str_contains(self, obj): return [ None, obj.name, - '"{}" in df_row["{}"]'.format(obj.val, obj.prop), - "isinstance(df_row['{}'], str)".format(obj.prop), + '"{}" in df_row[{}]'.format( + obj.val, + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + ), + "isinstance(df_row[{}], str)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_str_contains_multi_idx(self, obj): @@ -537,11 +609,18 @@ def _parse_str_contains_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: "{}" in elem)'.format( - obj.prop, obj.val + 'df_row[{}].apply(lambda elem: "{}" in elem)'.format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, ) ), - "is_string_dtype(df_row['{}'])".format(obj.prop), + "is_string_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_str_match(self, obj): @@ -549,8 +628,17 @@ def _parse_str_match(self, obj): return [ None, obj.name, - 're.match("{}", df_row["{}"]) is not None'.format(obj.val, obj.prop), - "isinstance(df_row['{}'], str)".format(obj.prop), + 're.match("{}", df_row[{}]) is not None'.format( + 
obj.val, + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + ), + "isinstance(df_row[{}], str)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_str_match_multi_idx(self, obj): @@ -558,11 +646,18 @@ def _parse_str_match_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: re.match("{}", elem) is not None)'.format( - obj.prop, obj.val + 'df_row[{}].apply(lambda elem: re.match("{}", elem) is not None)'.format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, ) ), - "is_string_dtype(df_row['{}'])".format(obj.prop), + "is_string_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num(self, obj): @@ -591,7 +686,7 @@ def _parse_num(self, obj): def _parse_num_eq(self, obj): """Processes numeric equivalence predicates.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val == -1: return [ None, @@ -606,9 +701,7 @@ def _parse_num_eq(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -623,7 +716,7 @@ def _parse_num_eq(self, obj): "df_row.name._depth == {}".format(obj.val), "isinstance(df_row.name._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -631,9 +724,7 @@ def _parse_num_eq(self, obj): This condition will always be false. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -651,12 +742,21 @@ def _parse_num_eq(self, obj): return [ None, obj.name, - 'df_row["{}"] == {}'.format(obj.prop, obj.val), - "isinstance(df_row['{}'], Real)".format(obj.prop), + "df_row[{}] == {}".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ), + "isinstance(df_row[{}], Real)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num_eq_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val == -1: return [ None, @@ -671,9 +771,7 @@ def _parse_num_eq_multi_idx(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -688,7 +786,7 @@ def _parse_num_eq_multi_idx(self, obj): "df_row.index.get_level_values('node')[0]._depth == {}".format(obj.val), "isinstance(df_row.index.get_level_values('node')[0]._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -696,9 +794,7 @@ def _parse_num_eq_multi_idx(self, obj): This condition will always be false. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -719,14 +815,23 @@ def _parse_num_eq_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: elem == {})'.format(obj.prop, obj.val) + "df_row[{}].apply(lambda elem: elem == {})".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ) + ), + "is_numeric_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) ), - "is_numeric_dtype(df_row['{}'])".format(obj.prop), ] def _parse_num_lt(self, obj): """Processes numeric less-than predicates.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val < 0: warnings.warn( """ @@ -734,9 +839,7 @@ def _parse_num_lt(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -751,7 +854,7 @@ def _parse_num_lt(self, obj): "df_row.name._depth < {}".format(obj.val), "isinstance(df_row.name._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -759,9 +862,7 @@ def _parse_num_lt(self, obj): This condition will always be false. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -779,12 +880,21 @@ def _parse_num_lt(self, obj): return [ None, obj.name, - 'df_row["{}"] < {}'.format(obj.prop, obj.val), - "isinstance(df_row['{}'], Real)".format(obj.prop), + "df_row[{}] < {}".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ), + "isinstance(df_row[{}], Real)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num_lt_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val < 0: warnings.warn( """ @@ -792,9 +902,7 @@ def _parse_num_lt_multi_idx(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -809,7 +917,7 @@ def _parse_num_lt_multi_idx(self, obj): "df_row.index.get_level_values('node')[0]._depth < {}".format(obj.val), "isinstance(df_row.index.get_level_values('node')[0]._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -817,9 +925,7 @@ def _parse_num_lt_multi_idx(self, obj): This condition will always be false. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -840,14 +946,23 @@ def _parse_num_lt_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: elem < {})'.format(obj.prop, obj.val) + "df_row[{}].apply(lambda elem: elem < {})".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ) + ), + "is_numeric_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) ), - "is_numeric_dtype(df_row['{}'])".format(obj.prop), ] def _parse_num_gt(self, obj): """Processes numeric greater-than predicates.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val < 0: warnings.warn( """ @@ -855,9 +970,7 @@ def _parse_num_gt(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -872,7 +985,7 @@ def _parse_num_gt(self, obj): "df_row.name._depth > {}".format(obj.val), "isinstance(df_row.name._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -880,9 +993,7 @@ def _parse_num_gt(self, obj): This condition will always be true. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -900,12 +1011,21 @@ def _parse_num_gt(self, obj): return [ None, obj.name, - 'df_row["{}"] > {}'.format(obj.prop, obj.val), - "isinstance(df_row['{}'], Real)".format(obj.prop), + "df_row[{}] > {}".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ), + "isinstance(df_row[{}], Real)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num_gt_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val < 0: warnings.warn( """ @@ -913,9 +1033,7 @@ def _parse_num_gt_multi_idx(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -930,7 +1048,7 @@ def _parse_num_gt_multi_idx(self, obj): "df_row.index.get_level_values('node')[0]._depth > {}".format(obj.val), "isinstance(df_row.index.get_level_values('node')[0]._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -938,9 +1056,7 @@ def _parse_num_gt_multi_idx(self, obj): This condition will always be true. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -961,14 +1077,23 @@ def _parse_num_gt_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: elem > {})'.format(obj.prop, obj.val) + "df_row[{}].apply(lambda elem: elem > {})".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ) + ), + "is_numeric_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) ), - "is_numeric_dtype(df_row['{}'])".format(obj.prop), ] def _parse_num_lte(self, obj): """Processes numeric less-than-or-equal-to predicates.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val < 0: warnings.warn( """ @@ -976,9 +1101,7 @@ def _parse_num_lte(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -993,7 +1116,7 @@ def _parse_num_lte(self, obj): "df_row.name._depth <= {}".format(obj.val), "isinstance(df_row.name._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -1001,9 +1124,7 @@ def _parse_num_lte(self, obj): This condition will always be false. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -1021,12 +1142,21 @@ def _parse_num_lte(self, obj): return [ None, obj.name, - 'df_row["{}"] <= {}'.format(obj.prop, obj.val), - "isinstance(df_row['{}'], Real)".format(obj.prop), + "df_row[{}] <= {}".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ), + "isinstance(df_row[{}], Real)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num_lte_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val < 0: warnings.warn( """ @@ -1034,9 +1164,7 @@ def _parse_num_lte_multi_idx(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -1051,7 +1179,7 @@ def _parse_num_lte_multi_idx(self, obj): "df_row.index.get_level_values('node')[0]._depth <= {}".format(obj.val), "isinstance(df_row.index.get_level_values('node')[0]._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -1059,9 +1187,7 @@ def _parse_num_lte_multi_idx(self, obj): This condition will always be false. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -1082,14 +1208,23 @@ def _parse_num_lte_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: elem <= {})'.format(obj.prop, obj.val) + "df_row[{}].apply(lambda elem: elem <= {})".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ) + ), + "is_numeric_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) ), - "is_numeric_dtype(df_row['{}'])".format(obj.prop), ] def _parse_num_gte(self, obj): """Processes numeric greater-than-or-equal-to predicates.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val < 0: warnings.warn( """ @@ -1097,9 +1232,7 @@ def _parse_num_gte(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -1114,7 +1247,7 @@ def _parse_num_gte(self, obj): "df_row.name._depth >= {}".format(obj.val), "isinstance(df_row.name._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -1122,9 +1255,7 @@ def _parse_num_gte(self, obj): This condition will always be true. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -1142,12 +1273,21 @@ def _parse_num_gte(self, obj): return [ None, obj.name, - 'df_row["{}"] >= {}'.format(obj.prop, obj.val), - "isinstance(df_row['{}'], Real)".format(obj.prop), + "df_row[{}] >= {}".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ), + "isinstance(df_row[{}], Real)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num_gte_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": if obj.val < 0: warnings.warn( """ @@ -1155,9 +1295,7 @@ def _parse_num_gte_multi_idx(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -1172,7 +1310,7 @@ def _parse_num_gte_multi_idx(self, obj): "df_row.index.get_level_values('node')[0]._depth >= {}".format(obj.val), "isinstance(df_row.index.get_level_values('node')[0]._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": if obj.val < 0: warnings.warn( """ @@ -1180,9 +1318,7 @@ def _parse_num_gte_multi_idx(self, obj): This condition will always be true. 
The statement that triggered this warning is: {} - """.format( - obj - ), + """.format(obj), RedundantQueryFilterWarning, ) return [ @@ -1203,21 +1339,30 @@ def _parse_num_gte_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'df_row["{}"].apply(lambda elem: elem >= {})'.format(obj.prop, obj.val) + "df_row[{}].apply(lambda elem: elem >= {})".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]), + obj.val, + ) + ), + "is_numeric_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) ), - "is_numeric_dtype(df_row['{}'])".format(obj.prop), ] def _parse_num_nan(self, obj): """Processes predicates that check for NaN.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "pd.isna(df_row.name._depth)", "isinstance(df_row.name._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -1227,19 +1372,27 @@ def _parse_num_nan(self, obj): return [ None, obj.name, - 'pd.isna(df_row["{}"])'.format(obj.prop), - "isinstance(df_row['{}'], Real)".format(obj.prop), + "pd.isna(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), + "isinstance(df_row[{}], Real)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num_nan_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "pd.isna(df_row.index.get_level_values('node')[0]._depth)", "isinstance(df_row.index.get_level_values('node')[0]._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -1250,21 +1403,29 @@ def _parse_num_nan_multi_idx(self, obj): None, obj.name, 
self._add_aggregation_call_to_multi_idx_predicate( - 'pd.isna(df_row["{}"])'.format(obj.prop) + "pd.isna(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ) + ), + "is_numeric_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) ), - "is_numeric_dtype(df_row['{}'])".format(obj.prop), ] def _parse_num_not_nan(self, obj): """Processes predicates that check for NaN.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "not pd.isna(df_row.name._depth)", "isinstance(df_row.name._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -1274,19 +1435,27 @@ def _parse_num_not_nan(self, obj): return [ None, obj.name, - 'not pd.isna(df_row["{}"])'.format(obj.prop), - "isinstance(df_row['{}'], Real)".format(obj.prop), + "not pd.isna(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), + "isinstance(df_row[{}], Real)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num_not_nan_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "not pd.isna(df_row.index.get_level_values('node')[0]._depth)", "isinstance(df_row.index.get_level_values('node')[0]._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -1297,21 +1466,29 @@ def _parse_num_not_nan_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'not pd.isna(df_row["{}"])'.format(obj.prop) + "not pd.isna(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ) + ), + 
"is_numeric_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) ), - "is_numeric_dtype(df_row['{}'])".format(obj.prop), ] def _parse_num_inf(self, obj): """Processes predicates that check for Infinity.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "np.isinf(df_row.name._depth)", "isinstance(df_row.name._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -1321,19 +1498,27 @@ def _parse_num_inf(self, obj): return [ None, obj.name, - 'np.isinf(df_row["{}"])'.format(obj.prop), - "isinstance(df_row['{}'], Real)".format(obj.prop), + "np.isinf(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), + "isinstance(df_row[{}], Real)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num_inf_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "np.isinf(df_row.index.get_level_values('node')[0]._depth)", "isinstance(df_row.index.get_level_values('node')[0]._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -1344,21 +1529,29 @@ def _parse_num_inf_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'np.isinf(df_row["{}"])'.format(obj.prop) + "np.isinf(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ) + ), + "is_numeric_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) ), - "is_numeric_dtype(df_row['{}'])".format(obj.prop), ] def _parse_num_not_inf(self, obj): """Processes predicates that check for 
not-Infinity.""" - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "not np.isinf(df_row.name._depth)", "isinstance(df_row.name._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -1368,19 +1561,27 @@ def _parse_num_not_inf(self, obj): return [ None, obj.name, - 'not np.isinf(df_row["{}"])'.format(obj.prop), - "isinstance(df_row['{}'], Real)".format(obj.prop), + "not np.isinf(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), + "isinstance(df_row[{}], Real)".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ), ] def _parse_num_not_inf_multi_idx(self, obj): - if obj.prop == "depth": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "depth": return [ None, obj.name, "not np.isinf(df_row.index.get_level_values('node')[0]._depth)", "isinstance(df_row.index.get_level_values('node')[0]._depth, Real)", ] - if obj.prop == "node_id": + if len(obj.prop.ids) == 1 and obj.prop.ids[0] == "node_id": return [ None, obj.name, @@ -1391,9 +1592,17 @@ def _parse_num_not_inf_multi_idx(self, obj): None, obj.name, self._add_aggregation_call_to_multi_idx_predicate( - 'not np.isinf(df_row["{}"])'.format(obj.prop) + "not np.isinf(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) + ) + ), + "is_numeric_dtype(df_row[{}])".format( + str(tuple(obj.prop.ids)) + if len(obj.prop.ids) > 1 + else "'{}'".format(obj.prop.ids[0]) ), - "is_numeric_dtype(df_row['{}'])".format(obj.prop), ] diff --git a/hatchet/tests/query.py b/hatchet/tests/query.py index f2705aa3..15c56d69 100644 --- a/hatchet/tests/query.py +++ b/hatchet/tests/query.py @@ -382,9 +382,9 @@ def __init__(self): self.z = "hello" bad_field_test_dict = list(mock_graph_literal) - 
bad_field_test_dict[0]["children"][0]["children"][0]["metrics"][ - "list" - ] = DummyType() + bad_field_test_dict[0]["children"][0]["children"][0]["metrics"]["list"] = ( + DummyType() + ) gf = GraphFrame.from_literal(bad_field_test_dict) path = [{"name": "foo"}, {"name": "bar"}, {"list": DummyType()}] query = ObjectQuery(path) @@ -821,16 +821,16 @@ def test_construct_string_dialect(): mock_node_ibv = {"name": "ibv_reg_mr"} mock_node_time_true = {"time (inc)": 0.1} mock_node_time_false = {"time (inc)": 0.001} - path1 = u"""MATCH (p)->("*")->(q) + path1 = """MATCH (p)->("*")->(q) WHERE p."name" STARTS WITH "MPI_" AND q."name" STARTS WITH "ibv" """ - path2 = u"""MATCH (p)->(2)->(q) + path2 = """MATCH (p)->(2)->(q) WHERE p."name" STARTS WITH "MPI_" AND q."name" STARTS WITH "ibv" """ - path3 = u"""MATCH (p)->("+", a)->(q) + path3 = """MATCH (p)->("+", a)->(q) WHERE p."name" STARTS WITH "MPI" AND a."time (inc)" >= 0.1 AND q."name" STARTS WITH "ibv" """ - path4 = u"""MATCH (p)->(3, a)->(q) + path4 = """MATCH (p)->(3, a)->(q) WHERE p."name" STARTS WITH "MPI" AND a."time (inc)" = 0.1 AND q."name" STARTS WITH "ibv" """ query1 = StringQuery(path1) @@ -893,7 +893,7 @@ def test_construct_string_dialect(): assert not query4.query_pattern[3][1](mock_node_time_false) assert query4.query_pattern[4][0] == "." 
- invalid_path = u"""MATCH (p)->({"bad": "wildcard"}, a)->(q) + invalid_path = """MATCH (p)->({"bad": "wildcard"}, a)->(q) WHERE p."name" STARTS WITH "MPI" AND a."time (inc)" = 0.1 AND q."name" STARTS WITH "ibv" """ @@ -903,7 +903,7 @@ def test_construct_string_dialect(): def test_apply_string_dialect(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) - path = u"""MATCH (p)->(2, q)->("*", r)->(s) + path = """MATCH (p)->(2, q)->("*", r)->(s) WHERE p."time (inc)" >= 30.0 AND NOT q."name" STARTS WITH "b" AND r."name" =~ "[^b][a-z]+" AND s."name" STARTS WITH "gr" """ @@ -920,7 +920,7 @@ def test_apply_string_dialect(mock_graph_literal): assert sorted(engine.apply(query, gf.graph, gf.dataframe)) == sorted(match) - path = u"""MATCH (p)->(".")->(q)->("*") + path = """MATCH (p)->(".")->(q)->("*") WHERE p."time (inc)" >= 30.0 AND q."name" = "bar" """ match = [ @@ -933,14 +933,14 @@ def test_apply_string_dialect(mock_graph_literal): query = StringQuery(path) assert sorted(engine.apply(query, gf.graph, gf.dataframe)) == sorted(match) - path = u"""MATCH (p)->(q)->(r) + path = """MATCH (p)->(q)->(r) WHERE p."name" = "foo" AND q."name" = "bar" AND r."time" = 5.0 """ match = [root, root.children[0], root.children[0].children[0]] query = StringQuery(path) assert sorted(engine.apply(query, gf.graph, gf.dataframe)) == sorted(match) - path = u"""MATCH (p)->(q)->("+", r) + path = """MATCH (p)->(q)->("+", r) WHERE p."name" = "foo" AND q."name" = "qux" AND r."time (inc)" > 15.0 """ match = [ @@ -953,7 +953,7 @@ def test_apply_string_dialect(mock_graph_literal): query = StringQuery(path) assert sorted(engine.apply(query, gf.graph, gf.dataframe)) == sorted(match) - path = u"""MATCH (p)->(q) + path = """MATCH (p)->(q) WHERE p."time (inc)" > 100 OR p."time (inc)" <= 30 AND q."time (inc)" = 20 """ roots = gf.graph.roots @@ -966,21 +966,21 @@ def test_apply_string_dialect(mock_graph_literal): query = StringQuery(path) assert sorted(engine.apply(query, gf.graph, 
gf.dataframe)) == sorted(match) - path = u"""MATCH (p)->("*", q)->(r) + path = """MATCH (p)->("*", q)->(r) WHERE p."name" = "this" AND q."name" = "is" AND r."name" = "nonsense" """ query = StringQuery(path) assert engine.apply(query, gf.graph, gf.dataframe) == [] - path = u"""MATCH (p)->("*")->(q) + path = """MATCH (p)->("*")->(q) WHERE p."name" = 5 AND q."name" = "whatever" """ with pytest.raises(InvalidQueryFilter): query = StringQuery(path) engine.apply(query, gf.graph, gf.dataframe) - path = u"""MATCH (p)->("*")->(q) + path = """MATCH (p)->("*")->(q) WHERE p."time" = "badstring" AND q."name" = "whatever" """ query = StringQuery(path) @@ -994,18 +994,18 @@ def __init__(self): self.z = "hello" bad_field_test_dict = list(mock_graph_literal) - bad_field_test_dict[0]["children"][0]["children"][0]["metrics"][ - "list" - ] = DummyType() + bad_field_test_dict[0]["children"][0]["children"][0]["metrics"]["list"] = ( + DummyType() + ) gf = GraphFrame.from_literal(bad_field_test_dict) - path = u"""MATCH (p)->(q)->(r) + path = """MATCH (p)->(q)->(r) WHERE p."name" = "foo" AND q."name" = "bar" AND p."list" = DummyType() """ with pytest.raises(InvalidQueryPath): query = StringQuery(path) engine.apply(query, gf.graph, gf.dataframe) - path = u"""MATCH ("*")->(p)->(q)->("*") + path = """MATCH ("*")->(p)->(q)->("*") WHERE p."name" = "bar" AND q."name" = "grault" """ match = [ @@ -1052,7 +1052,7 @@ def __init__(self): query = StringQuery(path) assert sorted(engine.apply(query, gf.graph, gf.dataframe)) == sorted(match) - path = u"""MATCH ("*")->(p)->(q)->("+") + path = """MATCH ("*")->(p)->(q)->("+") WHERE p."name" = "bar" AND q."name" = "grault" """ query = StringQuery(path) @@ -1060,7 +1060,7 @@ def __init__(self): gf.dataframe["time"] = np.NaN gf.dataframe.at[gf.graph.roots[0], "time"] = 5.0 - path = u"""MATCH ("*", p) + path = """MATCH ("*", p) WHERE p."time" IS NOT NAN""" match = [gf.graph.roots[0]] query = StringQuery(path) @@ -1068,7 +1068,7 @@ def __init__(self): 
gf.dataframe["time"] = 5.0 gf.dataframe.at[gf.graph.roots[0], "time"] = np.NaN - path = u"""MATCH ("*", p) + path = """MATCH ("*", p) WHERE p."time" IS NAN""" match = [gf.graph.roots[0]] query = StringQuery(path) @@ -1076,7 +1076,7 @@ def __init__(self): gf.dataframe["time"] = np.Inf gf.dataframe.at[gf.graph.roots[0], "time"] = 5.0 - path = u"""MATCH ("*", p) + path = """MATCH ("*", p) WHERE p."time" IS NOT INF""" match = [gf.graph.roots[0]] query = StringQuery(path) @@ -1084,7 +1084,7 @@ def __init__(self): gf.dataframe["time"] = 5.0 gf.dataframe.at[gf.graph.roots[0], "time"] = np.Inf - path = u"""MATCH ("*", p) + path = """MATCH ("*", p) WHERE p."time" IS INF""" match = [gf.graph.roots[0]] query = StringQuery(path) @@ -1093,7 +1093,7 @@ def __init__(self): names = gf.dataframe["name"].copy() gf.dataframe["name"] = None gf.dataframe.at[gf.graph.roots[0], "name"] = names.iloc[0] - path = u"""MATCH ("*", p) + path = """MATCH ("*", p) WHERE p."name" IS NOT NONE""" match = [gf.graph.roots[0]] query = StringQuery(path) @@ -1101,7 +1101,7 @@ def __init__(self): gf.dataframe["name"] = names gf.dataframe.at[gf.graph.roots[0], "name"] = None - path = u"""MATCH ("*", p) + path = """MATCH ("*", p) WHERE p."name" IS NONE""" match = [gf.graph.roots[0]] query = StringQuery(path) @@ -1111,13 +1111,13 @@ def __init__(self): def test_string_conj_compound_query(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) compound_query1 = parse_string_dialect( - u""" + """ {MATCH ("*", p) WHERE p."time (inc)" >= 20 AND p."time (inc)" <= 60} AND {MATCH ("*", p) WHERE p."time (inc)" >= 60} """ ) compound_query2 = parse_string_dialect( - u""" + """ MATCH ("*", p) WHERE {p."time (inc)" >= 20 AND p."time (inc)" <= 60} AND {p."time (inc)" >= 60} """ @@ -1139,13 +1139,13 @@ def test_string_conj_compound_query(mock_graph_literal): def test_string_disj_compound_query(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) compound_query1 = parse_string_dialect( - u""" 
+ """ {MATCH ("*", p) WHERE p."time (inc)" = 5.0} OR {MATCH ("*", p) WHERE p."time (inc)" = 10.0} """ ) compound_query2 = parse_string_dialect( - u""" + """ MATCH ("*", p) WHERE {p."time (inc)" = 5.0} OR {p."time (inc)" = 10.0} """ @@ -1174,13 +1174,13 @@ def test_string_disj_compound_query(mock_graph_literal): def test_cypher_exc_disj_compound_query(mock_graph_literal): gf = GraphFrame.from_literal(mock_graph_literal) compound_query1 = parse_string_dialect( - u""" + """ {MATCH ("*", p) WHERE p."time (inc)" >= 5.0 AND p."time (inc)" <= 10.0} XOR {MATCH ("*", p) WHERE p."time (inc)" = 10.0} """ ) compound_query2 = parse_string_dialect( - u""" + """ MATCH ("*", p) WHERE {p."time (inc)" >= 5.0 AND p."time (inc)" <= 10.0} XOR {p."time (inc)" = 10.0} """ @@ -1215,19 +1215,19 @@ def test_leaf_query(small_mock2): nonleaves = list(nodes - set(matches)) obj_query = ObjectQuery([{"depth": -1}]) str_query_numeric = parse_string_dialect( - u""" + """ MATCH (p) WHERE p."depth" = -1 """ ) str_query_is_leaf = parse_string_dialect( - u""" + """ MATCH (p) WHERE p IS LEAF """ ) str_query_is_not_leaf = parse_string_dialect( - u""" + """ MATCH (p) WHERE p IS NOT LEAF """ @@ -1265,7 +1265,7 @@ def test_string_dialect_all_mode(tau_profile_dir): gf = GraphFrame.from_tau(tau_profile_dir) engine = QueryEngine() query = StringQuery( - u"""MATCH (".")->("+", p) + """MATCH (".")->("+", p) WHERE p."time (inc)" >= 17983.0 """, multi_index_mode="all", @@ -1296,7 +1296,7 @@ def test_string_dialect_any_mode(tau_profile_dir): gf = GraphFrame.from_tau(tau_profile_dir) engine = QueryEngine() query = StringQuery( - u"""MATCH (".", p) + """MATCH (".", p) WHERE p."time" < 24.0 """, multi_index_mode="any", @@ -1314,7 +1314,7 @@ def test_multi_index_mode_assertion_error(tau_profile_dir): _ = ObjectQuery([".", ("*", {"name": "test"})], multi_index_mode="foo") with pytest.raises(AssertionError): _ = StringQuery( - u""" MATCH (".")->("*", p) + """ MATCH (".")->("*", p) WHERE p."name" = "test" """, 
multi_index_mode="foo", From 8e0a91a3b6b25b36c80407ed089a9e2802c8fb31 Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Wed, 24 Apr 2024 12:31:58 -0400 Subject: [PATCH 2/6] Adds support for multi-index columns via tuples in object dialect --- hatchet/query/object_dialect.py | 17 ++++--- hatchet/query/string_dialect.py | 80 ++++++++++++++++++++++++--------- hatchet/tests/query.py | 32 ++++++++++--- 3 files changed, 98 insertions(+), 31 deletions(-) diff --git a/hatchet/query/object_dialect.py b/hatchet/query/object_dialect.py index e4010377..daf55c65 100644 --- a/hatchet/query/object_dialect.py +++ b/hatchet/query/object_dialect.py @@ -106,6 +106,9 @@ def filter_single_series(df_row, key, single_value): matches = True for k, v in attr_filter.items(): + metric_name = k + if isinstance(k, (tuple, list)) and len(k) == 1: + metric_name = k[0] try: _ = iter(v) # Manually raise TypeError if v is a string so that @@ -114,10 +117,12 @@ def filter_single_series(df_row, key, single_value): raise TypeError # Runs if v is not iterable (e.g., list, tuple, etc.) 
except TypeError: - matches = matches and filter_single_series(df_row, k, v) + matches = matches and filter_single_series(df_row, metric_name, v) else: for single_value in v: - matches = matches and filter_single_series(df_row, k, single_value) + matches = matches and filter_single_series( + df_row, metric_name, single_value + ) return matches def filter_dframe(df_row): @@ -186,16 +191,19 @@ def filter_single_dframe(node, df_row, key, single_value): matches = True node = df_row.name.to_frame().index[0][0] for k, v in attr_filter.items(): + metric_name = k + if isinstance(k, (tuple, list)) and len(k) == 1: + metric_name = k[0] try: _ = iter(v) if isinstance(v, str): raise TypeError except TypeError: - matches = matches and filter_single_dframe(node, df_row, k, v) + matches = matches and filter_single_dframe(node, df_row, metric_name, v) else: for single_value in v: matches = matches and filter_single_dframe( - node, df_row, k, single_value + node, df_row, metric_name, single_value ) return matches @@ -208,7 +216,6 @@ def filter_choice(df_row): class ObjectQuery(Query): - """Class for representing and parsing queries using the Object-based dialect.""" def __init__(self, query, multi_index_mode="off"): diff --git a/hatchet/query/string_dialect.py b/hatchet/query/string_dialect.py index 5e0341b3..3d297dac 100644 --- a/hatchet/query/string_dialect.py +++ b/hatchet/query/string_dialect.py @@ -701,7 +701,9 @@ def _parse_num_eq(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -724,7 +726,9 @@ def _parse_num_eq(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -771,7 +775,9 @@ def _parse_num_eq_multi_idx(self, obj): This condition will always be false. 
The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -794,7 +800,9 @@ def _parse_num_eq_multi_idx(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -839,7 +847,9 @@ def _parse_num_lt(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -862,7 +872,9 @@ def _parse_num_lt(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -902,7 +914,9 @@ def _parse_num_lt_multi_idx(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -925,7 +939,9 @@ def _parse_num_lt_multi_idx(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -970,7 +986,9 @@ def _parse_num_gt(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -993,7 +1011,9 @@ def _parse_num_gt(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1033,7 +1053,9 @@ def _parse_num_gt_multi_idx(self, obj): This condition will always be true. 
The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1056,7 +1078,9 @@ def _parse_num_gt_multi_idx(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1101,7 +1125,9 @@ def _parse_num_lte(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1124,7 +1150,9 @@ def _parse_num_lte(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1164,7 +1192,9 @@ def _parse_num_lte_multi_idx(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1187,7 +1217,9 @@ def _parse_num_lte_multi_idx(self, obj): This condition will always be false. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1232,7 +1264,9 @@ def _parse_num_gte(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1255,7 +1289,9 @@ def _parse_num_gte(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1295,7 +1331,9 @@ def _parse_num_gte_multi_idx(self, obj): This condition will always be true. 
The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ @@ -1318,7 +1356,9 @@ def _parse_num_gte_multi_idx(self, obj): This condition will always be true. The statement that triggered this warning is: {} - """.format(obj), + """.format( + obj + ), RedundantQueryFilterWarning, ) return [ diff --git a/hatchet/tests/query.py b/hatchet/tests/query.py index 15c56d69..ff133624 100644 --- a/hatchet/tests/query.py +++ b/hatchet/tests/query.py @@ -8,6 +8,7 @@ import re import numpy as np +import pandas as pd from hatchet import GraphFrame from hatchet.node import traversal_order @@ -45,10 +46,18 @@ def test_construct_object_dialect(): (3, {"time (inc)": 0.1}), {"name": "ibv[_a-zA-Z]*"}, ] + # Note: the comma's in the keys are necessary. In Python, creating a tuple + # from a single string results in a tuple containing every character of + # the string as a separate element. In other words, + # tuple("name") == ( "n", "a", "m", "e" ). + # The comma tells Python to create a tuple with a single element. In other words, + # ("name",) == tuple("name",) == ( "name" ) + path5 = [{("name",): "MPI_[_a-zA-Z]*"}, "*", {("name",): "ibv[_a-zA-Z]*"}] query1 = ObjectQuery(path1) query2 = ObjectQuery(path2) query3 = ObjectQuery(path3) query4 = ObjectQuery(path4) + query5 = ObjectQuery(path5) assert query1.query_pattern[0][0] == "." assert query1.query_pattern[0][1](mock_node_mpi) @@ -105,6 +114,17 @@ def test_construct_object_dialect(): assert not query4.query_pattern[3][1](mock_node_time_false) assert query4.query_pattern[4][0] == "." + assert query5.query_pattern[0][0] == "." 
+ assert query5.query_pattern[0][1](mock_node_mpi) + assert not query5.query_pattern[0][1](mock_node_ibv) + assert not query5.query_pattern[0][1](mock_node_time_true) + assert query5.query_pattern[1][0] == "*" + assert query5.query_pattern[1][1](mock_node_mpi) + assert query5.query_pattern[1][1](mock_node_ibv) + assert query5.query_pattern[1][1](mock_node_time_true) + assert query5.query_pattern[1][1](mock_node_time_false) + assert query5.query_pattern[2][0] == "." + invalid_path = [ {"name": "MPI_[_a-zA-Z]*"}, ({"bad": "wildcard"}, {"time (inc)": 0.1}), @@ -382,9 +402,9 @@ def __init__(self): self.z = "hello" bad_field_test_dict = list(mock_graph_literal) - bad_field_test_dict[0]["children"][0]["children"][0]["metrics"]["list"] = ( - DummyType() - ) + bad_field_test_dict[0]["children"][0]["children"][0]["metrics"][ + "list" + ] = DummyType() gf = GraphFrame.from_literal(bad_field_test_dict) path = [{"name": "foo"}, {"name": "bar"}, {"list": DummyType()}] query = ObjectQuery(path) @@ -994,9 +1014,9 @@ def __init__(self): self.z = "hello" bad_field_test_dict = list(mock_graph_literal) - bad_field_test_dict[0]["children"][0]["children"][0]["metrics"]["list"] = ( - DummyType() - ) + bad_field_test_dict[0]["children"][0]["children"][0]["metrics"][ + "list" + ] = DummyType() gf = GraphFrame.from_literal(bad_field_test_dict) path = """MATCH (p)->(q)->(r) WHERE p."name" = "foo" AND q."name" = "bar" AND p."list" = DummyType() From 2503a8df0da2642ee9417ea66a323a546ce3878f Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Wed, 24 Apr 2024 13:16:38 -0400 Subject: [PATCH 3/6] Adds support for Ruff --- hatchet/tests/query.py | 1 - pyproject.toml | 23 ++++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/hatchet/tests/query.py b/hatchet/tests/query.py index ff133624..a0bcb1e9 100644 --- a/hatchet/tests/query.py +++ b/hatchet/tests/query.py @@ -8,7 +8,6 @@ import re import numpy as np -import pandas as pd from hatchet import GraphFrame from 
hatchet.node import traversal_order diff --git a/pyproject.toml b/pyproject.toml index 47e4ff69..4dec4d54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,5 @@ [build-system] -requires = [ - "setuptools", - "wheel", - "Cython", -] +requires = ["setuptools", "wheel", "Cython"] build-backend = "setuptools.build_meta" [tool.poetry] @@ -17,6 +13,23 @@ authors = [ ] license = "MIT" +[tool.ruff] +line-length = 88 +target-version = 'py37' +include = ['\.pyi?$'] +exclude = [ + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", +] + [tool.black] line-length = 88 target-version = ['py27', 'py35', 'py36', 'py37', 'py38'] From c8bcfdd56cc0f76ba69a0619b5226cc9b8b7b6af Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Wed, 24 Apr 2024 16:07:32 -0400 Subject: [PATCH 4/6] Adds test for parentheses around metric name for string dialect --- hatchet/tests/query.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/hatchet/tests/query.py b/hatchet/tests/query.py index a0bcb1e9..e5dd3277 100644 --- a/hatchet/tests/query.py +++ b/hatchet/tests/query.py @@ -401,9 +401,9 @@ def __init__(self): self.z = "hello" bad_field_test_dict = list(mock_graph_literal) - bad_field_test_dict[0]["children"][0]["children"][0]["metrics"][ - "list" - ] = DummyType() + bad_field_test_dict[0]["children"][0]["children"][0]["metrics"]["list"] = ( + DummyType() + ) gf = GraphFrame.from_literal(bad_field_test_dict) path = [{"name": "foo"}, {"name": "bar"}, {"list": DummyType()}] query = ObjectQuery(path) @@ -852,10 +852,14 @@ def test_construct_string_dialect(): path4 = """MATCH (p)->(3, a)->(q) WHERE p."name" STARTS WITH "MPI" AND a."time (inc)" = 0.1 AND q."name" STARTS WITH "ibv" """ + path5 = """MATCH (p)->("*")->(q) + WHERE p.("name") STARTS WITH "MPI_" AND q.("name") STARTS WITH "ibv" + """ query1 = StringQuery(path1) query2 = StringQuery(path2) query3 = StringQuery(path3) query4 = 
StringQuery(path4) + query5 = StringQuery(path5) assert query1.query_pattern[0][0] == "." assert query1.query_pattern[0][1](mock_node_mpi) @@ -912,6 +916,17 @@ def test_construct_string_dialect(): assert not query4.query_pattern[3][1](mock_node_time_false) assert query4.query_pattern[4][0] == "." + assert query5.query_pattern[0][0] == "." + assert query5.query_pattern[0][1](mock_node_mpi) + assert not query5.query_pattern[0][1](mock_node_ibv) + assert not query5.query_pattern[0][1](mock_node_time_true) + assert query5.query_pattern[1][0] == "*" + assert query5.query_pattern[1][1](mock_node_mpi) + assert query5.query_pattern[1][1](mock_node_ibv) + assert query5.query_pattern[1][1](mock_node_time_true) + assert query5.query_pattern[1][1](mock_node_time_false) + assert query5.query_pattern[2][0] == "." + invalid_path = """MATCH (p)->({"bad": "wildcard"}, a)->(q) WHERE p."name" STARTS WITH "MPI" AND a."time (inc)" = 0.1 AND q."name" STARTS WITH "ibv" @@ -1013,9 +1028,9 @@ def __init__(self): self.z = "hello" bad_field_test_dict = list(mock_graph_literal) - bad_field_test_dict[0]["children"][0]["children"][0]["metrics"][ - "list" - ] = DummyType() + bad_field_test_dict[0]["children"][0]["children"][0]["metrics"]["list"] = ( + DummyType() + ) gf = GraphFrame.from_literal(bad_field_test_dict) path = """MATCH (p)->(q)->(r) WHERE p."name" = "foo" AND q."name" = "bar" AND p."list" = DummyType() From 0b456a5ae14eb13d785d76720879522177ee2f9f Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Wed, 24 Apr 2024 16:59:35 -0400 Subject: [PATCH 5/6] Allows for both integers and strings in metric IDs for string dialect --- hatchet/query/string_dialect.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hatchet/query/string_dialect.py b/hatchet/query/string_dialect.py index 3d297dac..1fb83c48 100644 --- a/hatchet/query/string_dialect.py +++ b/hatchet/query/string_dialect.py @@ -52,7 +52,8 @@ NumNotNan: name=ID '.' prop=MetricId 'IS NOT NAN'; NumInf: name=ID '.' 
prop=MetricId 'IS INF'; NumNotInf: name=ID '.' prop=MetricId 'IS NOT INF'; -MetricId: '(' ids+=STRING[','] ')' | ids=STRING; +MetricId: '(' ids+=SingleMetricId[','] ')' | ids=SingleMetricId; +SingleMetricId: INT | STRING; """ # TextX metamodel for the String-based dialect From 55249460491b8174716886a1c1f0b4242064b3ad Mon Sep 17 00:00:00 2001 From: Ian Lumsden Date: Wed, 24 Apr 2024 17:05:54 -0400 Subject: [PATCH 6/6] Fixes QL compatibility warnings --- hatchet/query/compat.py | 32 +++++++------------------------- hatchet/tests/query.py | 12 ++++++------ 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/hatchet/query/compat.py b/hatchet/query/compat.py index b94433f2..d62a0c5c 100644 --- a/hatchet/query/compat.py +++ b/hatchet/query/compat.py @@ -33,7 +33,6 @@ class AbstractQuery(ABC): - """Base class for all 'old-style' queries.""" @abstractmethod @@ -87,7 +86,6 @@ def _get_new_query(self): class NaryQuery(AbstractQuery): - """Base class for all compound queries that act on and merged N separate subqueries.""" @@ -149,7 +147,6 @@ def _convert_to_new_query(self, subqueries): class AndQuery(NaryQuery): - """Compound query that returns the intersection of the results of the subqueries.""" @@ -160,8 +157,7 @@ def __init__(self, *args): *args (AbstractQuery, str, or list): the subqueries to be performed """ warnings.warn( - "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the \ - future. Please use new-style queries (e.g., hatchet.query.ConjunctionQuery) instead.", + "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the future. 
Please use new-style queries (e.g., hatchet.query.ConjunctionQuery) instead.", DeprecationWarning, stacklevel=2, ) @@ -181,7 +177,6 @@ def _convert_to_new_query(self, subqueries): class OrQuery(NaryQuery): - """Compound query that returns the union of the results of the subqueries""" @@ -192,8 +187,7 @@ def __init__(self, *args): *args (AbstractQuery, str, or list): the subqueries to be performed """ warnings.warn( - "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the \ - future. Please use new-style queries (e.g., hatchet.query.DisjunctionQuery) instead.", + "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the future. Please use new-style queries (e.g., hatchet.query.DisjunctionQuery) instead.", DeprecationWarning, stacklevel=2, ) @@ -213,7 +207,6 @@ def _convert_to_new_query(self, subqueries): class XorQuery(NaryQuery): - """Compound query that returns the symmetric difference (i.e., set-based XOR) of the results of the subqueries""" @@ -224,8 +217,7 @@ def __init__(self, *args): *args (AbstractQuery, str, or list): the subqueries to be performed """ warnings.warn( - "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the \ - future. Please use new-style queries (e.g., hatchet.query.ExclusiveDisjunctionQuery) instead.", + "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the future. Please use new-style queries (e.g., hatchet.query.ExclusiveDisjunctionQuery) instead.", DeprecationWarning, stacklevel=2, ) @@ -245,7 +237,6 @@ def _convert_to_new_query(self, subqueries): class NotQuery(NaryQuery): - """Compound query that returns all nodes in the GraphFrame that are not returned from the subquery.""" @@ -256,8 +247,7 @@ def __init__(self, *args): *args (AbstractQuery, str, or list): the subquery to be performed """ warnings.warn( - "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the \ - future. 
Please use new-style queries (e.g., hatchet.query.NegationQuery) instead.", + "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the future. Please use new-style queries (e.g., hatchet.query.NegationQuery) instead.", DeprecationWarning, stacklevel=2, ) @@ -273,7 +263,6 @@ def _convert_to_new_query(self, subqueries): class QueryMatcher(AbstractQuery): - """Processes and applies base syntax queries and Object-based queries to GraphFrames.""" def __init__(self, query=None): @@ -284,10 +273,7 @@ def __init__(self, query=None): into its internal representation """ warnings.warn( - "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the \ - future. Please use new-style queries instead. For QueryMatcher, the equivalent \ - new-style queries are hatchet.query.Query for base-syntax queries and \ - hatchet.query.ObjectQuery for the object-dialect.", + "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the future. Please use new-style queries instead. For QueryMatcher, the equivalent new-style queries are hatchet.query.Query for base-syntax queries and hatchet.query.ObjectQuery for the object-dialect.", DeprecationWarning, stacklevel=2, ) @@ -348,7 +334,6 @@ def _get_new_query(self): class CypherQuery(QueryMatcher): - """Processes and applies Strinb-based queries to GraphFrames.""" def __init__(self, cypher_query): @@ -358,9 +343,7 @@ def __init__(self, cypher_query): cypher_query (str): the String-based query """ warnings.warn( - "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the \ - future. Please use new-style queries instead. For CypherQuery, the equivalent \ - new-style query is hatchet.query.StringQuery.", + "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the future. Please use new-style queries instead. 
For CypherQuery, the equivalent new-style query is hatchet.query.StringQuery.", DeprecationWarning, stacklevel=2, ) @@ -386,8 +369,7 @@ def parse_cypher_query(cypher_query): (CypherQuery): a Hatchet query for this String-based query """ warnings.warn( - "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the \ - future. Please use new-style queries (e.g., hatchet.query.parse_string_dialect) instead.", + "Old-style queries are deprecated as of Hatchet 2023.1.0 and will be removed in the future. Please use new-style queries (e.g., hatchet.query.parse_string_dialect) instead.", DeprecationWarning, stacklevel=2, ) diff --git a/hatchet/tests/query.py b/hatchet/tests/query.py index e5dd3277..a8082dea 100644 --- a/hatchet/tests/query.py +++ b/hatchet/tests/query.py @@ -401,9 +401,9 @@ def __init__(self): self.z = "hello" bad_field_test_dict = list(mock_graph_literal) - bad_field_test_dict[0]["children"][0]["children"][0]["metrics"]["list"] = ( - DummyType() - ) + bad_field_test_dict[0]["children"][0]["children"][0]["metrics"][ + "list" + ] = DummyType() gf = GraphFrame.from_literal(bad_field_test_dict) path = [{"name": "foo"}, {"name": "bar"}, {"list": DummyType()}] query = ObjectQuery(path) @@ -1028,9 +1028,9 @@ def __init__(self): self.z = "hello" bad_field_test_dict = list(mock_graph_literal) - bad_field_test_dict[0]["children"][0]["children"][0]["metrics"]["list"] = ( - DummyType() - ) + bad_field_test_dict[0]["children"][0]["children"][0]["metrics"][ + "list" + ] = DummyType() gf = GraphFrame.from_literal(bad_field_test_dict) path = """MATCH (p)->(q)->(r) WHERE p."name" = "foo" AND q."name" = "bar" AND p."list" = DummyType()