metagraph-dev
diff --git a/‎conftest.py‎
Lines changed: 317 additions & 5 deletions b/‎conftest.py‎
Lines changed: 317 additions & 5 deletions
diff --git a/‎dev-environment.yml‎
Lines changed: 1 addition & 0 deletions b/‎dev-environment.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎mlir_graphblas/functions.py‎
Lines changed: 12 additions & 6 deletions b/‎mlir_graphblas/functions.py‎
Lines changed: 12 additions & 6 deletions
@@ -1,14 +1,326 @@
 import os
 import distutils.core
 import subprocess
+import json
+import jinja2
+import itertools
+import random
+import re
+import pathlib
+import time
+import zlib
 
+from typing import Any, Dict, List, Tuple, Generator, Callable, Literal, Union, Optional
 
-def pytest_configure(config):
-    distutils.core.run_setup(
-        "./setup.py", script_args=["build_ext", "--inplace"], stop_after="run"
+##########
+# Config #
+##########
+
+
+def pytest_configure(config: "_pytest.config.Config"):
+    """
+    Build the compiled extensions and graphblas-opt before any test runs.
+
+    The build is skipped when config has a "workerinput" attribute so that
+    it only runs once, on the main process.
+
+    Raises subprocess.CalledProcessError if the graphblas-opt build script
+    exits with a non-zero status.
+    """
+    if hasattr(config, "workerinput"):  # only run once on main process
+        return
+
+    import sys  # local import: only needed to locate the running interpreter
+
+    distutils.core.run_setup(
+        "./setup.py", script_args=["build_ext", "--inplace"], stop_after="run"
+    )
+    # Ensure graphblas-opt is built. The script is run with the interpreter
+    # that is running pytest (not whatever "python" resolves to on PATH), and
+    # check=True surfaces a failed build here instead of letting it go unnoticed.
+    subprocess.run(
+        [sys.executable, os.path.join("mlir_graphblas", "src", "build.py")],
+        check=True,
+    )
+
+    return
+
+
+def pytest_addoption(parser: "_pytest.config.argparsing.Parser"):
+    """Register the --filecheck-sampling command line option (default "bucket")."""
+    option_settings = dict(
+        action="store",
+        default="bucket",
+        help="Method to sample the space of the templatized FileCheck tests.",
+    )
+    parser.addoption("--filecheck-sampling", **option_settings)
+
+
+def pytest_generate_tests(metafunc: "_pytest.python.Metafunc"):
+    """Parameterize test_filecheck_mlir; any other test function is left untouched."""
+    test_function_name = metafunc.function.__name__
+    if test_function_name != "test_filecheck_mlir":
+        return
+    parameterize_templatized_filecheck_test(metafunc)
+
+
+###########################################
+# Parameterization of test_filecheck_mlir #
+###########################################
+
+# Named groups of parameter values. A template's test specification may give
+# one of these keys (as a string) for a parameter instead of spelling out an
+# explicit list of values; parameter_tuples_from_templates resolves the name
+# to the list stored here.
+NAMED_PARAMETER_VALUE_CHOICES = {
+    "STANDARD_ELEMENT_TYPES": [
+        "i1",
+        "i4",
+        "i8",
+        "i16",
+        "i32",
+        "i64",
+        "f16",
+        "bf16",
+        "f32",
+        "f64",
+        "f80",
+        "f128",
+    ],
+    "MATRIX_APPLY_OPERATORS": ["min"],
+    "SPARSITY_TYPES": ["dense", "compressed", "singleton"],
+    "MATRIX_MULTIPLY_SEMIRINGS": ["plus_times", "plus_pair", "plus_plus"]
+}
+
+# Maps a prefilter name to a predicate over a parameter dict. Functions
+# decorated with prefilter_func are added under their own name. The None key
+# is what gets looked up when a test specification has no "prefilter" entry;
+# its predicate accepts every parameter dict.
+PREFILTER_FUNCTIONS = {None: lambda *args: True}
+
+# Prefilter Functions
+
+def prefilter_func(func: Callable[[Dict[str, str]], bool]) -> Callable[[Dict[str, str]], bool]:
+    """
+    Decorator that registers func in PREFILTER_FUNCTIONS under func.__name__.
+
+    Returns func unchanged. Raises Exception if that name is already registered.
+    """
+    name = func.__name__
+    if name not in PREFILTER_FUNCTIONS:
+        PREFILTER_FUNCTIONS[name] = func
+        return func
+    raise Exception(f"{repr(name)} is already in use as prefilter function name.")
+
+@prefilter_func
+def different_thunk_and_element_type(parameter_dict: Dict[str, str]) -> bool:
+    """Accept only parameter dicts whose "element_type" differs from their "thunk_type"."""
+    element_type = parameter_dict["element_type"]
+    thunk_type = parameter_dict["thunk_type"]
+    return element_type != thunk_type
+
+@prefilter_func
+def sparse_but_not_compressed_sparse(parameter_dict: Dict[str, str]) -> bool:
+    """
+    Accept parameter dicts where at least one of "sparsity0" / "sparsity1" is
+    "compressed", except for the exact pair ("dense", "compressed").
+    """
+    sparsity_pair = (parameter_dict["sparsity0"], parameter_dict["sparsity1"])
+    if "compressed" not in sparsity_pair:
+        return False
+    return sparsity_pair != ("dense", "compressed")
+
+# Template Expansion
+
+def lazy_list_shuffler(input_list: list) -> Generator[Any, None, None]:
+    """
+    Yield every item of input_list exactly once, in random order, without
+    copying or mutating the list.
+
+    This is a lazy Fisher-Yates shuffle over the list's indices: only the
+    indices that have been moved are remembered, so each yielded item costs
+    one random draw and O(1) work, and running the generator to exhaustion
+    is O(n). That matters because consumers such as itertools.product
+    materialize their inputs, i.e. they always exhaust the generator.
+
+    The length of input_list is read once, when iteration starts.
+    """
+    remaining = len(input_list)
+    # relocated[slot] is the index currently sitting in a slot whose original
+    # occupant was already yielded; untouched slots implicitly hold themselves.
+    relocated: Dict[int, int] = {}
+    while remaining > 0:
+        chosen_slot = random.randrange(remaining)
+        last_slot = remaining - 1
+        index = relocated.get(chosen_slot, chosen_slot)
+        # Fill the hole with whatever occupies the last live slot, then shrink
+        # the live range so that slot is never drawn again.
+        relocated[chosen_slot] = relocated.pop(last_slot, last_slot)
+        remaining -= 1
+        yield input_list[index]
+    return
+
+
+def _load_template(template_file: str) -> Tuple[dict, "jinja2.Template"]:
+    """
+    Read template_file and split it into its JSON test specification and the
+    jinja2 template built from the MLIR code that follows the start marker.
+
+    Raises ValueError if the start marker does not occur exactly once or if
+    the specification lacks a "parameters" key or a string "run" value.
+    """
+    start_marker = "### START TEST ###"
+    with open(template_file, "r") as f:
+        contents = f.read()
+    if contents.count(start_marker) != 1:
+        raise ValueError(
+            f"{template_file} must contain the marker {repr(start_marker)} exactly once."
+        )
+    json_string, _, mlir_source = contents.partition(start_marker)
+    test_spec: dict = json.loads(json_string)
+    mlir_template = jinja2.Template(mlir_source, undefined=jinja2.StrictUndefined)
+    if "parameters" not in test_spec:
+        raise ValueError(
+            f'{template_file} does not contain a valid test specification as '
+            'it does not specify a value for the key "parameters".'
+        )
+    elif "run" not in test_spec or not isinstance(test_spec["run"], str):
+        raise ValueError(
+            f'{template_file} does not contain a valid test specification as '
+            'it does not specify a valid value for the key "run".'
+        )
+    return test_spec, mlir_template
+
+
+def _resolve_parameter_choices(test_spec: dict) -> Dict[str, List[str]]:
+    """
+    Map each parameter name of test_spec to its list of possible values,
+    expanding names found in NAMED_PARAMETER_VALUE_CHOICES.
+    """
+    parameter_choices: Dict[str, List[str]] = dict()
+    for parameter_name, parameter_value_choices in test_spec["parameters"].items():
+        if (
+            isinstance(parameter_value_choices, str)
+            and parameter_value_choices in NAMED_PARAMETER_VALUE_CHOICES
+        ):
+            parameter_choices[parameter_name] = NAMED_PARAMETER_VALUE_CHOICES[
+                parameter_value_choices
+            ]
+        elif isinstance(parameter_value_choices, list):
+            parameter_choices[parameter_name] = parameter_value_choices
+        else:
+            raise ValueError(
+                f"{repr(parameter_value_choices)} does not specify a valid set of parameter values."
+            )
+    return parameter_choices
+
+
+def _bucket_parameter_dicts(
+    parameter_choices: Dict[str, List[str]],
+    parameter_dict_filter: Callable[[Dict[str, str]], bool],
+) -> Generator[Dict[str, str], None, None]:
+    """
+    For every value of every parameter, yield the first randomly ordered
+    combination of the other parameters' values that passes the filter
+    (nothing is yielded for a value when no combination passes).
+    """
+    for parameter_name, parameter_possible_values in parameter_choices.items():
+        other_parameter_names = [
+            name for name in parameter_choices if name != parameter_name
+        ]
+        for parameter_value in parameter_possible_values:
+            # A fresh random ordering of every other parameter's values
+            shuffled_other_values = (
+                lazy_list_shuffler(parameter_choices[name])
+                for name in other_parameter_names
+            )
+            # Go through possible parameter dicts until we find a valid one
+            for other_parameter_values in itertools.product(*shuffled_other_values):
+                parameter_dict = dict(zip(other_parameter_names, other_parameter_values))
+                parameter_dict[parameter_name] = parameter_value
+                if parameter_dict_filter(parameter_dict):
+                    yield parameter_dict
+                    break
+
+
+def _sample_parameter_dicts(
+    sampling: Union[int, float, Literal["exhaustive", "bucket"]],
+    parameter_choices: Dict[str, List[str]],
+    parameter_dict_filter: Callable[[Dict[str, str]], bool],
+):
+    """
+    Return an iterable of the parameter dicts selected by sampling.
+
+    Raises ValueError for a float outside [0, 1] or an unsupported sampling.
+    """
+    if sampling == "bucket":
+        return _bucket_parameter_dicts(parameter_choices, parameter_dict_filter)
+
+    # Validate the sampling method before enumerating any combinations
+    if isinstance(sampling, int):
+
+        def sample_size(num_candidates):
+            return min(sampling, num_candidates)
+
+    elif isinstance(sampling, float):
+        if sampling < 0 or sampling > 1:
+            raise ValueError(
+                f"Portion of parameter dicts to sample must be "
+                f"in the range [0, 1], got {sampling}."
+            )
+
+        def sample_size(num_candidates):
+            return int(num_candidates * sampling)
+
+    elif sampling == "exhaustive":
+        sample_size = None
+    else:
+        raise ValueError(f"{repr(sampling)} is not a supported sampling method.")
+
+    # Grab all possible parameter dicts that pass the filter
+    all_parameter_dicts = (
+        dict(zip(parameter_choices, parameter_values))
+        for parameter_values in itertools.product(*parameter_choices.values())
+    )
+    candidates = filter(parameter_dict_filter, all_parameter_dicts)
+    if sample_size is None:
+        return candidates
+    candidates = list(candidates)
+    return random.sample(candidates, sample_size(len(candidates)))
+
+
+def parameter_tuples_from_templates(
+    sampling: Union[int, float, Literal["exhaustive", "bucket"]],
+    seed: Optional[int] = None,
+) -> List[Tuple[str, Tuple[str, str, str, Dict[str, str]]]]:
+    """
+    Expand every "*.template.mlir" file found under this module's directory
+    into concrete test cases.
+
+    Returns a list of tuples of the form (
+        "test id string",
+        (
+            "mlir_code",
+            "test command jinja template",
+            "/template/file/location",
+            {"template_parameter_0": "template_parameter_value_0", ...},
+        ),
+    )
+
+    If seed is not None, the random module is seeded with it first.
+
+    If sampling is a float (in the range [0, 1]), the number of parameter
+    tuples returned will be (total_num_possible_cases * sampling) for each
+    template.
+
+    If sampling is an int, the number of parameter tuples returned will
+    be (sampling) for each template.
+
+    If sampling is "exhaustive", we will return all possible parameter
+    tuples for each template.
+
+    If sampling is "bucket", we will yield parameter tuples as follows:
+        For each template parameter name:
+            For each possible value of that template parameter:
+                Randomly sample one template parameter value for all other
+                    template parameter names
+                Yield the pytest param for the set of template parameter
+
+    Only parameter dicts accepted by the template's prefilter are considered.
+
+    Raises ValueError for a malformed template file or an unsupported
+    sampling, and NameError for an unknown prefilter name.
+    """
+    if seed is not None:
+        random.seed(seed)
+    parameter_tuples = []
+    current_module_dir = pathlib.Path(__file__).parent.absolute()
+    # Sorted because os.walk reports entries in directory-listing order; a
+    # fixed template order makes the cases drawn for a given seed independent
+    # of how the filesystem happens to list the files.
+    template_files = sorted(
+        os.path.join(root, f)
+        for root, _, files in os.walk(current_module_dir, followlinks=True)
+        for f in files
+        if f.endswith(".template.mlir")
+    )
+    for template_file in template_files:
+        # Parse the template file
+        test_spec, mlir_template = _load_template(template_file)
+        prefilter_name = test_spec.get("prefilter")
+        parameter_dict_filter = PREFILTER_FUNCTIONS.get(prefilter_name)
+        if parameter_dict_filter is None:
+            raise NameError(f"Unknown prefilter function named {repr(prefilter_name)}.")
+
+        # Grab test running command
+        test_execution_command_template = test_spec["run"]
+
+        # Grab parameter choices
+        parameter_choices = _resolve_parameter_choices(test_spec)
+
+        # Append one parameter tuple for each selected test case
+        for parameter_dict in _sample_parameter_dicts(
+            sampling, parameter_choices, parameter_dict_filter
+        ):
+            parameter_tuples.append(
+                (
+                    generate_test_id_string(template_file, parameter_dict),
+                    (
+                        mlir_template.render(**parameter_dict),
+                        test_execution_command_template,
+                        template_file,
+                        parameter_dict,
+                    ),
+                )
+            )
+
+    return parameter_tuples
+
+
+def generate_test_id_string(template_file: str, parameter_dict: Dict[str, str]) -> str:
+    """
+    Build a test id from the alphanumeric characters of template_file followed
+    by one "(key:value)" group per parameter, ordered by key, with both key
+    and value passed through re.escape.
+    """
+    file_part = "".join(filter(str.isalnum, template_file))
+    parameter_part = "".join(
+        f"({re.escape(name)}:{re.escape(parameter_dict[name])})"
+        for name in sorted(parameter_dict)
     )
+    return file_part + parameter_part
 
-    # Ensure graphblas-opt is built
-    subprocess.run(["python", os.path.join("mlir_graphblas", "src", "build.py")])
 
+def parameterize_templatized_filecheck_test(metafunc: "_pytest.python.Metafunc"):
+    """
+    Parameterize a test function with the cases expanded from the templates.
+
+    The --filecheck-sampling option is interpreted as an int when it consists
+    only of digits, as a float when it has the form "digits.digits", and is
+    otherwise passed through as a sampling method name.
+
+    When the config has a "workerinput" attribute, the random seed is derived
+    from its "testrunuid" entry; otherwise the current time is used.
+    """
+    sampling_method_string = metafunc.config.getoption("--filecheck-sampling")
+    # \d only matches decimal digits, all of which int() accepts
+    if re.fullmatch(r"\d+", sampling_method_string):
+        sampling = int(sampling_method_string)
+    elif re.fullmatch(r"\d+\.\d+", sampling_method_string):
+        sampling = float(sampling_method_string)
+    else:
+        sampling = sampling_method_string
+    if hasattr(metafunc.config, "workerinput"):
+        seed = zlib.adler32(metafunc.config.workerinput["testrunuid"].encode())
+    else:
+        seed = time.time()
+    parameter_tuples = parameter_tuples_from_templates(sampling, seed)
+    if parameter_tuples:
+        ids, parameter_values = zip(*parameter_tuples)
+    else:
+        # zip(*[]) produces nothing to unpack; hand pytest an empty parameter set
+        ids, parameter_values = None, []
+    metafunc.parametrize(
+        ["mlir_code", "test_command_template", "template_file", "parameter_dict"],
+        parameter_values,
+        ids=ids,
+    )
     return
@@ -10,6 +10,7 @@ dependencies:
   - coverage
   - pytest
   - pytest-cov
+  - pytest-xdist
   - black
 
 # documentation
 
@@ -79,7 +79,8 @@ def get_mlir(self, *, make_private=True):
     {{ body }}
 
 }
-        """
+        """,
+        undefined=jinja2.StrictUndefined,
     )
 
     def get_mlir_module(self, make_private=False):
@@ -169,7 +170,8 @@ def get_mlir(self, *, make_private=True):
 
       {% endif %}
       }
-    """
+    """,
+        undefined=jinja2.StrictUndefined,
     )
 
 
@@ -208,7 +210,8 @@ def get_mlir(self, *, make_private=True):
         %output = graphblas.matrix_select %input { selectors = ["{{ selector }}"] } : tensor<?x?xf64, #CSR64> to tensor<?x?xf64, #CSR64>
         return %output : tensor<?x?xf64, #CSR64>
       }
-    """
+    """,
+        undefined=jinja2.StrictUndefined,
     )
 
 
@@ -250,7 +253,8 @@ def get_mlir(self, *, make_private=True):
 
         return %total : f64
       }
-    """
+    """,
+        undefined=jinja2.StrictUndefined,
     )
 
 
@@ -288,7 +292,8 @@ def get_mlir(self, *, make_private=True):
 
         return %output : tensor<?x?xf64, #CSR64>
       }
-    """
+    """,
+        undefined=jinja2.StrictUndefined,
     )
 
 
@@ -348,5 +353,6 @@ def get_mlir(self, *, make_private=True):
 
         return %output : tensor<?x?xf64, #CSR64>
       }
-    """
+    """,
+        undefined=jinja2.StrictUndefined,
     )
Original file line number	Diff line number	Diff line change
`@@ -79,7 +79,8 @@ def get_mlir(self, *, make_private=True):`
`79`	`79`	`{{ body }}`
`80`	`80`
`81`	`81`	`}`
`82`		`- """`
	`82`	`+ """,`
	`83`	`+ undefined=jinja2.StrictUndefined,`
`83`	`84`	`)`
`84`	`85`
`85`	`86`	`def get_mlir_module(self, make_private=False):`
`@@ -169,7 +170,8 @@ def get_mlir(self, *, make_private=True):`
`169`	`170`
`170`	`171`	`{% endif %}`
`171`	`172`	`}`
`172`		`- """`
	`173`	`+ """,`
	`174`	`+ undefined=jinja2.StrictUndefined,`
`173`	`175`	`)`
`174`	`176`
`175`	`177`
`@@ -208,7 +210,8 @@ def get_mlir(self, *, make_private=True):`
`208`	`210`	`%output = graphblas.matrix_select %input { selectors = ["{{ selector }}"] } : tensor<?x?xf64, #CSR64> to tensor<?x?xf64, #CSR64>`
`209`	`211`	`return %output : tensor<?x?xf64, #CSR64>`
`210`	`212`	`}`
`211`		`- """`
	`213`	`+ """,`
	`214`	`+ undefined=jinja2.StrictUndefined,`
`212`	`215`	`)`
`213`	`216`
`214`	`217`
`@@ -250,7 +253,8 @@ def get_mlir(self, *, make_private=True):`
`250`	`253`
`251`	`254`	`return %total : f64`
`252`	`255`	`}`
`253`		`- """`
	`256`	`+ """,`
	`257`	`+ undefined=jinja2.StrictUndefined,`
`254`	`258`	`)`
`255`	`259`
`256`	`260`
`@@ -288,7 +292,8 @@ def get_mlir(self, *, make_private=True):`
`288`	`292`
`289`	`293`	`return %output : tensor<?x?xf64, #CSR64>`
`290`	`294`	`}`
`291`		`- """`
	`295`	`+ """,`
	`296`	`+ undefined=jinja2.StrictUndefined,`
`292`	`297`	`)`
`293`	`298`
`294`	`299`
`@@ -348,5 +353,6 @@ def get_mlir(self, *, make_private=True):`
`348`	`353`
`349`	`354`	`return %output : tensor<?x?xf64, #CSR64>`
`350`	`355`	`}`
`351`		`- """`
	`356`	`+ """,`
	`357`	`+ undefined=jinja2.StrictUndefined,`
`352`	`358`	`)`