spgroup · nathaliafab · May 13, 2024 · May 14, 2024 · May 17, 2024 · May 28, 2024
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -11,16 +11,16 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python 3.9
-      uses: actions/setup-python@v2
+    - uses: actions/checkout@v6
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v6
       with:
-        python-version: 3.9
+        python-version: '3.10'
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 pytest mypy
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install flake8 pytest mypy
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names
@@ -30,19 +30,21 @@ jobs:
     - name: Lint with mypy
       run: mypy nimrod/test_suite_generation/ nimrod/test_suites_execution/ nimrod/dynamic_analysis/ nimrod/core nimrod/output_generation nimrod/__main__.py nimrod/smat.py --ignore-missing-imports
     - name: Setup Java
-      uses: actions/setup-java@v2
+      uses: actions/setup-java@v5
       with:
-        distribution: 'adopt'
+        distribution: 'temurin'
         java-version: '8'
     - name: Setup Maven
-      uses: stCarolas/setup-maven@v4.1
+      uses: stCarolas/setup-maven@v5
+      with:
+        maven-version: 3.8.2
     - name: Creating env-config.json
       run: |
-        cd /home/runner/work/SMAT/SMAT/nimrod/tests/
-        java_path="/opt/hostedtoolcache/Java_Adopt_jdk/$(ls /opt/hostedtoolcache/Java_Adopt_jdk)/x64"
-        contents="$(jq --arg java_path "$java_path" '.java_home=$java_path | .maven_home = "/opt/hostedtoolcache/maven/3.5.4/x64"' env-config.json)"
+        # GITHUB_WORKSPACE is the checkout root, so the runner path is not rebuilt from the repository name
+        cd "$GITHUB_WORKSPACE/nimrod/tests/"
+        contents="$(jq --arg java_path "$JAVA_HOME" --arg maven_path "${MAVEN_HOME:-/opt/hostedtoolcache/maven/3.8.2/x64}" '.java_home=$java_path | .maven_home=$maven_path' env-config.json)"
         echo "${contents}" > env-config.json
-        cd /home/runner/work/SMAT/SMAT
+        cd "$GITHUB_WORKSPACE"
     - name: Test with pytest
       run: |
-        pytest -k 'not test_general_behavior_study_semantic_conflict'
+        pytest -k 'not test_general_behavior_study_semantic_conflict' --color=yes
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,17 @@
+services:
+  smat:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile
+      args:
+        USER_ID: "${USER_ID:-1000}"  # host UID; falls back to 1000 when USER_ID is unset or empty
+        GROUP_ID: "${GROUP_ID:-1000}"  # host GID; falls back to 1000 when GROUP_ID is unset or empty
+    image: smat-ubuntu
+    container_name: smat_container
+    volumes:
+      # Mount the current directory to /app in the container
+      - .:/app
+      # Mount the dataset directory to /data/dataset in the container (read-only)
+      - /path/to/your/mergedataset/:/data/dataset:ro
+    stdin_open: true
+    tty: true
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -0,0 +1,44 @@
+FROM ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+ARG USER_ID=1000
+ARG GROUP_ID=1000
+
+# 1. Install system dependencies, Python, and Java in a single layer
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        python3.11 \
+        python3-pip \
+        python3.11-dev \
+        openjdk-8-jdk \
+        maven \
+        git \
+        curl \
+        jq \
+        build-essential \
+        ca-certificates && \
+    # Create appuser with the UID/GID passed as build args (1000 when they are unset or empty)
+    groupadd -g "${GROUP_ID:-1000}" appuser && \
+    useradd -m -u "${USER_ID:-1000}" -g "${GROUP_ID:-1000}" appuser && \
+    # Register Python 3.11 as an alternative for the python3 command
+    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
+    # Remove apt caches and package lists so they are not stored in the layer
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# 2. Set Environment Variables for Java and Maven
+ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
+ENV MAVEN_HOME=/usr/share/maven
+ENV PATH="$MAVEN_HOME/bin:$JAVA_HOME/bin:$PATH"
+
+# 3. Setup working directory and data mount point
+WORKDIR /app
+RUN mkdir -p /data/dataset && chown appuser:appuser /data/dataset
+
+# 4. Install Python dependencies
+COPY --chown=appuser:appuser requirements.txt .
+RUN python3 -m pip install --no-cache-dir --upgrade pip && \
+    python3 -m pip install --no-cache-dir -r requirements.txt ruff mypy pytest
+
+USER appuser
+CMD ["/bin/bash", "-i"]
diff --git a/docker/README.md b/docker/README.md
@@ -0,0 +1,109 @@
+# Running SMAT with Docker Compose
+
+This guide explains how to run SMAT in a containerized environment using **Docker Compose**. This ensures a consistent environment with Python 3.11, Java 8, and Maven, regardless of your host operating system.
+
+## 1. Prerequisites
+
+Install Docker on your system:
+
+* **Docker Desktop** (Recommended for Windows and Mac): [Download here](https://docs.docker.com/desktop/)
+* **Docker Engine** (For Linux): [Installation Guide](https://docs.docker.com/engine/install/)
+
+---
+
+## 2. Configuration
+
+Before running the container, you need to configure three files to ensure the paths match the Docker environment.
+
+### A. Docker Compose (Dataset Path)
+
+Open `docker-compose.yml` and point the dataset volume to your local path:
+
+```yaml
+volumes:
+  - .:/app
+  # Replace the path below with the path to your dataset on your host machine
+  - /path/to/your/mergedataset/:/data/dataset:ro
+
+```
+
+*Note: The `:ro` flag ensures your dataset is read-only for safety.*
+
+### B. SMAT Input Config (`input-smat.json`)
+
+The `input-smat.json` file must be in the **root directory** of the project so that the container can see it (the project root is mounted at `/app`). Inside the file, the `scenarioJars` entries must use paths under `/data/dataset/`, for example:
+
+```json
+{
+  ...
+  "scenarioJars": {
+      "base": "/data/dataset/antlr4/69ff2669eec265e25721dbc27cb00f6c381d0b41/...",
+      ...
+    },
+  ...
+}
+```
+
+### C. Environment Config (`nimrod/tests/env-config.json`)
+
+Set the `input_path` key to the location of `input-smat.json` inside the container. If the file is in the project root:
+
+```json
+"input_path": "/app/input-smat.json",
+```
+
+---
+
+## 3. Running the Container
+
+Navigate to the project root and run the following command according to your OS:
+
+### Linux & macOS (Terminal)
+
+The following command passes your user and group IDs to avoid permission issues with generated files:
+
+```bash
+USER_ID=$(id -u) GROUP_ID=$(id -g) docker compose run --rm --build smat
+```
+
+### Windows (PowerShell)
+
+In PowerShell, the variables are handled differently:
+
+```powershell
+$env:USER_ID=1000; $env:GROUP_ID=1000; docker compose run --rm --build smat
+```
+
+*Note: On Windows, the default UID/GID 1000 is usually sufficient for Docker Desktop.*
+
+---
+
+## 4. Usage Inside the Container
+
+Once the image is built and the container starts, you will be in an interactive Ubuntu shell at `/app`. From there you can run tests or start an analysis:
+
+```bash
+# Check if the dataset is visible
+ls /data/dataset
+
+# Run SMAT analysis
+python3 -m nimrod
+
+# Run tests
+pytest -k 'not test_general_behavior_study_semantic_conflict'
+```
+
+### Command Breakdown:
+
+* `run`: Starts a one-off container for interactive use.
+* `--rm`: Automatically removes the container upon exit to keep your system clean.
+* `--build`: Builds the image before starting the container, so changes to the Dockerfile or requirements are picked up.
+* `smat`: The service name defined in `docker-compose.yml`.
+
+---
+
+## Troubleshooting
+
+* **Dataset Not Found**: Ensure the path on the left side of the colon in `docker-compose.yml` is an absolute path to your local folder.
+* **Permission Denied**: On Linux, double-check that `USER_ID` and `GROUP_ID` match the output of the `id` command on your host terminal.
+* **File Changes**: Since we use volumes, any code change made on your host machine will be instantly reflected inside the container.
diff --git a/nimrod/__main__.py b/nimrod/__main__.py
@@ -1,5 +1,5 @@
 import logging
-from typing import Dict, List
+from typing import Dict, List, Any
 from nimrod.dynamic_analysis.behavior_change_checker import BehaviorChangeChecker
 from nimrod.dynamic_analysis.criteria.first_semantic_conflict_criteria import FirstSemanticConflictCriteria
 from nimrod.dynamic_analysis.criteria.second_semantic_conflict_criteria import SecondSemanticConflictCriteria
@@ -16,6 +16,7 @@
 from nimrod.test_suite_generation.generators.randoop_test_suite_generator import RandoopTestSuiteGenerator
 from nimrod.test_suite_generation.generators.evosuite_differential_test_suite_generator import EvosuiteDifferentialTestSuiteGenerator
 from nimrod.test_suite_generation.generators.evosuite_test_suite_generator import EvosuiteTestSuiteGenerator
+from nimrod.test_suite_generation.generators.ollama_test_suite_generator import OllamaTestSuiteGenerator
 from nimrod.test_suite_generation.generators.project_test_suite_generator import ProjectTestSuiteGenerator
 from nimrod.test_suites_execution.main import TestSuitesExecution, TestSuiteExecutor
 from nimrod.tools.bin import MOD_RANDOOP, RANDOOP
@@ -24,9 +25,9 @@
 from nimrod.input_parsing.input_parser import CsvInputParser, JsonInputParser
 
 
-def get_test_suite_generators(config: Dict[str, str]) -> List[TestSuiteGenerator]:
+def get_test_suite_generators(config: Dict[str, Any]) -> List[TestSuiteGenerator]:
   config_generators = config.get(
-      'test_suite_generators', ['randoop', 'randoop-modified', 'evosuite', 'evosuite-differential', 'project'])
+      'test_suite_generators', ['randoop', 'randoop-modified', 'evosuite', 'evosuite-differential', 'ollama', 'project'])
   generators: List[TestSuiteGenerator] = list()
 
   if 'randoop' in config_generators:
@@ -38,13 +39,21 @@ def get_test_suite_generators(config: Dict[str, str]) -> List[TestSuiteGenerator
     generators.append(EvosuiteTestSuiteGenerator(Java()))
   if 'evosuite-differential' in config_generators:
     generators.append(EvosuiteDifferentialTestSuiteGenerator(Java()))
+  if 'ollama' in config_generators:
+    # Create one generator instance for each configured model
+    api_params = config.get('api_params', {})
+    if api_params:
+      for model_key, model_config in api_params.items():
+        generators.append(OllamaTestSuiteGenerator(Java(), model_key, model_config))
+    else:
+      generators.append(OllamaTestSuiteGenerator(Java()))
   if 'project' in config_generators:
     generators.append(ProjectTestSuiteGenerator(Java()))
 
   return generators
 
 
-def get_output_generators(config: Dict[str, str]) -> List[OutputGenerator]:
+def get_output_generators(config: Dict[str, Any]) -> List[OutputGenerator]:
   config_generators = config.get(
       'output_generators', ['behavior_changes', 'semantic_conflicts', 'test_suites'])
   generators: List[OutputGenerator] = list()
@@ -60,7 +69,7 @@ def get_output_generators(config: Dict[str, str]) -> List[OutputGenerator]:
   return generators
 
 
-def parse_scenarios_from_input(config: Dict[str, str]) -> List[MergeScenarioUnderAnalysis]:
+def parse_scenarios_from_input(config: Dict[str, Any]) -> List[MergeScenarioUnderAnalysis]:
     json_input = config.get('input_path', "")
     csv_input_path = config.get('path_hash_csv', "")
 
@@ -95,7 +104,7 @@ def main():
     if scenario.run_analysis:
       smat.run_tool_for_semmantic_conflict_detection(scenario)
     else:
-      logging.info(f"Skipping tool execution for project f{scenario.project_name}")
+      logging.info(f"Skipping tool execution for project {scenario.project_name}")
 
 
 if __name__ == '__main__':

diff --git a/nimrod/core/merge_scenario_under_analysis.py b/nimrod/core/merge_scenario_under_analysis.py
@@ -1,8 +1,8 @@
-from typing import List, Dict
+from typing import List, Dict, Union
 
 
 class MergeScenarioUnderAnalysis:
-    def __init__(self, project_name: str, run_analysis: bool, scenario_commits: "ScenarioInformation", targets: "Dict[str, List[str]]", scenario_jars: "ScenarioInformation", jar_type: str):
+    def __init__(self, project_name: str, run_analysis: bool, scenario_commits: "ScenarioInformation", targets: "Dict[str, Union[List[Dict[str, str]], List[str]]]", scenario_jars: "ScenarioInformation", jar_type: str):
         self.project_name = project_name
         self.run_analysis = run_analysis
         self.scenario_commits = scenario_commits

diff --git a/nimrod/output_generation/output_generator.py b/nimrod/output_generation/output_generator.py
@@ -11,7 +11,8 @@
 
 
 class OutputGenerator(ABC, Generic[T]):
-    REPORTS_DIRECTORY = path.join(get_base_output_path(), "reports")
+    parent_dir = path.dirname(get_base_output_path())
+    REPORTS_DIRECTORY = path.join(parent_dir, "reports")
 
     def __init__(self, report_name: str) -> None:
         super().__init__()
@@ -27,9 +28,29 @@ def write_report(self, context: OutputGeneratorContext) -> None:
         file_path = path.join(self.REPORTS_DIRECTORY, self._report_name)
 
         logging.info(f"Starting data processing of {self._report_name} report")
-        data = self._generate_report_data(context)
+        new_data = self._generate_report_data(context)
         logging.info(f"Finished data processing of {self._report_name} report")
 
-        with open(file_path, "w") as write:
-            json.dump(data, write)
+        existing_data = self._load_existing_data(file_path)
+
+        if not isinstance(existing_data, list):
+            existing_data = [existing_data] if existing_data else []
+        existing_data.append(new_data)
+
+        self._write_json(file_path, existing_data)
         logging.info(f"Finished generation of {self._report_name} report")
+
+    def _load_existing_data(self, file_path: str):
+        """Return the JSON content stored at file_path by previous runs, or [] if the file is missing or is not valid JSON."""
+        if path.exists(file_path):
+            try:
+                with open(file_path, "r") as read_file:
+                    return json.load(read_file)
+            except json.JSONDecodeError as error:  # keep the best-effort fallback, but leave a trace of the discarded file
+                logging.warning(f"Ignoring unreadable report data in {file_path}: {error}")
+        return []
+
+    def _write_json(self, file_path: str, data) -> None:
+        """Serialize data as JSON indented by four spaces into file_path, replacing any previous content."""
+        with open(file_path, mode="w") as report_file:
+            json.dump(data, fp=report_file, indent=4)