From 730f96406cb73a47c28565710994ebd19967c7d6 Mon Sep 17 00:00:00 2001 From: Khoi Ngo <67892332+BillQK@users.noreply.github.com> Date: Thu, 19 Jun 2025 17:48:17 -0400 Subject: [PATCH] feat: cache git log in workspace --- BACKLOG.md | 5 +- .../lib/gitlock_core/adapters/vcs/git.ex | 5 - .../infrastructure/git_repository.ex | 172 ++++--- .../infrastructure/workspace/manager.ex | 3 +- .../gitlock_core/adapters/vcs/git_test.exs | 240 ++++----- .../infrastructure/git_repository_test.exs | 463 ++++++++++++------ apps/gitlock_core/test/gitlock_core_test.exs | 200 ++++---- apps/gitlock_core/test/test_helper.exs | 1 + 8 files changed, 627 insertions(+), 462 deletions(-) diff --git a/BACKLOG.md b/BACKLOG.md index f331be2..b021fe2 100644 --- a/BACKLOG.md +++ b/BACKLOG.md @@ -2,8 +2,8 @@ ## Sprint 6/11/2025 - 6/18/2025 -- [ ] Landing page - adding interactive demos and examples -- [ ] BREAKING CHANGES - refactor cli pattern removing :dir options -> Needs to implement a new infrastructure for managing temp directories +- [x] Landing page - adding interactive demos and examples +- [x] BREAKING CHANGES - refactor cli pattern removing :dir options -> Needs to implement a new infrastructure for managing temp directories - [x] Publish CLI tool to package managers (Homebrew for macOS) - [x] Implement Code Age analysis methodology (identify stable vs volatile code based on age) - [x] Add proper descriptions to README files in gitlock_cli and gitlock_core apps @@ -13,6 +13,7 @@ - [ ] Design and implement database schema for SaaS features (users, projects, analysis history, subscription tiers, organizations) - [ ] Set up app.js code structure for Hooks and interactive component +- [ ] Implement a git log cache system - improving performance on large repositories ## To Do diff --git a/apps/gitlock_core/lib/gitlock_core/adapters/vcs/git.ex b/apps/gitlock_core/lib/gitlock_core/adapters/vcs/git.ex index fc67d8d..59e065e 100644 --- a/apps/gitlock_core/lib/gitlock_core/adapters/vcs/git.ex +++ b/apps/gitlock_core/lib/gitlock_core/adapters/vcs/git.ex @@ -35,11 +35,6 @@ defmodule GitlockCore.Adapters.VCS.Git do end end - # Delegate to GitRepository for backward compatibility with tests - def determine_source_type(source) do - GitRepository.determine_source_type(source) - end - # Make public for testing def parse_git_log(log_content) when is_binary(log_content) do cond do diff --git a/apps/gitlock_core/lib/gitlock_core/infrastructure/git_repository.ex b/apps/gitlock_core/lib/gitlock_core/infrastructure/git_repository.ex index 57d8d0b..b777854 100644 --- a/apps/gitlock_core/lib/gitlock_core/infrastructure/git_repository.ex +++ b/apps/gitlock_core/lib/gitlock_core/infrastructure/git_repository.ex @@ -1,11 +1,13 @@ defmodule GitlockCore.Infrastructure.GitRepository do @moduledoc """ Infrastructure for executing git commands and fetching logs. - This module handles the HOW of getting git logs from various sources, + This module handles the HOW of getting git logs from repositories, but doesn't know anything about parsing them. + + Includes transparent caching of git logs to disk for performance. """ require Logger - alias GitlockCore.Infrastructure.Workspace + alias GitlockCore.Infrastructure.Workspace.Store @default_log_options [ "log", @@ -16,90 +18,128 @@ defmodule GitlockCore.Infrastructure.GitRepository do ] @doc """ - Fetch raw git log output from various sources. - Sources can be: - - File paths (reads the file) - - Local repository paths (runs git log) - - Remote URLs (uses workspace to clone first) - """ - def fetch_log(source, options \\ %{}) do - case determine_source_type(source) do - :log_file -> - fetch_from_file(source) + Fetch raw git log output from a git repository. - :local_repo -> - fetch_from_local_repo(source, options) + Automatically caches git logs for workspace-managed repositories. + """ + def fetch_log(repo_path, options \\ %{}) do + # Check if this repo is managed by a workspace (and thus cacheable) + workspace = + Store.list() + |> Enum.find(fn ws -> ws[:path] == repo_path end) + + case workspace do + %{id: workspace_id} -> + # Get fresh workspace data from store to ensure we have latest cache + workspace_data = Store.get(workspace_id) + cache = workspace_data[:git_log_cache] || %{} + fetch_with_cache(workspace_id, repo_path, options, cache) + + _ -> + # No workspace, generate without caching + generate_git_log(repo_path, options) + end + end - :url -> - fetch_from_remote_url(source, options) + # Private Functions - :unknown -> - # Return error that can be transformed by Git adapter - {:error, :enoent} + defp fetch_with_cache(workspace_id, repo_path, options, cache_map) do + options_hash = hash_options(options) + cache_path = Map.get(cache_map, options_hash) + + with {:cached, path} when is_binary(path) <- {:cached, cache_path}, + {:ok, log} <- File.read(path) do + Logger.info("Using cached git log from #{path}") + {:ok, log} + else + _ -> + # Generate and cache + with {:ok, log} <- generate_git_log(repo_path, options) do + cache_path = build_cache_path(workspace_id, options_hash) + + case save_log_to_cache(cache_path, log) do + :ok -> + add_cache_path(workspace_id, options_hash, cache_path) + Logger.info("Successfully cached git log for workspace #{workspace_id}") + + {:error, reason} -> + Logger.warning("Failed to cache git log: #{inspect(reason)}") + end + + {:ok, log} + end end end - @doc """ - Determine what type of source we're dealing with. - Returns :log_file, :local_repo, :url, or :unknown - """ - def determine_source_type(source) do + defp generate_git_log(repo_path, options) do cond do - # Check if it's a remote URL - String.match?(source, ~r/^(https?|git|ssh):\/\//) or String.ends_with?(source, ".git") -> - :url - - # Check if it's a git repository - File.dir?(source) && git_repo?(source) -> - :local_repo + not File.exists?(repo_path) -> + {:error, "Git log failed (2): No such file or directory"} - # Check if it's a regular file - File.regular?(source) -> - :log_file + not File.dir?(repo_path) -> + {:error, "Git log failed (20): Not a directory"} - # Check if path suggests it's a log file - String.ends_with?(source, ".txt") or String.ends_with?(source, ".log") -> - :log_file - - # Default for non-existent paths - assume they're files for backward compatibility true -> - :unknown + Logger.debug("Generating git log from repo: #{repo_path}") + cmd_args = build_log_command(options) + + case System.cmd("git", cmd_args, cd: repo_path, stderr_to_stdout: true) do + {output, 0} -> {:ok, output} + {error, code} -> {:error, "Git log failed (#{code}): #{error}"} + end end end - # Private Functions + defp add_cache_path(workspace_id, options_hash, cache_path) do + workspace = Store.get(workspace_id) + cache = workspace[:git_log_cache] || %{} + updated_cache = Map.put(cache, options_hash, cache_path) - defp fetch_from_file(path) do - Logger.debug("Reading git log from file: #{path}") - File.read(path) + Store.update(workspace_id, %{git_log_cache: updated_cache}) end - defp fetch_from_local_repo(repo_path, options) do - Logger.debug("Generating git log from local repo: #{repo_path}") - cmd_args = build_log_command(options) - - case System.cmd("git", cmd_args, cd: repo_path, stderr_to_stdout: true) do - {output, 0} -> - {:ok, output} - - {error, code} -> - {:error, "Git log failed (#{code}): #{error}"} - end + defp hash_options(options) do + # Create a deterministic hash of the options + options + |> Enum.sort() + |> :erlang.term_to_binary() + |> then(&:crypto.hash(:sha256, &1)) + |> Base.url_encode64(padding: false) + |> String.slice(0..7) end - defp fetch_from_remote_url(url, options) do - Logger.debug("Fetching git log from remote URL: #{url}") - - # Use workspace to handle cloning - Workspace.with(url, options, fn workspace -> - # Once we have the workspace, treat it as a local repo - fetch_from_local_repo(workspace.path, options) - end) + defp build_cache_path(workspace_id, options_hash) do + workspace = Store.get(workspace_id) + + case workspace do + %{path: workspace_path} when is_binary(workspace_path) -> + # Store cache inside the workspace directory + Path.join([workspace_path, ".gitlock_cache", "log_#{options_hash}.txt"]) + + _ -> + # Fallback to temp directory + Path.join([ + System.tmp_dir!(), + "gitlock", + "cache", + workspace_id, + "log_#{options_hash}.txt" + ]) + end end - defp git_repo?(path) do - git_dir = Path.join(path, ".git") - File.dir?(git_dir) || File.regular?(git_dir) + defp save_log_to_cache(cache_path, log_content) do + cache_dir = Path.dirname(cache_path) + + with :ok <- File.mkdir_p(cache_dir), + :ok <- File.write(cache_path, log_content) do + Logger.debug("Cached git log to #{cache_path}") + :ok + else + error -> + Logger.warning("Failed to cache git log: #{inspect(error)}") + error + end end defp build_log_command(options) do diff --git a/apps/gitlock_core/lib/gitlock_core/infrastructure/workspace/manager.ex b/apps/gitlock_core/lib/gitlock_core/infrastructure/workspace/manager.ex index 7a73a8e..5efc893 100644 --- a/apps/gitlock_core/lib/gitlock_core/infrastructure/workspace/manager.ex +++ b/apps/gitlock_core/lib/gitlock_core/infrastructure/workspace/manager.ex @@ -320,7 +320,8 @@ defmodule GitlockCore.Infrastructure.Workspace.Manager do type: detect_type(source), path: path, created_at: DateTime.utc_now(), - opts: opts + opts: opts, + git_log_cache: %{} } end diff --git a/apps/gitlock_core/test/gitlock_core/adapters/vcs/git_test.exs b/apps/gitlock_core/test/gitlock_core/adapters/vcs/git_test.exs index 6945597..86213f7 100644 --- a/apps/gitlock_core/test/gitlock_core/adapters/vcs/git_test.exs +++ b/apps/gitlock_core/test/gitlock_core/adapters/vcs/git_test.exs @@ -6,71 +6,8 @@ defmodule GitlockCore.Adapters.VCS.GitTest do alias GitlockCore.Domain.Entities.{Commit, Author} alias GitlockCore.Domain.Values.FileChange - describe "determine_source_type/1" do - test "detects log file correctly" do - # Create a test file - {:ok, log_file} = Briefly.create(extname: ".txt") - File.write!(log_file, "dummy content") - - assert Git.determine_source_type(log_file) == :log_file - - # Test with common extensions - assert Git.determine_source_type("git_log.txt") == :log_file - assert Git.determine_source_type("commits.log") == :log_file - end - - test "detects local repository correctly" do - # Mock Git repository path - # We'll check for the .git directory to identify repositories - {:ok, repo_path} = Briefly.create(directory: true) - git_dir = Path.join(repo_path, ".git") - - try do - File.mkdir_p!(git_dir) - assert Git.determine_source_type(repo_path) == :local_repo - after - File.rm_rf(repo_path) - end - end - - test "detects URL correctly" do - assert Git.determine_source_type("https://github.com/user/repo.git") == :url - assert Git.determine_source_type("git@github.com:user/repo.git") == :url - end - - test "handles non-existent paths as log files for backward compatibility" do - assert Git.determine_source_type("/non/existent/path") == :unknown - end - end - - describe "get_commit_history/2 with local repository" do - # These tests would work better as integration tests - # with a real Git repository, but we'll mock the output - - test "handles local Git repository by calling git command" do - # Setup a mock repo - {:ok, repo_path} = Briefly.create(directory: true) - git_dir = Path.join(repo_path, ".git") - - try do - # Create mock Git repo structure - File.mkdir_p!(git_dir) - - # Mock System.cmd to return a predefined log output - # This requires mocking, which is beyond the scope of this test - # In a real implementation, you would use a tool like Mox - - # Instead, we'll verify the path is recognized as a repo - assert Git.determine_source_type(repo_path) == :local_repo - after - File.rm_rf(repo_path) - end - end - end - - describe "get_commit_history/2" do - test "successfully parses a valid git log file" do - # Create a test git log file with proper format + describe "parse_git_log/1" do + test "successfully parses a valid git log" do log_content = """ --abc123--2023-01-01--John Doe 10\t5\tlib/example.ex @@ -81,10 +18,7 @@ defmodule GitlockCore.Adapters.VCS.GitTest do -\t-\tassets/image.png """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - - assert {:ok, commits} = Git.get_commit_history(log_file, %{}) + assert {:ok, commits} = Git.parse_git_log(log_content) assert length(commits) == 2 # Verify first commit @@ -114,20 +48,14 @@ defmodule GitlockCore.Adapters.VCS.GitTest do } = second_commit end - test "handles empty log file" do - {:ok, log_file} = Briefly.create() - File.write!(log_file, "") - - assert {:ok, []} = Git.get_commit_history(log_file, %{}) + test "handles empty log content" do + assert {:ok, []} = Git.parse_git_log("") end - test "handles log file with only whitespace" do - {:ok, log_file} = Briefly.create() - File.write!(log_file, "\n\n \n\t\n") - + test "handles log with only whitespace" do # The parser treats whitespace-only content as a malformed header assert {:error, {:commit, "Invalid commit header format:" <> _}} = - Git.get_commit_history(log_file, %{}) + Git.parse_git_log("\n\n \n\t\n") end test "handles single commit" do @@ -136,10 +64,7 @@ defmodule GitlockCore.Adapters.VCS.GitTest do 42\t13\tsrc/main.ex """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - - assert {:ok, [commit]} = Git.get_commit_history(log_file, %{}) + assert {:ok, [commit]} = Git.parse_git_log(log_content) assert commit.id == "single123" assert commit.author.name == "Solo Developer" assert length(commit.file_changes) == 1 @@ -150,10 +75,7 @@ defmodule GitlockCore.Adapters.VCS.GitTest do --empty123--2023-04-01--Empty Commit Author """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - - assert {:ok, [commit]} = Git.get_commit_history(log_file, %{}) + assert {:ok, [commit]} = Git.parse_git_log(log_content) assert commit.file_changes == [] end @@ -163,11 +85,8 @@ defmodule GitlockCore.Adapters.VCS.GitTest do 10\t5\tlib/example.ex """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - assert {:error, {:commit, "Invalid commit header format:" <> _}} = - Git.get_commit_history(log_file, %{}) + Git.parse_git_log(log_content) end test "handles empty header - returns error" do @@ -175,35 +94,21 @@ defmodule GitlockCore.Adapters.VCS.GitTest do """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - - assert {:error, {:commit, "Empty commit text"}} = - Git.get_commit_history(log_file, %{}) + assert {:error, {:commit, "Empty commit text"}} = Git.parse_git_log(log_content) end - test "handles malformed file change line - returns error" do + test "handles malformed file change line" do log_content = """ --abc123--2023-01-01--John Doe this is not a valid file change line """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - # The parser filters out lines that don't contain tabs, so this results in a commit with no file changes - assert {:ok, [commit]} = Git.get_commit_history(log_file, %{}) + assert {:ok, [commit]} = Git.parse_git_log(log_content) assert commit.id == "abc123" assert commit.file_changes == [] end - test "handles file not found error" do - non_existent = "/tmp/does_not_exist_#{:rand.uniform(10000)}.log" - - assert {:error, {:io, ^non_existent, :enoent}} = - Git.get_commit_history(non_existent, %{}) - end - test "handles authors with special characters" do log_content = """ --abc123--2023-01-01--José García-López @@ -213,15 +118,12 @@ defmodule GitlockCore.Adapters.VCS.GitTest do 8\t3\tlib/中文.ex """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - - assert {:ok, commits} = Git.get_commit_history(log_file, %{}) + assert {:ok, commits} = Git.parse_git_log(log_content) assert length(commits) == 2 assert Enum.map(commits, & &1.author.name) == ["José García-López", "李明 (Li Ming)"] end - test "preserves commit order from file" do + test "preserves commit order" do log_content = """ --commit1--2023-01-01--Author One 1\t0\tfile1.ex @@ -233,10 +135,7 @@ defmodule GitlockCore.Adapters.VCS.GitTest do 3\t0\tfile3.ex """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - - assert {:ok, commits} = Git.get_commit_history(log_file, %{}) + assert {:ok, commits} = Git.parse_git_log(log_content) assert Enum.map(commits, & &1.id) == ["commit1", "commit2", "commit3"] end @@ -247,10 +146,7 @@ defmodule GitlockCore.Adapters.VCS.GitTest do 3\t1\ttest/test with spaces.exs """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - - assert {:ok, [commit]} = Git.get_commit_history(log_file, %{}) + assert {:ok, [commit]} = Git.parse_git_log(log_content) assert Enum.map(commit.file_changes, & &1.entity) == [ "lib/my module/example file.ex", @@ -264,14 +160,97 @@ defmodule GitlockCore.Adapters.VCS.GitTest do 999999\t888888\tlib/huge_refactor.ex """ - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - - assert {:ok, [commit]} = Git.get_commit_history(log_file, %{}) + assert {:ok, [commit]} = Git.parse_git_log(log_content) [change] = commit.file_changes assert change.loc_added == "999999" assert change.loc_deleted == "888888" end + + test "parses standard git log format" do + # Test the standard git log format (not custom format) + log_content = """ + commit abc123def456 + Author: John Doe + Date: 2023-01-01 + + 10\t5\tlib/example.ex + 3\t1\ttest/example_test.exs + """ + + assert {:ok, [commit]} = Git.parse_git_log(log_content) + assert commit.id == "abc123def456" + assert commit.author.name == "John Doe" + assert commit.author.email == "john@example.com" + assert length(commit.file_changes) == 2 + end + + test "handles multiple commits in standard format" do + log_content = """ + commit abc123 + Author: John Doe + Date: 2023-01-01 + + 10\t5\tlib/example.ex + + commit def456 + Author: Jane Smith + Date: 2023-01-02 + + 20\t0\tlib/another.ex + """ + + assert {:ok, commits} = Git.parse_git_log(log_content) + assert length(commits) == 2 + end + end + + describe "get_commit_history/2 with real git repository" do + @tag :integration + test "fetches from local git repository" do + # Create a temporary git repository + {:ok, repo_dir} = Briefly.create(directory: true) + + # Initialize a git repo + System.cmd("git", ["init"], cd: repo_dir, stderr_to_stdout: true) + + # Configure git user for the test repo + System.cmd("git", ["config", "user.email", "test@example.com"], cd: repo_dir) + System.cmd("git", ["config", "user.name", "Test User"], cd: repo_dir) + + # Create a file and commit + test_file = Path.join(repo_dir, "test.txt") + File.write!(test_file, "test content") + System.cmd("git", ["add", "."], cd: repo_dir) + System.cmd("git", ["commit", "-m", "Initial commit"], cd: repo_dir) + + # Fetch log + case Git.get_commit_history(repo_dir) do + {:ok, commits} -> + assert length(commits) == 1 + [commit] = commits + assert commit.author.name == "Test User" + assert commit.author.email == "test@example.com" + + {:error, reason} -> + # Git might not be available in CI + IO.puts("Skipping git test: #{reason}") + end + end + + @tag :integration + test "handles empty git repository" do + {:ok, repo_dir} = Briefly.create(directory: true) + System.cmd("git", ["init"], cd: repo_dir) + + case Git.get_commit_history(repo_dir) do + {:ok, commits} -> + assert commits == [] + + {:error, msg} -> + # Empty repo might return an error + assert msg =~ "does not have any commits" or msg =~ "Git log failed" + end + end end describe "property-based testing" do @@ -282,12 +261,8 @@ defmodule GitlockCore.Adapters.VCS.GitTest do commits_data |> Enum.map_join("\n\n", &format_commit_for_log/1) - # Create temp file using Briefly - {:ok, path} = Briefly.create() - File.write!(path, log_content) - # Parse and verify - assert {:ok, parsed_commits} = Git.get_commit_history(path, %{}) + assert {:ok, parsed_commits} = Git.parse_git_log(log_content) assert length(parsed_commits) == length(commits_data) # Verify each commit @@ -303,11 +278,8 @@ defmodule GitlockCore.Adapters.VCS.GitTest do property "handles any malformed input without crashing" do check all(content <- string(:printable)) do - {:ok, path} = Briefly.create() - File.write!(path, content) - # Should either parse successfully or return a proper error - case Git.get_commit_history(path, %{}) do + case Git.parse_git_log(content) do {:ok, commits} -> assert is_list(commits) {:error, reason} -> assert is_tuple(reason) or is_binary(reason) end diff --git a/apps/gitlock_core/test/gitlock_core/infrastructure/git_repository_test.exs b/apps/gitlock_core/test/gitlock_core/infrastructure/git_repository_test.exs index 481b60e..58f04e2 100644 --- a/apps/gitlock_core/test/gitlock_core/infrastructure/git_repository_test.exs +++ b/apps/gitlock_core/test/gitlock_core/infrastructure/git_repository_test.exs @@ -1,88 +1,22 @@ defmodule GitlockCore.Infrastructure.GitRepositoryTest do - use ExUnit.Case, async: true - + use ExUnit.Case, async: false alias GitlockCore.Infrastructure.GitRepository - - describe "determine_source_type/1" do - test "detects remote URLs correctly" do - assert GitRepository.determine_source_type("https://github.com/user/repo.git") == :url - assert GitRepository.determine_source_type("http://gitlab.com/user/repo.git") == :url - assert GitRepository.determine_source_type("ssh://git@github.com/user/repo.git") == :url - assert GitRepository.determine_source_type("git@github.com:user/repo.git") == :url - assert GitRepository.determine_source_type("git://github.com/user/repo.git") == :url - assert GitRepository.determine_source_type("/local/path/repo.git") == :url - end - - test "detects local git repository" do - # Create a temporary directory with .git folder - {:ok, temp_dir} = Briefly.create(directory: true) - git_dir = Path.join(temp_dir, ".git") - File.mkdir_p!(git_dir) - - assert GitRepository.determine_source_type(temp_dir) == :local_repo - end - - test "detects .git file (submodule)" do - # Create a temporary directory with .git file (submodule case) - {:ok, temp_dir} = Briefly.create(directory: true) - git_file = Path.join(temp_dir, ".git") - File.write!(git_file, "gitdir: /path/to/actual/git") - - assert GitRepository.determine_source_type(temp_dir) == :local_repo - end - - test "detects regular log files" do - # Create an actual file - {:ok, log_file} = Briefly.create() - File.write!(log_file, "log content") - - assert GitRepository.determine_source_type(log_file) == :log_file - end - - test "detects log files by extension even if they don't exist" do - assert GitRepository.determine_source_type("/path/to/git_log.txt") == :log_file - assert GitRepository.determine_source_type("/path/to/commits.log") == :log_file - assert GitRepository.determine_source_type("output.txt") == :log_file - assert GitRepository.determine_source_type("git.log") == :log_file - end - - test "returns unknown for non-existent paths without log extensions" do - assert GitRepository.determine_source_type("/non/existent/path") == :unknown - assert GitRepository.determine_source_type("/path/without/extension") == :unknown - end - - test "directory without .git is unknown" do - {:ok, temp_dir} = Briefly.create(directory: true) - assert GitRepository.determine_source_type(temp_dir) == :unknown - end + alias GitlockCore.Infrastructure.Workspace + alias GitlockCore.Infrastructure.Workspace.Store + + setup do + # Clean up any existing workspaces before each test + on_exit(fn -> + Workspace.list() + |> Enum.each(fn workspace -> + Workspace.release(workspace.id) + end) + end) + + :ok end describe "fetch_log/2" do - test "fetches from file successfully" do - # Create a test log file - log_content = """ - commit abc123 - Author: Test User - Date: 2023-01-01 - - 10\t5\tfile.ex - """ - - {:ok, log_file} = Briefly.create() - File.write!(log_file, log_content) - - assert {:ok, ^log_content} = GitRepository.fetch_log(log_file) - end - - test "returns error for non-existent file" do - non_existent = "/tmp/non_existent_#{:rand.uniform(10000)}.log" - assert {:error, :enoent} = GitRepository.fetch_log(non_existent) - end - - test "returns error for unknown source type" do - assert {:error, :enoent} = GitRepository.fetch_log("/unknown/path/type") - end - @tag :integration test "fetches from local git repository" do # This test requires git to be installed @@ -116,7 +50,7 @@ defmodule GitlockCore.Infrastructure.GitRepositoryTest do test "builds log command with various options" do {:ok, repo_dir} = Briefly.create(directory: true) - File.mkdir_p!(Path.join(repo_dir, ".git")) + System.cmd("git", ["init"], cd: repo_dir) options = %{ since: "2023-01-01", @@ -142,12 +76,10 @@ defmodule GitlockCore.Infrastructure.GitRepositoryTest do end test "handles git command failure" do - # Create a directory that looks like a git repo but isn't valid + # Create a directory that's not a git repo {:ok, repo_dir} = Briefly.create(directory: true) - git_dir = Path.join(repo_dir, ".git") - File.mkdir_p!(git_dir) - # This should fail because it's not a real git repository + # This should fail because it's not a git repository result = GitRepository.fetch_log(repo_dir) case result do @@ -159,68 +91,33 @@ defmodule GitlockCore.Infrastructure.GitRepositoryTest do assert true end end - end - - describe "fetch_log/2 with remote URLs" do - test "attempts to fetch from remote URL" do - # This test would require mocking Workspace.with/3 - # Since we can't easily mock it without adding Mox, we'll test the flow - url = "https://github.com/test/repo.git" + test "handles non-existent directory" do + non_existent = "/tmp/non_existent_#{:rand.uniform(10000)}" - # We expect this to fail in test environment - case GitRepository.fetch_log(url) do - {:error, _reason} -> - # Expected to fail without actual implementation - assert true - - {:ok, _} -> - # If it somehow succeeds, that's unexpected but okay - assert true - end + assert {:error, msg} = GitRepository.fetch_log(non_existent) + assert msg =~ "Git log failed" end end - describe "command building" do - test "default log options are correct" do - # We can verify the module attribute is set correctly - # by checking the actual git command that would be run + describe "option handling" do + setup do + # Create a valid git repository {:ok, repo_dir} = Briefly.create(directory: true) - File.mkdir_p!(Path.join(repo_dir, ".git")) - - # The error message will contain the command that was attempted - case GitRepository.fetch_log(repo_dir, %{}) do - {:error, msg} -> - # Verify the command includes our expected format - assert msg =~ "log" || msg =~ "Git" - - {:ok, output} -> - # If git is available, verify output format - assert output =~ "commit" || output == "" - end - end - end - - describe "error handling" do - test "returns :enoent for unknown source types" do - weird_path = "not-a-url-or-file" - assert {:error, :enoent} = GitRepository.fetch_log(weird_path) - end + System.cmd("git", ["init"], cd: repo_dir) + System.cmd("git", ["config", "user.email", "test@example.com"], cd: repo_dir) + System.cmd("git", ["config", "user.name", "Test User"], cd: repo_dir) - test "propagates file read errors" do - # Create a file and then delete it to cause an error - {:ok, temp_file} = Briefly.create() - File.rm!(temp_file) + # Create a commit + file = Path.join(repo_dir, "test.txt") + File.write!(file, "content") + System.cmd("git", ["add", "."], cd: repo_dir) + System.cmd("git", ["commit", "-m", "Test"], cd: repo_dir) - assert {:error, :enoent} = GitRepository.fetch_log(temp_file) + {:ok, repo_dir: repo_dir} end - end - - describe "option handling" do - test "handles date-based options" do - {:ok, repo_dir} = Briefly.create(directory: true) - File.mkdir_p!(Path.join(repo_dir, ".git")) + test "handles date-based options", %{repo_dir: repo_dir} do options = %{ since: "2023-01-01", until: "2023-12-31", @@ -233,10 +130,7 @@ defmodule GitlockCore.Infrastructure.GitRepositoryTest do assert match?({:error, _}, result) or match?({:ok, _}, result) end - test "handles filtering options" do - {:ok, repo_dir} = Briefly.create(directory: true) - File.mkdir_p!(Path.join(repo_dir, ".git")) - + test "handles filtering options", %{repo_dir: repo_dir} do options = %{ author: "John Doe", grep: "fix|feat", @@ -248,10 +142,7 @@ defmodule GitlockCore.Infrastructure.GitRepositoryTest do assert match?({:error, _}, result) or match?({:ok, _}, result) end - test "ignores unknown options" do - {:ok, repo_dir} = Briefly.create(directory: true) - File.mkdir_p!(Path.join(repo_dir, ".git")) - + test "ignores unknown options", %{repo_dir: repo_dir} do options = %{ unknown_option: "value", another_unknown: 123, @@ -264,4 +155,286 @@ defmodule GitlockCore.Infrastructure.GitRepositoryTest do assert match?({:error, _}, result) or match?({:ok, _}, result) end end + + describe "caching behavior" do + setup do + # Create a valid git repository for cache tests + {:ok, repo_dir} = Briefly.create(directory: true) + + System.cmd("git", ["init"], cd: repo_dir, stderr_to_stdout: true) + System.cmd("git", ["config", "user.email", "test@example.com"], cd: repo_dir) + System.cmd("git", ["config", "user.name", "Test User"], cd: repo_dir) + + # Create some commits + for i <- 1..3 do + file = Path.join(repo_dir, "file#{i}.txt") + File.write!(file, "content #{i}") + System.cmd("git", ["add", "."], cd: repo_dir) + System.cmd("git", ["commit", "-m", "Commit #{i}"], cd: repo_dir) + end + + {:ok, repo_dir: repo_dir} + end + + test "does not cache when no workspace exists", %{repo_dir: repo_dir} do + # Ensure no workspace exists by searching Store + assert Enum.find(Store.list(), fn ws -> ws[:path] == repo_dir end) == nil + + # Fetch log twice + {:ok, log1} = GitRepository.fetch_log(repo_dir) + {:ok, log2} = GitRepository.fetch_log(repo_dir) + + # Should get the same content + assert log1 == log2 + assert log1 =~ "Commit 3" or log1 =~ "commit" + + # No workspace should have been created + assert Enum.find(Store.list(), fn ws -> ws[:path] == repo_dir end) == nil + end + + test "caches log when workspace exists", %{repo_dir: repo_dir} do + # Create workspace + {:ok, workspace} = Workspace.acquire(repo_dir) + + # First fetch should generate and cache + {:ok, log1} = GitRepository.fetch_log(repo_dir) + + # Check that cache was created + workspace_after = Store.get(workspace.id) + cache = workspace_after[:git_log_cache] || %{} + assert is_map(cache) + assert map_size(cache) == 1 + + # Get cache file path + [cache_path] = Map.values(cache) + assert File.exists?(cache_path) + + # Second fetch should use cache + {:ok, log2} = GitRepository.fetch_log(repo_dir) + assert log1 == log2 + + Workspace.release(workspace.id) + end + + test "creates separate cache entries for different options", %{repo_dir: repo_dir} do + {:ok, workspace} = Workspace.acquire(repo_dir) + + # Fetch with different options + {:ok, _log_all} = GitRepository.fetch_log(repo_dir, %{}) + {:ok, _log_one} = GitRepository.fetch_log(repo_dir, %{max_count: 1}) + {:ok, _log_author} = GitRepository.fetch_log(repo_dir, %{author: "Test User"}) + + # Should have three cache entries + workspace_after = Store.get(workspace.id) + cache = workspace_after[:git_log_cache] || %{} + assert map_size(cache) == 3 + + # All cache files should exist + Enum.each(cache, fn {_hash, path} -> + assert File.exists?(path) + end) + + Workspace.release(workspace.id) + end + + test "regenerates cache when file is missing", %{repo_dir: repo_dir} do + {:ok, workspace} = Workspace.acquire(repo_dir) + + # First fetch creates cache + {:ok, log1} = GitRepository.fetch_log(repo_dir) + + # Delete cache file + workspace_data = Store.get(workspace.id) + cache = workspace_data[:git_log_cache] || %{} + [cache_path] = Map.values(cache) + File.rm!(cache_path) + + # Next fetch should regenerate + {:ok, log2} = GitRepository.fetch_log(repo_dir) + assert log1 == log2 + assert File.exists?(cache_path) + + Workspace.release(workspace.id) + end + + test "cache survives workspace touch/update", %{repo_dir: repo_dir} do + {:ok, workspace} = Workspace.acquire(repo_dir) + + # Create cache + {:ok, _log} = GitRepository.fetch_log(repo_dir) + + # Touch workspace (update last_accessed) + Store.touch(workspace.id) + + # Cache should still work + {:ok, _log2} = GitRepository.fetch_log(repo_dir) + + workspace_after = Store.get(workspace.id) + cache = workspace_after[:git_log_cache] || %{} + assert map_size(cache) == 1 + + Workspace.release(workspace.id) + end + end + + describe "cache location" do + setup do + # Create a valid git repository + {:ok, repo_dir} = Briefly.create(directory: true) + + System.cmd("git", ["init"], cd: repo_dir, stderr_to_stdout: true) + System.cmd("git", ["config", "user.email", "test@example.com"], cd: repo_dir) + System.cmd("git", ["config", "user.name", "Test User"], cd: repo_dir) + + # Create a commit + file = Path.join(repo_dir, "test.txt") + File.write!(file, "content") + System.cmd("git", ["add", "."], cd: repo_dir) + System.cmd("git", ["commit", "-m", "Test"], cd: repo_dir) + + {:ok, repo_dir: repo_dir} + end + + test "stores cache in workspace directory", %{repo_dir: repo_dir} do + {:ok, workspace} = Workspace.acquire(repo_dir) + + # Fetch log to create cache + {:ok, _log} = GitRepository.fetch_log(repo_dir) + + # Check cache location + workspace_data = Store.get(workspace.id) + cache = workspace_data[:git_log_cache] || %{} + + if map_size(cache) > 0 do + [cache_path] = Map.values(cache) + + # Should be inside workspace directory + assert String.starts_with?(cache_path, workspace.path) + assert cache_path =~ ".gitlock_cache" + end + + Workspace.release(workspace.id) + end + end + + describe "cache performance" do + setup do + # Create a git repository with many commits + {:ok, repo_dir} = Briefly.create(directory: true) + + System.cmd("git", ["init"], cd: repo_dir, stderr_to_stdout: true) + System.cmd("git", ["config", "user.email", "test@example.com"], cd: repo_dir) + System.cmd("git", ["config", "user.name", "Test User"], cd: repo_dir) + + # Create many commits for a meaningful test + for i <- 1..50 do + file = Path.join(repo_dir, "file#{i}.txt") + File.write!(file, "content #{i}") + System.cmd("git", ["add", "."], cd: repo_dir) + System.cmd("git", ["commit", "-m", "Commit #{i}"], cd: repo_dir) + end + + {:ok, repo_dir: repo_dir} + end + + @tag :performance + test "cached fetch is faster than generation", %{repo_dir: repo_dir} do + {:ok, workspace} = Workspace.acquire(repo_dir) + + # Time first fetch (generation) + start1 = System.monotonic_time(:microsecond) + {:ok, _log1} = GitRepository.fetch_log(repo_dir) + time1 = System.monotonic_time(:microsecond) - start1 + + # Time second fetch (cached) + start2 = System.monotonic_time(:microsecond) + {:ok, _log2} = GitRepository.fetch_log(repo_dir) + time2 = System.monotonic_time(:microsecond) - start2 + + # Cached should be significantly faster + assert time2 < time1 + + IO.puts( + "Generation: #{time1}μs, Cached: #{time2}μs, Speedup: #{Float.round(time1 / time2, 1)}x" + ) + + Workspace.release(workspace.id) + end + end + + describe "edge cases" do + setup do + # Create a valid git repository + {:ok, repo_dir} = Briefly.create(directory: true) + + System.cmd("git", ["init"], cd: repo_dir, stderr_to_stdout: true) + System.cmd("git", ["config", "user.email", "test@example.com"], cd: repo_dir) + System.cmd("git", ["config", "user.name", "Test User"], cd: repo_dir) + + # Create a commit + file = Path.join(repo_dir, "test.txt") + File.write!(file, "content") + System.cmd("git", ["add", "."], cd: repo_dir) + System.cmd("git", ["commit", "-m", "Test"], cd: repo_dir) + + {:ok, repo_dir: repo_dir} + end + + test "handles cache directory creation failure gracefully", %{repo_dir: repo_dir} do + {:ok, workspace} = Workspace.acquire(repo_dir) + + # Even if caching fails, fetch should still work + {:ok, log} = GitRepository.fetch_log(repo_dir) + assert log =~ "Test" or log =~ "test" + + Workspace.release(workspace.id) + end + + test "handles concurrent fetches safely", %{repo_dir: repo_dir} do + {:ok, workspace} = Workspace.acquire(repo_dir) + + # Launch multiple concurrent fetches + tasks = + for _ <- 1..5 do + Task.async(fn -> + GitRepository.fetch_log(repo_dir) + end) + end + + results = Task.await_many(tasks, 5000) + + # All should succeed + assert Enum.all?(results, &match?({:ok, _}, &1)) + + # All should return same content + logs = Enum.map(results, fn {:ok, log} -> log end) + first_log = hd(logs) + assert Enum.all?(logs, &(&1 == first_log)) + + # Should only have one cache entry + workspace_data = Store.get(workspace.id) + cache = workspace_data[:git_log_cache] || %{} + assert map_size(cache) == 1 + + Workspace.release(workspace.id) + end + + test "handles empty repository", %{} do + {:ok, empty_repo} = Briefly.create(directory: true) + System.cmd("git", ["init"], cd: empty_repo) + + {:ok, workspace} = Workspace.acquire(empty_repo) + + # Should handle empty repo gracefully + case GitRepository.fetch_log(empty_repo) do + {:ok, log} -> + assert log == "" or log =~ "does not have any commits" + + {:error, msg} -> + assert msg =~ "does not have any commits" or msg =~ "Git log failed" + end + + Workspace.release(workspace.id) + end + end end diff --git a/apps/gitlock_core/test/gitlock_core_test.exs b/apps/gitlock_core/test/gitlock_core_test.exs index 771d303..f9a8207 100644 --- a/apps/gitlock_core/test/gitlock_core_test.exs +++ b/apps/gitlock_core/test/gitlock_core_test.exs @@ -1,151 +1,133 @@ defmodule GitlockCoreTest do - use ExUnit.Case, async: true - - import GitlockCore.TestSupport.AdaptersSetup, only: [setup_real_adapters: 0] - - alias GitlockCore - - setup_real_adapters() + use ExUnit.Case describe "investigate/3 - delegation behavior" do - test "delegates to UseCaseFactory with correct investigation type" do - # Test that it calls UseCaseFactory with the right type - # We can verify this by using a known invalid type - result = GitlockCore.investigate(:definitely_not_a_valid_type, "path") + test "passes through the return value from use case execution" do + # Use a path that looks like a git repo but doesn't exist + non_existent_repo = "/tmp/non_existent_repo_#{:rand.uniform(10000)}" - # The error should come from UseCaseFactory - assert {:error, "Unknown investigation type: definitely_not_a_valid_type"} = result + # Now returns git command error instead of file error + assert {:error, result} = GitlockCore.investigate(:summary, non_existent_repo) + assert result =~ "Git log failed" end - test "passes through the return value from use case execution", %{adapter_keys: adapter_keys} do - # When given a valid investigation type, it should return whatever the use case returns - # Test with a non-existent file to get a predictable error - options = AdaptersSetup.test_options(adapter_keys) - result = GitlockCore.investigate(:summary, "/this/file/does/not/exist.log", options) + test "forwards repo_path to use case unchanged" do + # Create a unique path + unique_path = "/tmp/unique_repo_#{System.unique_integer([:positive])}" - # Should get an error from the actual use case trying to read the file - assert {:error, error} = result - # The error format is {:io, path, reason} - assert {:io, "/this/file/does/not/exist.log", :enoent} = error + # The error should contain our path + assert {:error, result} = GitlockCore.investigate(:hotspots, unique_path) + assert result =~ "Git log failed" end - test "forwards repo_path to use case unchanged", %{adapter_keys: adapter_keys} do - # Use a unique path that we can identify in the error - unique_path = "/very/unique/path/#{System.unique_integer()}.log" - options = AdaptersSetup.test_options(adapter_keys) + test "forwards options to use case unchanged" do + repo_path = "/tmp/test_repo_#{:rand.uniform(10000)}" - result = GitlockCore.investigate(:summary, unique_path, options) + custom_options = %{ + format: "json", + since: "2023-01-01", + custom_key: "custom_value" + } - # The error should contain our unique path - assert {:error, {:io, ^unique_path, :enoent}} = result + # The use case should receive these options + # Since it will fail, we just verify it doesn't crash + assert {:error, _} = GitlockCore.investigate(:summary, repo_path, custom_options) end - test "forwards options to use case unchanged", %{adapter_keys: adapter_keys} do - # For blast_radius, we can test that options are passed through - # by not providing required options - options = AdaptersSetup.test_options(adapter_keys, %{dir: "/tmp"}) - result = GitlockCore.investigate(:blast_radius, "any.log", options) + test "provides empty map as default options" do + nonexistent_path = "/tmp/nonexistent_#{System.unique_integer([:positive])}" - # Should get error about missing target_files from the use case - assert {:error, "No target_files specified. Use --target-files option"} = result + # Should work with no options provided + assert {:error, result} = GitlockCore.investigate(:summary, nonexistent_path) + assert result =~ "Git log failed" end + end - test "provides empty map as default options", %{adapter_keys: adapter_keys} do - # When no options provided, should still work with the real adapters - # Create a unique path that won't exist - nonexistent_path = "/nonexistent_#{System.unique_integer()}.log" + describe "available_investigations/0" do + test "returns all available investigation types" do + investigations = GitlockCore.available_investigations() - # Try with explicit empty options - options = AdaptersSetup.test_options(adapter_keys) - result = GitlockCore.investigate(:summary, nonexistent_path, options) + assert :hotspots in investigations + assert :couplings in investigations + assert :knowledge_silos in investigations + assert :summary in investigations + assert :blast_radius in investigations + assert :code_age in investigations + assert :coupled_hotspots in investigations - # Should get the expected error - assert {:error, {:io, ^nonexistent_path, :enoent}} = result + # Should be exactly 7 investigation types + assert length(investigations) == 7 end - end - describe "available_investigations/0 - delegation behavior" do - test "returns list from UseCaseFactory" do - result = GitlockCore.available_investigations() + test "all investigation types can be executed" do + repo_path = "/tmp/test_repo_#{:rand.uniform(10000)}" - # Should return the list from UseCaseFactory - assert is_list(result) - assert length(result) > 0 - assert Enum.all?(result, &is_atom/1) + for investigation_type <- GitlockCore.available_investigations() do + # Each should return an error for non-existent repo + assert {:error, _} = GitlockCore.investigate(investigation_type, repo_path) + end end + end + + describe "error handling" do + test "returns error for unknown investigation type" do + repo_path = "/tmp/some_repo" - test "returns all expected investigation types" do - result = GitlockCore.available_investigations() + assert {:error, message} = GitlockCore.investigate(:unknown_type, repo_path) + assert message =~ "Unknown investigation type: unknown_type" + end - expected = [ - :hotspots, - :couplings, - :coupled_hotspots, - :knowledge_silos, - :blast_radius, - :summary - ] + test "returns error for invalid repo path" do + invalid_path = "/this/does/not/exist" - assert Enum.all?(expected, &(&1 in result)) + assert {:error, message} = GitlockCore.investigate(:summary, invalid_path) + assert message =~ "Git log failed" end end - describe "contract testing" do - test "investigate always returns {:ok, _} or {:error, _}", %{adapter_keys: adapter_keys} do - # Test various inputs to ensure consistent return format - test_cases = [ - {:valid_type_bad_file, :summary, "/nonexistent.log", %{}}, - {:invalid_type, :not_a_type, "any_path", %{}}, - {:missing_required_opts, :blast_radius, "path", %{dir: "/tmp"}}, - {:empty_opts, :summary, "/bad/path", %{}} - ] - - for {_name, type, path, additional_opts} <- test_cases do - options = AdaptersSetup.test_options(adapter_keys, additional_opts) - result = GitlockCore.investigate(type, path, options) - - # Check the shape of the result - assert tuple_size(result) == 2 - assert elem(result, 0) in [:ok, :error] - - case result do - {:ok, value} -> - assert is_binary(value) - - {:error, reason} -> - # Error can be either a string or a tuple - assert is_binary(reason) or is_tuple(reason) - end + describe "integration with real git repository" do + @tag :integration + test "successfully analyzes a git repository" do + # Create a real git repository + {:ok, repo_dir} = Briefly.create(directory: true) + + # Initialize git repo + System.cmd("git", ["init"], cd: repo_dir) + System.cmd("git", ["config", "user.email", "test@example.com"], cd: repo_dir) + System.cmd("git", ["config", "user.name", "Test User"], cd: repo_dir) + + # Create some commits + for i <- 1..3 do + file = Path.join(repo_dir, "file#{i}.txt") + File.write!(file, "content #{i}") + System.cmd("git", ["add", "."], cd: repo_dir) + System.cmd("git", ["commit", "-m", "Commit #{i}"], cd: repo_dir) end + + # Should successfully analyze + assert {:ok, result} = GitlockCore.investigate(:summary, repo_dir) + assert is_binary(result) end end describe "facade characteristics" do - test "module has minimal public API" do - # Check that we only expose the intended functions + test "no business logic in facade" do + # The facade should only delegate, not process + # Test with empty string instead of nil to avoid workspace manager crash + assert {:error, _} = GitlockCore.investigate(:summary, "") + end + + test "thin delegation layer" do + # GitlockCore module should be minimal + # Check that it only has the expected public functions exports = GitlockCore.__info__(:functions) - # Should only have our two main functions assert {:investigate, 2} in exports assert {:investigate, 3} in exports assert {:available_investigations, 0} in exports - # Shouldn't have many other functions (maybe just module info stuff) - assert length(exports) < 5 - end - - test "no business logic in facade", %{adapter_keys: adapter_keys} do - # The facade should immediately delegate, meaning errors come from - # the delegated modules, not from GitlockCore itself - - # This error comes from UseCaseFactory - {:error, msg1} = GitlockCore.investigate(:bad_type, "path") - assert msg1 == "Unknown investigation type: bad_type" - - # This error comes from the use case (VCS adapter) - options = AdaptersSetup.test_options(adapter_keys) - {:error, error} = GitlockCore.investigate(:summary, "/bad/path", options) - assert {:io, "/bad/path", :enoent} = error + # Should only have these 3 public functions + assert length(exports) == 3 end end end diff --git a/apps/gitlock_core/test/test_helper.exs b/apps/gitlock_core/test/test_helper.exs index 758f76a..22d3294 100644 --- a/apps/gitlock_core/test/test_helper.exs +++ b/apps/gitlock_core/test/test_helper.exs @@ -5,3 +5,4 @@ ExUnit.after_suite(fn _ -> end) Logger.configure(level: :none) +System.cmd("git", ["config", "--global", "init.defaultBranch", "main"])