126 changes: 116 additions & 10 deletions implement/GitCore.IntegrationTests/LoadTreeContentsFromGitHubTests.cs
@@ -81,14 +81,6 @@ public async Task Load_tree_with_custom_http_client_for_profiling()
requestCounter.RequestCount.Should().BeGreaterThan(0, "HTTP requests should have been made");
}

[Fact]
public void Placeholder()
{
/*
* Avoid "Zero tests ran" error in CI as long as there are no real tests yet.
* */
}

[Fact]
public async Task Load_subdirectory_tree_contents()
{
@@ -98,8 +90,9 @@ public async Task Load_subdirectory_tree_contents()
var subdirectoryPath = new[] { "implement", "GitCore" };

// Load the subdirectory contents
var subdirectoryContents = await LoadFromUrl.LoadSubdirectoryContentsFromGitUrlAsync(
repositoryUrl, commitSha, subdirectoryPath);
var subdirectoryContents =
await LoadFromUrl.LoadSubdirectoryContentsFromGitUrlAsync(
repositoryUrl, commitSha, subdirectoryPath);

// Verify that the subdirectory was loaded successfully
subdirectoryContents.Should().NotBeNull("Subdirectory should be loaded");
@@ -152,6 +145,72 @@ public async Task Load_subdirectory_tree_contents()
"Common/EnumerableExtensions.cs should have the expected content");
}

[Fact]
public async Task Load_relatively_small_subdirectory_from_larger_repository()
{
// Create a custom HttpClient with a handler to track data transfer
var dataTrackingHandler = new DataTrackingHandler(new System.Net.Http.SocketsHttpHandler());
using var httpClient = new System.Net.Http.HttpClient(dataTrackingHandler);

// Target: Load the 'guide' subdirectory, which is relatively small compared to others.
var repositoryUrl = "https://github.com/pine-vm/pine.git";
var commitSha = "c837c8199f38aab839c40019a50055e16d100c74";
var subdirectoryPath = new[] { "guide" };

// Load the subdirectory contents
var subdirectoryContents =
await LoadFromUrl.LoadSubdirectoryContentsFromGitUrlAsync(
repositoryUrl, commitSha, subdirectoryPath, httpClient);

// Verify that the subdirectory was loaded successfully
subdirectoryContents.Should().NotBeNull("Subdirectory should be loaded");
subdirectoryContents.Count.Should().BeGreaterThan(0, "Subdirectory should contain files");

// Verify that we have the expected files
subdirectoryContents.Should().ContainKey(
["customizing-elm-app-builds-with-compilation-interfaces.md"],
"The subdirectory should contain an 'customizing-elm-app-builds-with-compilation-interfaces.md' file");

subdirectoryContents.Should().ContainKey(
["how-to-build-a-backend-app-in-elm.md"],
"The subdirectory should contain a 'how-to-build-a-backend-app-in-elm.md' file");

var subtreeAggregateFileContentSize =
subdirectoryContents.Values.Sum(file => file.Length);

// Profile data transfer
var totalBytesReceived = dataTrackingHandler.TotalBytesReceived;
var totalBytesSent = dataTrackingHandler.TotalBytesSent;
var requestCount = dataTrackingHandler.RequestCount;

// Log profiling information for debugging
System.Console.WriteLine($"Data Transfer Profile:");
System.Console.WriteLine($" Total Requests: {requestCount}");
System.Console.WriteLine($" Total Bytes Sent: {totalBytesSent:N0} bytes");
System.Console.WriteLine($" Total Bytes Received: {totalBytesReceived:N0} bytes");
System.Console.WriteLine($" Total Data Transfer: {totalBytesSent + totalBytesReceived:N0} bytes");
System.Console.WriteLine($" Subdirectory Content Size: {subtreeAggregateFileContentSize:N0} bytes");
System.Console.WriteLine($" Files in Subdirectory: {subdirectoryContents.Count}");
System.Console.WriteLine($" Compression Ratio: {(double)totalBytesReceived / subtreeAggregateFileContentSize:F2}x");

// Assert bounds on data transfer
// With blobless clone optimization, we:
// 1. Fetch commit + trees only (blobless pack file)
// 2. Navigate to subdirectory and identify needed blobs
// 3. Fetch only those specific blobs
// This results in significantly less data transfer compared to fetching all files
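// A rough sketch of how that flow can map onto the GitSmartHttp helpers introduced in this change
// (the exact orchestration inside LoadFromUrl may differ):
//   var treesPack = await GitSmartHttp.FetchBloblessPackFileAsync(repositoryUrl, commitSha, httpClient);
//   // ...parse the commit and trees, walk to the subdirectory, collect the blob SHAs it references...
//   var blobsPack = await GitSmartHttp.FetchSpecificObjectsAsync(repositoryUrl, blobShas, httpClient);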

requestCount.Should().BeLessThan(10, "Should not make excessive HTTP requests");

// Set a reasonable upper bound for data transfer with blobless optimization
// We expect data transfer to be close to the actual content size plus some overhead
// for trees, commit, and pack file headers.
var maxExpectedBytes = subtreeAggregateFileContentSize * 4 + 100_000;
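// For illustration only: with roughly 200 KB of subdirectory content (a hypothetical figure),
// this bound allows about 900 KB received, leaving room for trees, the commit, and pack overhead.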

totalBytesReceived.Should().BeLessThan(maxExpectedBytes,
$"Should optimize data transfer for subdirectory (received {totalBytesReceived:N0} bytes)");
}

// Helper class for tracking HTTP requests
private class RequestCountingHandler(System.Net.Http.HttpMessageHandler innerHandler)
: System.Net.Http.DelegatingHandler(innerHandler)
@@ -166,4 +225,51 @@ private class RequestCountingHandler(System.Net.Http.HttpMessageHandler innerHan
return await base.SendAsync(request, cancellationToken);
}
}

// Helper class for tracking data transfer
private class DataTrackingHandler(System.Net.Http.HttpMessageHandler innerHandler)
: System.Net.Http.DelegatingHandler(innerHandler)
{
public int RequestCount { get; private set; }
public long TotalBytesSent { get; private set; }
public long TotalBytesReceived { get; private set; }

protected override async Task<System.Net.Http.HttpResponseMessage> SendAsync(
System.Net.Http.HttpRequestMessage request,
System.Threading.CancellationToken cancellationToken)
{
RequestCount++;

// Track request size
if (request.Content is not null)
{
var requestBytes = await request.Content.ReadAsByteArrayAsync(cancellationToken);
TotalBytesSent += requestBytes.Length;
}

// Send the request
var response = await base.SendAsync(request, cancellationToken);

// Track response size
if (response.Content is not null)
{
// Capture headers before reading content
var originalHeaders = response.Content.Headers.ToList();

var responseBytes = await response.Content.ReadAsByteArrayAsync(cancellationToken);
TotalBytesReceived += responseBytes.Length;

// Re-wrap the content so it can be read again by the caller
response.Content = new System.Net.Http.ByteArrayContent(responseBytes);

// Restore the original content headers
foreach (var header in originalHeaders)
{
response.Content.Headers.TryAddWithoutValidation(header.Key, header.Value);
}
}

return response;
}
}
}
150 changes: 146 additions & 4 deletions implement/GitCore/GitSmartHttp.cs
@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Text;
@@ -94,6 +95,23 @@ public static async Task<ReadOnlyMemory<byte>> FetchPackFileAsync(
string gitUrl,
string commitSha,
HttpClient? httpClient = null)
{
return await FetchPackFileAsync(gitUrl, commitSha, subdirectoryPath: null, httpClient);
}

/// <summary>
/// Fetches a pack file containing only objects needed for a specific subdirectory.
/// </summary>
/// <param name="gitUrl">Git repository URL like https://github.com/owner/repo.git</param>
/// <param name="commitSha">Commit SHA to fetch</param>
/// <param name="subdirectoryPath">Optional subdirectory path to optimize the fetch</param>
/// <param name="httpClient">Optional HttpClient to use for requests. If null, uses a default static client.</param>
/// <returns>Pack file data</returns>
public static async Task<ReadOnlyMemory<byte>> FetchPackFileAsync(
string gitUrl,
string commitSha,
IReadOnlyList<string>? subdirectoryPath,
HttpClient? httpClient = null)
{
httpClient ??= s_httpClient;

@@ -114,8 +132,13 @@ public static async Task<ReadOnlyMemory<byte>> FetchPackFileAsync(
// Step 2: Request the pack file with the specific commit
var uploadPackUrl = $"{gitUrl}/git-upload-pack";

// Build the request body according to Git protocol
var requestBody = BuildUploadPackRequest(commitSha);
// For subdirectory optimization, use shallow fetch to only get the commit without history
// Note: Further optimizing to fetch only the specific subdirectory contents would require:
// 1. Git Protocol v2 with partial clone and sparse checkout support
// 2. Multiple round-trips: fetch trees, navigate to subdirectory, then fetch only those blobs
// The current shallow approach (depth=1) already provides significant optimization
int? shallowDepth = (subdirectoryPath is not null && subdirectoryPath.Count > 0) ? 1 : null;
var requestBody = BuildUploadPackRequest(commitSha, shallowDepth);
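// With shallowDepth = 1, BuildUploadPackRequest below produces a pkt-line body of roughly:
// "want <commitSha> <capabilities> shallow", "deepen 1", a flush packet, and the closing "done" line.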

using var packRequest = new HttpRequestMessage(HttpMethod.Post, uploadPackUrl)
{
@@ -187,14 +210,112 @@ public static async Task<string> FetchBranchCommitShaAsync(
throw new InvalidOperationException($"Branch {branch} not found in repository {owner}/{repo}");
}

private static byte[] BuildUploadPackRequest(string commitSha)
/// <summary>
/// Fetches a blobless pack file (commit and trees only, no blobs) for optimized subdirectory loading.
/// </summary>
/// <param name="gitUrl">Git repository URL like https://github.com/owner/repo.git</param>
/// <param name="commitSha">Commit SHA to fetch</param>
/// <param name="httpClient">Optional HttpClient to use for requests. If null, uses a default static client.</param>
/// <returns>Pack file data containing commit and trees but no blobs</returns>
public static async Task<ReadOnlyMemory<byte>> FetchBloblessPackFileAsync(
string gitUrl,
string commitSha,
HttpClient? httpClient = null)
{
var requestBody = BuildUploadPackRequest(commitSha, shallowDepth: 1, filter: "blob:none");
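// "blob:none" is the standard partial-clone filter: the server omits all blob objects,
// so the returned pack holds only the commit and trees and the blobs are fetched separately.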
return await FetchPackFileWithRequestBodyAsync(gitUrl, requestBody, httpClient);
}

/// <summary>
/// Fetches specific Git objects by their SHAs.
/// </summary>
/// <param name="gitUrl">Git repository URL like https://github.com/owner/repo.git</param>
/// <param name="objectShas">List of object SHAs to fetch</param>
/// <param name="httpClient">Optional HttpClient to use for requests. If null, uses a default static client.</param>
/// <returns>Pack file data containing the requested objects</returns>
public static async Task<ReadOnlyMemory<byte>> FetchSpecificObjectsAsync(
string gitUrl,
IReadOnlyList<string> objectShas,
HttpClient? httpClient = null)
{
var requestBody = BuildUploadPackRequestForSpecificObjects(objectShas);
return await FetchPackFileWithRequestBodyAsync(gitUrl, requestBody, httpClient);
}

/// <summary>
/// Common helper for fetching pack files with a prepared request body.
/// </summary>
private static async Task<ReadOnlyMemory<byte>> FetchPackFileWithRequestBodyAsync(
string gitUrl,
byte[] requestBody,
HttpClient? httpClient)
{
httpClient ??= s_httpClient;

// Ensure the URL ends with .git
if (!gitUrl.EndsWith(".git"))
{
gitUrl = $"{gitUrl}.git";
}

// Step 1: Discover refs
var refsUrl = $"{gitUrl}/info/refs?service=git-upload-pack";
using var refsRequest = new HttpRequestMessage(HttpMethod.Get, refsUrl);
using var refsResponse = await httpClient.SendAsync(refsRequest);
refsResponse.EnsureSuccessStatusCode();
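// The ref advertisement returned here is not parsed further; the commit SHA is already known,
// and the smart HTTP protocol starts with this discovery request before the upload-pack POST.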

// Step 2: Request pack file
var uploadPackUrl = $"{gitUrl}/git-upload-pack";

using var packRequest = new HttpRequestMessage(HttpMethod.Post, uploadPackUrl)
{
Content = new ByteArrayContent(requestBody)
};

packRequest.Content.Headers.ContentType =
new System.Net.Http.Headers.MediaTypeHeaderValue("application/x-git-upload-pack-request");

using var packResponse = await httpClient.SendAsync(packRequest);
packResponse.EnsureSuccessStatusCode();

var responseData = await packResponse.Content.ReadAsByteArrayAsync();
return ExtractPackFileFromResponse(responseData);
}

private static byte[] BuildUploadPackRequest(string commitSha, int? shallowDepth = null, string? filter = null)
{
using var ms = new MemoryStream();

// Want line: want <sha> <capabilities>
var wantLine = $"want {commitSha} {GitProtocolCapabilities}\n";
var capabilities = GitProtocolCapabilities;

if (shallowDepth.HasValue)
{
capabilities = $"{capabilities} shallow";
}

if (filter is not null)
{
capabilities = $"{capabilities} filter";
}

var wantLine = $"want {commitSha} {capabilities}\n";
WritePktLine(ms, wantLine);
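// WritePktLine frames each line with a four-hex-digit length prefix that counts itself plus the payload,
// so e.g. a bare "want <40-hex sha>\n" goes out as "0032want <sha>\n"; passing null writes the flush packet "0000".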

// For shallow clones, request specific depth (only this commit, not its history)
if (shallowDepth.HasValue)
{
var shallowLine = $"deepen {shallowDepth.Value}\n";
WritePktLine(ms, shallowLine);
}

// For filtered fetches, specify the filter
if (filter is not null)
{
var filterLine = $"filter {filter}\n";
WritePktLine(ms, filterLine);
}

// Flush packet
WritePktLine(ms, null);

@@ -221,6 +342,27 @@ private static void WritePktLine(Stream stream, string? line)
}
}

private static byte[] BuildUploadPackRequestForSpecificObjects(IReadOnlyList<string> objectShas)
{
using var ms = new MemoryStream();

// Request each object with want lines
for (var i = 0; i < objectShas.Count; i++)
{
var capabilities = i is 0 ? $" {GitProtocolCapabilities}" : "";
var wantLine = $"want {objectShas[i]}{capabilities}\n";
WritePktLine(ms, wantLine);
}

// Flush packet
WritePktLine(ms, null);

// Done line
WritePktLine(ms, "done\n");

return ms.ToArray();
}

private static ReadOnlyMemory<byte> ExtractPackFileFromResponse(byte[] responseData)
{
// The response is in pkt-line format with side-band