Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion internal/cache/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"gopkg.in/yaml.v3"

"github.com/dependabot/proxy/internal/ctxdata"
"github.com/dependabot/proxy/internal/gitproto"
)

// DB contains the metadata of the disk cache
Expand Down Expand Up @@ -97,8 +98,12 @@ func key(r *http.Request) Key {
k.HeaderHash = hex.EncodeToString(headerHash.Sum(nil))
}
if len(data) > 0 {
hashData := data
if gitproto.IsUploadPackRequest(r) {
hashData = gitproto.NormalizeUploadPackBody(data)
}
hash := sha256.New()
hash.Write(data)
hash.Write(hashData)
k.BodyHash = hex.EncodeToString(hash.Sum(nil))
}
return k
Expand Down
56 changes: 56 additions & 0 deletions internal/cache/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,62 @@ func Test_key(t *testing.T) {
t.Error("headerHash should be blank, got", key.HeaderHash)
}
})

// Integration tests for the gitproto hookup. Edge-case behaviour of the
// normalizer itself lives in internal/gitproto.
const upUrl = "https://github.com/octocat/Hello-World.git/git-upload-pack"
const upCT = "application/x-git-upload-pack-request"
mkUpReq := func(url, ct, body string) *http.Request {
r := httptest.NewRequest("POST", url, strings.NewReader(body))
if ct != "" {
r.Header.Set("Content-Type", ct)
}
return r
}

t.Run("git-upload-pack: agent= drift collapses to one key", func(t *testing.T) {
body1 := "0080want 7fd1a60b01f91b314f59955a4e4d4e80d8edf11d multi_ack_detailed no-done side-band-64k thin-pack ofs-delta agent=git/2.43.0\n" +
"0032have 553c2077f0edc3d5dc5d17262f6aa498e69d6f8e\n0009done\n"
body2 := "0080want 7fd1a60b01f91b314f59955a4e4d4e80d8edf11d multi_ack_detailed no-done side-band-64k thin-pack ofs-delta agent=git/2.53.0\n" +
"0032have 553c2077f0edc3d5dc5d17262f6aa498e69d6f8e\n0009done\n"
if key(mkUpReq(upUrl, upCT, body1)) != key(mkUpReq(upUrl, upCT, body2)) {
t.Error("agent-only difference must collapse")
}
})

t.Run("git-upload-pack: different haves hash distinctly", func(t *testing.T) {
body1 := "0032want 7fd1a60b01f91b314f59955a4e4d4e80d8edf11d\n0000" +
"0032have 553c2077f0edc3d5dc5d17262f6aa498e69d6f8e\n0009done\n"
body2 := "0032want 7fd1a60b01f91b314f59955a4e4d4e80d8edf11d\n0000" +
"0032have a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2\n0009done\n"
if key(mkUpReq(upUrl, upCT, body1)) == key(mkUpReq(upUrl, upCT, body2)) {
t.Error("haves shape the upstream pack and must not collapse")
}
})

t.Run("git-upload-pack: malformed body falls back to raw hashing", func(t *testing.T) {
if key(mkUpReq(upUrl, upCT, "garbage one")) == key(mkUpReq(upUrl, upCT, "garbage two")) {
t.Error("malformed bodies must hash distinctly")
}
})

t.Run("non-git POST is not normalized even with similar substrings", func(t *testing.T) {
const u = "https://api.github.com/graphql"
k1 := key(httptest.NewRequest("POST", u, strings.NewReader(`{"q":"have stuff agent=foo"}`)))
k2 := key(httptest.NewRequest("POST", u, strings.NewReader(`{"q":"have other agent=bar"}`)))
if k1 == k2 {
t.Error("non-git POSTs must not be normalized")
}
})

t.Run("upload-pack path without Content-Type is not normalized", func(t *testing.T) {
const u = "https://example.com/foo/git-upload-pack"
body1 := "0032have 553c2077f0edc3d5dc5d17262f6aa498e69d6f8e\n0009done\n"
body2 := "0032have a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2\n0009done\n"
if key(mkUpReq(u, "", body1)) == key(mkUpReq(u, "", body2)) {
t.Error("missing Content-Type must skip normalization")
}
})
}

type BufferWithClose struct {
Expand Down
118 changes: 118 additions & 0 deletions internal/gitproto/pktline.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package gitproto

// pktType is the kind of a single pkt-line: either a data packet or one of
// the three special framing packets defined by git's smart-HTTP protocol.
//
// Each pkt-line on the wire begins with a 4-hex-digit length that includes
// itself, or is one of: "0000" flush, "0001" delim (v2), "0002" response-end.
// Any length >= 4 is a data packet whose payload is (length - 4) bytes.
// See https://git-scm.com/docs/protocol-common#_pkt_line_format.
type pktType int

// pktData is declared first so that it is the zero value of pktType: a packet
// literal that sets only payload is therefore a data packet.
const (
	// pktData is a length-prefixed packet that carries a payload.
	pktData pktType = iota
	// pktFlush is the "0000" marker.
	pktFlush
	// pktDelim is the "0001" marker.
	pktDelim
	// pktResponseEnd is the "0002" marker.
	pktResponseEnd
)

// packet is one parsed pkt-line. payload is set only when typ == pktData and
// excludes the 4-byte length prefix.
type packet struct {
	// typ says whether this is a data packet or one of the flush, delim or
	// response-end markers.
	typ pktType
	// payload holds the bytes that follow the length prefix of a data packet.
	// parsePktLine stores a sub-slice of its input here rather than a copy, so
	// the payload shares memory with the buffer that was parsed.
	payload []byte
}

// hexDigits maps a nibble value (0-15) to its lowercase ASCII hex digit. It is
// indexed by encodePktLine when writing a data packet's length prefix.
const hexDigits = "0123456789abcdef"

// parseHex4 decodes the first four bytes of b as an ASCII hexadecimal number,
// most significant digit first, without allocating a string. Lowercase and
// uppercase digits are both accepted, and any bytes after the fourth are
// ignored.
//
// It returns ok=false with n == 0 when b holds fewer than four bytes or when
// one of the four bytes is not a hex digit. On success n is in [0, 0xffff].
func parseHex4(b []byte) (n int, ok bool) {
	// Report a short slice as malformed instead of panicking on an
	// out-of-range index; callers treat ok=false as "not a pkt-line".
	if len(b) < 4 {
		return 0, false
	}
	for _, c := range b[:4] {
		var v int
		switch {
		case '0' <= c && c <= '9':
			v = int(c - '0')
		case 'a' <= c && c <= 'f':
			v = int(c-'a') + 10
		case 'A' <= c && c <= 'F':
			v = int(c-'A') + 10
		default:
			return 0, false
		}
		// Shift the accumulated value one nibble left and append this digit.
		n = n<<4 | v
	}
	return n, true
}

// parsePktLine splits data into its sequence of pkt-lines. Empty input yields
// no packets and ok=true.
//
// It returns a nil slice and ok=false on malformed or truncated input, so
// callers can fall back to opaque hashing of the original bytes. That covers:
// fewer than four bytes left where a length prefix is expected, a prefix that
// is not four hex digits, the reserved length 3, and a data length that runs
// past the end of data.
//
// Data payloads are sub-slices of data, not copies.
func parsePktLine(data []byte) (packets []packet, ok bool) {
	const prefixLen = 4

	rest := data
	for len(rest) > 0 {
		if len(rest) < prefixLen {
			return nil, false
		}
		length, valid := parseHex4(rest[:prefixLen])
		if !valid {
			return nil, false
		}

		// A length of at least prefixLen is a data packet; the length counts
		// the prefix itself, so the payload is rest[prefixLen:length].
		if length >= prefixLen {
			if length > len(rest) {
				return nil, false
			}
			packets = append(packets, packet{typ: pktData, payload: rest[prefixLen:length]})
			rest = rest[length:]
			continue
		}

		// Lengths below prefixLen are bare four-byte markers.
		var marker pktType
		switch length {
		case 0:
			marker = pktFlush
		case 1:
			marker = pktDelim
		case 2:
			marker = pktResponseEnd
		default:
			// Reserved; not used by real git. Treat as malformed.
			return nil, false
		}
		packets = append(packets, packet{typ: marker})
		rest = rest[prefixLen:]
	}
	return packets, true
}

// encodePktLine recomputes each data packet's length prefix, which is what
// makes normalization stable across payloads of differing length.
func encodePktLine(packets []packet) []byte {
buf := make([]byte, 0, encodedSize(packets))
for _, p := range packets {
switch p.typ {
case pktFlush:
buf = append(buf, "0000"...)
case pktDelim:
buf = append(buf, "0001"...)
case pktResponseEnd:
buf = append(buf, "0002"...)
case pktData:
n := 4 + len(p.payload)
buf = append(buf,
hexDigits[(n>>12)&0xf],
hexDigits[(n>>8)&0xf],
hexDigits[(n>>4)&0xf],
hexDigits[n&0xf],
)
buf = append(buf, p.payload...)
}
Comment thread
kbukum1 marked this conversation as resolved.
}
return buf
}

// encodedSize returns the buffer capacity encodePktLine reserves for packets:
// four bytes per packet for the length prefix or marker, plus the payload
// length of every data packet.
func encodedSize(packets []packet) int {
	total := 4 * len(packets)
	for i := range packets {
		if packets[i].typ != pktData {
			continue
		}
		total += len(packets[i].payload)
	}
	return total
}
107 changes: 107 additions & 0 deletions internal/gitproto/pktline_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package gitproto

import (
"bytes"
"testing"
)

// TestParsePktLine_Empty checks that nil input parses successfully and yields
// no packets.
func TestParsePktLine_Empty(t *testing.T) {
	got, parsed := parsePktLine(nil)
	if !parsed {
		t.Error("expected ok=true for empty input")
	}
	if count := len(got); count != 0 {
		t.Fatalf("expected 0 packets, got %d", count)
	}
}

// TestParsePktLine_SpecialPackets checks that each four-byte marker parses to
// exactly one packet of the matching type.
func TestParsePktLine_SpecialPackets(t *testing.T) {
	table := []struct {
		input string
		want  pktType
	}{
		{input: "0000", want: pktFlush},
		{input: "0001", want: pktDelim},
		{input: "0002", want: pktResponseEnd},
	}
	for _, tc := range table {
		pkts, ok := parsePktLine([]byte(tc.input))
		if ok && len(pkts) == 1 && pkts[0].typ == tc.want {
			continue
		}
		t.Errorf("input %q: got %+v ok=%v, want type %d", tc.input, pkts, ok, tc.want)
	}
}

// TestParsePktLine_DataPacket checks that a single data packet is parsed with
// its length prefix stripped from the payload.
func TestParsePktLine_DataPacket(t *testing.T) {
	// The prefix 0x000a (10) counts its own four bytes, leaving the six-byte
	// payload "hello\n".
	pkts, ok := parsePktLine([]byte("000ahello\n"))

	matches := ok &&
		len(pkts) == 1 &&
		pkts[0].typ == pktData &&
		string(pkts[0].payload) == "hello\n"
	if !matches {
		t.Errorf("got %+v ok=%v", pkts, ok)
	}
}

// TestParsePktLine_MalformedAndTruncated checks that every kind of invalid
// input is rejected with ok=false.
func TestParsePktLine_MalformedAndTruncated(t *testing.T) {
	rejected := []struct {
		input   string
		failure string
	}{
		// Bad hex prefix.
		{"gggghi", "expected ok=false for malformed length prefix"},
		// Length claims 0x0020 but only 9 bytes available.
		{"0020short", "expected ok=false for truncated packet"},
		// Length 3 is reserved; we treat as malformed.
		{"00030000", "expected ok=false for reserved length 3"},
		// Less than 4 bytes.
		{"ab", "expected ok=false for sub-prefix input"},
	}
	for _, tc := range rejected {
		_, ok := parsePktLine([]byte(tc.input))
		if ok {
			t.Error(tc.failure)
		}
	}
}

// TestParsePktLine_RealV1Body parses a realistic protocol-v1 upload-pack body
// and checks the resulting sequence of packet types.
func TestParsePktLine_RealV1Body(t *testing.T) {
	// Realistic v1 upload-pack body from github.com/octocat/Hello-World
	const body = "00a4want 7fd1a60b01f91b314f59955a4e4d4e80d8edf11d multi_ack_detailed no-done side-band-64k thin-pack no-progress ofs-delta deepen-since deepen-not agent=git/2.43.0\n" +
		"0032want b1b3f9723831141a31a1a7252a213e216ea76e56\n" +
		"0000" +
		"0032have 553c2077f0edc3d5dc5d17262f6aa498e69d6f8e\n" +
		"0009done\n"

	pkts, ok := parsePktLine([]byte(body))
	if !ok {
		t.Fatal("expected ok=true for well-formed v1 body")
	}

	// Two wants, a flush, then one have and the closing done.
	expected := []pktType{pktData, pktData, pktFlush, pktData, pktData}
	if len(pkts) != len(expected) {
		t.Fatalf("got %d packets, want %d", len(pkts), len(expected))
	}
	for i := range expected {
		if got := pkts[i].typ; got != expected[i] {
			t.Errorf("packet %d: got type %d, want %d", i, got, expected[i])
		}
	}
}

// TestParsePktLine_RealV2Body parses a protocol-v2 fetch body and checks the
// packet count and the positions of the delim and flush markers.
func TestParsePktLine_RealV2Body(t *testing.T) {
	const body = "0012command=fetch\n" +
		"0015agent=git/2.43.0\n" +
		"0001" +
		"000ddeepen 1\n" +
		"0032want 7fd1a60b01f91b314f59955a4e4d4e80d8edf11d\n" +
		"0009done\n" +
		"0000"

	pkts, ok := parsePktLine([]byte(body))
	if parsedAll := ok && len(pkts) == 7; !parsedAll {
		t.Fatalf("got %d packets ok=%v, want 7 ok=true", len(pkts), ok)
	}

	// The delim separates the capability section from the arguments, and the
	// flush terminates the request.
	delimOK := pkts[2].typ == pktDelim
	flushOK := pkts[6].typ == pktFlush
	if !(delimOK && flushOK) {
		t.Error("special packets misidentified")
	}
}

// TestEncodePktLine_RoundTrip checks that encoding the parsed packets of a
// well-formed stream reproduces the original bytes exactly.
func TestEncodePktLine_RoundTrip(t *testing.T) {
	// Two data packets interleaved with all three special markers.
	const wire = "000ahello\n" + "0000" + "0001" + "000aworld\n" + "0002"
	input := []byte(wire)

	pkts, ok := parsePktLine(input)
	if !ok {
		t.Fatal("parse failed on well-formed input")
	}

	got := encodePktLine(pkts)
	if bytes.Equal(got, input) {
		return
	}
	t.Errorf("round-trip mismatch:\n in: %q\n out: %q", input, got)
}
Loading
Loading