From 9e1f3eb7eb4b9103006985ff7246e4bc004c4b8f Mon Sep 17 00:00:00 2001 From: Billy Lynch Date: Tue, 28 Apr 2026 16:50:21 -0400 Subject: [PATCH 1/2] Verify against raw git object bytes to prevent parser trust-confusion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit go-git's loose object parser uses last-wins semantics for duplicate singleton headers (tree, author, committer, etc.), while git-core uses first-wins. An attacker can craft a commit or tag whose raw bytes hash to one set of contents under git-core but re-encode through go-git to a different signed payload, letting a legitimate signature verify against attacker-controlled bytes. Replace the go-git decode + EncodeWithoutSignature path with SplitCommit and SplitTag, which operate directly on the object-database bytes (the same bytes git-core feeds its verifier) and reject objects with structural ambiguities — duplicate singleton headers, duplicate gpgsig, malformed gpgsig continuations. ObjectHash now reassembles via JoinCommit/JoinTag so the recorded hash matches git-core. Signed-off-by: Billy Lynch --- internal/commands/verify-tag/verify_tag.go | 36 +-- internal/commands/verify/verify.go | 29 +- internal/commands/verify/verify_test.go | 73 +++++ internal/gitsign/invalid_object_test.go | 193 ++++++++++++ pkg/git/git_test.go | 126 +++----- pkg/git/rawobj.go | 216 +++++++++++++ pkg/git/rawobj_test.go | 336 +++++++++++++++++++++ pkg/git/testdata/commit.txt | 45 +++ pkg/git/testdata/tag.txt | 30 ++ pkg/git/verify.go | 79 ++--- 10 files changed, 984 insertions(+), 179 deletions(-) create mode 100644 internal/commands/verify/verify_test.go create mode 100644 internal/gitsign/invalid_object_test.go create mode 100644 pkg/git/rawobj.go create mode 100644 pkg/git/rawobj_test.go create mode 100644 pkg/git/testdata/commit.txt create mode 100644 pkg/git/testdata/tag.txt diff --git a/internal/commands/verify-tag/verify_tag.go b/internal/commands/verify-tag/verify_tag.go index e7674042..e1a041e5 100644 --- a/internal/commands/verify-tag/verify_tag.go +++ b/internal/commands/verify-tag/verify_tag.go @@ -27,6 +27,7 @@ import ( "github.com/sigstore/gitsign/internal/commands/verify" "github.com/sigstore/gitsign/internal/config" "github.com/sigstore/gitsign/internal/gitsign" + "github.com/sigstore/gitsign/pkg/git" "github.com/spf13/cobra" ) @@ -59,32 +60,31 @@ func (o *options) Run(_ io.Writer, args []string) error { return fmt.Errorf("error resolving tag reference: %w", err) } - // Get the tag object - tagObj, err := repo.TagObject(ref.Hash()) + // Read the raw tag object bytes directly from the object store. + // Verifying against the raw bytes (rather than bytes re-encoded through + // go-git) is what git-core does and avoids trust-confusion attacks where + // go-git's loose parser resolves a malformed tag differently than git-core. + obj, err := repo.Storer.EncodedObject(plumbing.TagObject, ref.Hash()) if err != nil { return fmt.Errorf("error reading tag object: %w", err) } - - // Extract the signature - sig := []byte(tagObj.PGPSignature) - p, _ := pem.Decode(sig) - if p == nil || p.Type != "SIGNED MESSAGE" { - return fmt.Errorf("unsupported signature type") - } - - // Get the tag data without the signature - tagData := new(plumbing.MemoryObject) - if err := tagObj.EncodeWithoutSignature(tagData); err != nil { - return err - } - r, err := tagData.Reader() + r, err := obj.Reader() if err != nil { return err } defer r.Close() // nolint:errcheck - data, err := io.ReadAll(r) + + data, sig, err := git.SplitTag(r) if err != nil { - return err + return fmt.Errorf("error extracting tag signature: %w", err) + } + + p, _ := pem.Decode(sig) + if p == nil { + return fmt.Errorf("%w: not a PEM block", git.ErrUnsupportedSignatureType) + } + if p.Type != "SIGNED MESSAGE" { + return fmt.Errorf("%w: %q", git.ErrUnsupportedSignatureType, p.Type) } // Verify the signature diff --git a/internal/commands/verify/verify.go b/internal/commands/verify/verify.go index cbcf93d2..40fcc43a 100644 --- a/internal/commands/verify/verify.go +++ b/internal/commands/verify/verify.go @@ -60,29 +60,28 @@ func (o *options) Run(_ io.Writer, args []string) error { if err != nil { return fmt.Errorf("error resolving commit object: %w", err) } - c, err := repo.CommitObject(*h) + + obj, err := repo.Storer.EncodedObject(plumbing.CommitObject, *h) if err != nil { return fmt.Errorf("error reading commit object: %w", err) } - - sig := []byte(c.PGPSignature) - p, _ := pem.Decode(sig) - if p == nil || p.Type != "SIGNED MESSAGE" { - return fmt.Errorf("unsupported signature type") - } - - c2 := new(plumbing.MemoryObject) - if err := c.EncodeWithoutSignature(c2); err != nil { - return err - } - r, err := c2.Reader() + r, err := obj.Reader() if err != nil { return err } defer r.Close() // nolint:errcheck - data, err := io.ReadAll(r) + + data, sig, err := git.SplitCommit(r) if err != nil { - return err + return fmt.Errorf("error extracting commit signature: %w", err) + } + + p, _ := pem.Decode(sig) + if p == nil { + return fmt.Errorf("%w: not a PEM block", git.ErrUnsupportedSignatureType) + } + if p.Type != "SIGNED MESSAGE" { + return fmt.Errorf("%w: %q", git.ErrUnsupportedSignatureType, p.Type) } v, err := gitsign.NewVerifierWithCosignOpts(ctx, o.Config, &o.CertVerifyOptions) diff --git a/internal/commands/verify/verify_test.go b/internal/commands/verify/verify_test.go new file mode 100644 index 00000000..1877ea9d --- /dev/null +++ b/internal/commands/verify/verify_test.go @@ -0,0 +1,73 @@ +// Copyright 2026 The Sigstore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package verify + +import ( + "errors" + "io" + "testing" + + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/sigstore/gitsign/pkg/git" +) + +// TestRun_RejectsUnsupportedSignatureType confirms the sentinel is wrapped +// so callers can errors.Is on it. End-to-end coverage of the GHSA +// trust-confusion attack lives in +// internal/gitsign/invalid_object_test.go::TestDuplicateTreeTrustConfusion. +func TestRun_RejectsUnsupportedSignatureType(t *testing.T) { + tmpDir := t.TempDir() + repo, err := gogit.PlainInit(tmpDir, false) + if err != nil { + t.Fatalf("PlainInit: %v", err) + } + + // Well-formed commit but with a PGP SIGNATURE (not SIGNED MESSAGE) in gpgsig. + raw := []byte(`tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 +author Alice 1700000000 +0000 +committer Alice 1700000000 +0000 +gpgsig -----BEGIN PGP SIGNATURE----- + ZmFrZQ== + -----END PGP SIGNATURE----- + +hi +`) + + obj := repo.Storer.NewEncodedObject() + obj.SetType(plumbing.CommitObject) + w, err := obj.Writer() + if err != nil { + t.Fatalf("obj.Writer: %v", err) + } + if _, err := w.Write(raw); err != nil { + t.Fatalf("write: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("close: %v", err) + } + h, err := repo.Storer.SetEncodedObject(obj) + if err != nil { + t.Fatalf("SetEncodedObject: %v", err) + } + + t.Chdir(tmpDir) + + opts := &options{} + err = opts.Run(io.Discard, []string{h.String()}) + if !errors.Is(err, git.ErrUnsupportedSignatureType) { + t.Fatalf("want error wrapping ErrUnsupportedSignatureType, got %v", err) + } +} diff --git a/internal/gitsign/invalid_object_test.go b/internal/gitsign/invalid_object_test.go new file mode 100644 index 00000000..f0b31c30 --- /dev/null +++ b/internal/gitsign/invalid_object_test.go @@ -0,0 +1,193 @@ +// Copyright 2026 The Sigstore Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package gitsign + +import ( + "bytes" + "context" + "crypto/x509" + "crypto/x509/pkix" + "fmt" + "io" + "math/big" + "strings" + "testing" + "time" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/storage/memory" + "github.com/sigstore/gitsign/internal/signature" + "github.com/sigstore/gitsign/pkg/git" +) + +// TestDuplicateTreeTrustConfusion is an end-to-end reproduction of a parser +// trust-confusion attack: an attacker replays a legitimate signature against +// a malformed commit with two tree headers. go-git's loose parser keeps the +// last tree (so a signature over that canonical form verifies) while git-core +// and the on-disk hash reflect the first. The test: +// +// 1. Confirms the crafted signature really does verify against the re-encoded +// (last-wins) bytes — proving the PoC is a working forgery, not a typo. +// 2. Confirms the fix: when the verifier is fed the raw object bytes (the +// SplitCommit output), the signature fails to verify because the bytes +// differ from what was signed. +// 3. Confirms a well-formed commit with the same signature still verifies — +// guards against an over-aggressive verifier path. +func TestDuplicateTreeTrustConfusion(t *testing.T) { + cert, priv := generateCert(t, &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "alice"}, + NotBefore: time.Now().Add(-time.Minute), + NotAfter: time.Now().Add(time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageCodeSigning}, + }) + + const ( + legitTree = "b333504b8cf3d9c314fed2cc242c5c38e89534a5" + attackerTree = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + author = "author Alice 1700000000 +0000" + committer = "committer Alice 1700000000 +0000" + message = "legit commit\n" + ) + + // Canonical commit body — this is what Alice intended to sign. A single + // tree header, message "legit commit". + canonical := []byte(fmt.Sprintf("tree %s\n%s\n%s\n\n%s", + legitTree, author, committer, message)) + + id := &identity{cert: cert, priv: priv} + resp, err := signature.Sign(context.Background(), id, canonical, signature.SignOptions{ + Detached: true, + Armor: true, + IncludeCerts: 0, + }) + if err != nil { + t.Fatalf("Sign: %v", err) + } + + roots := x509.NewCertPool() + roots.AddCert(cert) + gv, err := git.NewCertVerifier(git.WithRootPool(roots)) + if err != nil { + t.Fatalf("NewCertVerifier: %v", err) + } + + malformedRaw := []byte(fmt.Sprintf( + "tree %s\ntree %s\n%s\n%s\n%s\n%s", + attackerTree, legitTree, + author, committer, + indent("gpgsig ", string(resp.Signature)), + message, + )) + + t.Run("signature is a genuine forgery against the re-encoded form", func(t *testing.T) { + // Simulate the pre-fix behavior: load the malformed commit through + // go-git and re-encode it via EncodeWithoutSignature (which drops the + // first tree under last-wins), then verify. This MUST succeed, + // otherwise the PoC isn't actually demonstrating the attack and the + // rejection assertion below would pass vacuously. + reencoded := reencodeViaGoGit(t, malformedRaw) + + if _, err := gv.Verify(context.Background(), reencoded, resp.Signature, true); err != nil { + t.Fatalf("pre-fix behavior check: expected signature to verify over re-encoded bytes (proves the PoC is genuine), got: %v", err) + } + }) + + t.Run("fix: signature fails to verify against the raw malformed bytes", func(t *testing.T) { + // SplitCommit hands the verifier the raw object bytes (with both + // tree headers intact) instead of the go-git-normalized form. The + // signature was made over the canonical (single-tree) bytes, so the + // cryptographic check rejects this. + body, sig, err := git.SplitCommit(bytes.NewReader(malformedRaw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + if _, err := gv.Verify(context.Background(), body, sig, true); err == nil { + t.Fatalf("expected signature verification to fail against malformed raw bytes, got nil error") + } + }) + + t.Run("fix accepts the legitimate commit and signature verifies", func(t *testing.T) { + // Positive control: a well-formed commit with the same signature + // passes SplitCommit and the signature verifies. Guards against an + // over-aggressive validator that also rejects legitimate commits. + wellFormed := []byte(fmt.Sprintf( + "tree %s\n%s\n%s\n%s\n%s", + legitTree, + author, committer, + indent("gpgsig ", string(resp.Signature)), + message, + )) + + data, sig, err := git.SplitCommit(bytes.NewReader(wellFormed)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + if _, err := gv.Verify(context.Background(), data, sig, true); err != nil { + t.Errorf("signature verify over well-formed bytes: %v", err) + } + }) +} + +// indent formats the first line with the given prefix and subsequent lines +// with a single leading space — the git gpgsig header wire format. The output +// ends with a newline suitable for placement as a commit header followed by +// the blank-line message separator. +func indent(prefix, body string) string { + body = strings.TrimSuffix(body, "\n") + lines := strings.Split(body, "\n") + return prefix + strings.Join(lines, "\n ") + "\n" +} + +// reencodeViaGoGit mirrors exactly what the pre-fix gitsign verify path did: +// parse the raw commit through go-git and re-encode it without the signature +// header. This exercises the last-wins normalization that made the GHSA PoC +// work. +func reencodeViaGoGit(t *testing.T, raw []byte) []byte { + t.Helper() + obj := memory.NewStorage().NewEncodedObject() + obj.SetType(plumbing.CommitObject) + w, err := obj.Writer() + if err != nil { + t.Fatal(err) + } + if _, err := w.Write(raw); err != nil { + t.Fatal(err) + } + if err := w.Close(); err != nil { + t.Fatal(err) + } + + var c object.Commit + if err := c.Decode(obj); err != nil { + t.Fatal(err) + } + out := memory.NewStorage().NewEncodedObject() + if err := c.EncodeWithoutSignature(out); err != nil { + t.Fatal(err) + } + r, err := out.Reader() + if err != nil { + t.Fatal(err) + } + defer r.Close() // nolint:errcheck + data, err := io.ReadAll(r) + if err != nil { + t.Fatal(err) + } + return data +} diff --git a/pkg/git/git_test.go b/pkg/git/git_test.go index f5b38183..81f5f633 100644 --- a/pkg/git/git_test.go +++ b/pkg/git/git_test.go @@ -14,104 +14,60 @@ package git -import "testing" - -const ( - // These are real commit values generated in a test repo that were manually verified. - - // Rekor index: 2802961 - tagBody = `object 040b9af339e69d18848b7bbe05cb27ee42bb0161 -type commit -tag signed-tag2 -tagger Billy Lynch 1656531453 -0400 - -asdf -` - tagSig = `-----BEGIN SIGNED MESSAGE----- -MIIEBQYJKoZIhvcNAQcCoIID9jCCA/ICAQExDTALBglghkgBZQMEAgEwCwYJKoZI -hvcNAQcBoIICpjCCAqIwggIooAMCAQICFGc8V7+B2VlJeFLpglonkbyb2kVeMAoG -CCqGSM49BAMDMDcxFTATBgNVBAoTDHNpZ3N0b3JlLmRldjEeMBwGA1UEAxMVc2ln -c3RvcmUtaW50ZXJtZWRpYXRlMB4XDTIyMDYyOTE5MzczOVoXDTIyMDYyOTE5NDcz -OVowADBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABP8JBFjhGqQsQCBmZqyuSHcG -KZpDDRdpq7cl8Bhwuvu9A2bDz0gcuA/Nv18fKtikguBw6YBmEPi8S/YMYgMctVyj -ggFHMIIBQzAOBgNVHQ8BAf8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUHAwMwHQYD -VR0OBBYEFMhi60DZPBYkwhDEuiltjyvxYYTDMB8GA1UdIwQYMBaAFN/T6c9WJBGW -+ajY6ShVosYuGGQ/MCIGA1UdEQEB/wQYMBaBFGJpbGx5QGNoYWluZ3VhcmQuZGV2 -MCwGCisGAQQBg78wAQEEHmh0dHBzOi8vZ2l0aHViLmNvbS9sb2dpbi9vYXV0aDCB -iQYKKwYBBAHWeQIEAgR7BHkAdwB1AAhgkvAoUv9oRdHRayeEnEVnGKwWPcM40m3m -vCIGNm9yAAABgbD4HlAAAAQDAEYwRAIgON4g6BzdFgOIcCFk+8EXKpEw1XD0/DZ2 -7gcb9Q/Jeg0CIGozxLGJS71uA2OU3JD6pGWCdnpYVsiG44/Em5w34SHmMAoGCCqG -SM49BAMDA2gAMGUCMQDjLNl6Zaj5HbfLqqUvWNgz/R1VoQ3QG88kzu3GY0PodO8K -QDcgt8bcGXzEdKkSFg4CMHIkGGLrG3bOYsjyIqZxiO6ess1jJxsFnM+GzvjwNRJk -eWF9g96u/pNN8KA5VhveljGCASUwggEhAgEBME8wNzEVMBMGA1UEChMMc2lnc3Rv -cmUuZGV2MR4wHAYDVQQDExVzaWdzdG9yZS1pbnRlcm1lZGlhdGUCFGc8V7+B2VlJ -eFLpglonkbyb2kVeMAsGCWCGSAFlAwQCAaBpMBgGCSqGSIb3DQEJAzELBgkqhkiG -9w0BBwEwHAYJKoZIhvcNAQkFMQ8XDTIyMDYyOTE5MzczOVowLwYJKoZIhvcNAQkE -MSIEINZzCK5apWIVIKK26tVflr6zNoFkJm8SXQC5T65qwF1BMAoGCCqGSM49BAMC -BEcwRQIgfAl7Elc0DB8UEMOXo3ZxKmN7zTrMO/tvhu1Himgc9IYCIQCxf06wWHVw -YKHxU2tY8MNGomLVk0LyA/QaHQnoo34t8A== ------END SIGNED MESSAGE----- -` - tagSHA = "ed092bb8688d6e37185bcdb58900940703c1a292" +import ( + "bytes" + "os" + "testing" +) - // Rekor index: 2801760 - commitBody = `tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 -parent 2dc0ab59d7f0a7a62423bd181d9e2ab3adb7b56d -author Billy Lynch 1656524971 -0400 -committer Billy Lynch 1656524971 -0400 +// loadObject reads a raw object from testdata. The bytes are exactly what +// git's object store holds (i.e. the output of `git cat-file -p `), +// without the " \0" prefix that git prepends before hashing. +func loadObject(t *testing.T, name string) []byte { + t.Helper() + raw, err := os.ReadFile("testdata/" + name) + if err != nil { + t.Fatalf("read testdata/%s: %v", name, err) + } + return raw +} -foo -` - commitSig = `-----BEGIN SIGNED MESSAGE----- -MIIEBwYJKoZIhvcNAQcCoIID+DCCA/QCAQExDTALBglghkgBZQMEAgEwCwYJKoZI -hvcNAQcBoIICqDCCAqQwggIqoAMCAQICFHtMvZZL50P5bLkgDxwMf2MN4jdAMAoG -CCqGSM49BAMDMDcxFTATBgNVBAoTDHNpZ3N0b3JlLmRldjEeMBwGA1UEAxMVc2ln -c3RvcmUtaW50ZXJtZWRpYXRlMB4XDTIyMDYyOTE3NDkzNFoXDTIyMDYyOTE3NTkz -NFowADBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABNf9io+JonCZhwe/dSkSoJ/Y -eRun8C7xhPVF3FhoPnPVWdywaAEIkniA2WSHXLHt5aQN/08bV65haMZA/Luhmhaj -ggFJMIIBRTAOBgNVHQ8BAf8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUHAwMwHQYD -VR0OBBYEFGzhjCzFUI0caspJJfD4bToYxfDhMB8GA1UdIwQYMBaAFN/T6c9WJBGW -+ajY6ShVosYuGGQ/MCIGA1UdEQEB/wQYMBaBFGJpbGx5QGNoYWluZ3VhcmQuZGV2 -MCwGCisGAQQBg78wAQEEHmh0dHBzOi8vZ2l0aHViLmNvbS9sb2dpbi9vYXV0aDCB -iwYKKwYBBAHWeQIEAgR9BHsAeQB3AAhgkvAoUv9oRdHRayeEnEVnGKwWPcM40m3m -vCIGNm9yAAABgbCVKBkAAAQDAEgwRgIhAJHJalxdErw5icNqfgWtyrv75XGXxAZz -F/J4b7B8ikQAAiEAj8g8ZiSIGmePmES19Y/yFeGj6Fz0NGE2Rk5uJdKyAGEwCgYI -KoZIzj0EAwMDaAAwZQIxAKpQFL9D5s1YVEmNWBoEQ1oo6gBESGhd5L1Kcdq52Ltt -KWXKKB7tpVRwC0lfof2ILgIwU1LTaKeKWb0vToMY9InoS2+hAVljbEh3oxKm/JoX -hiRx2GiDe2OyLCs76/kbH6C/MYIBJTCCASECAQEwTzA3MRUwEwYDVQQKEwxzaWdz -dG9yZS5kZXYxHjAcBgNVBAMTFXNpZ3N0b3JlLWludGVybWVkaWF0ZQIUe0y9lkvn -Q/lsuSAPHAx/Yw3iN0AwCwYJYIZIAWUDBAIBoGkwGAYJKoZIhvcNAQkDMQsGCSqG -SIb3DQEHATAcBgkqhkiG9w0BCQUxDxcNMjIwNjI5MTc0OTM0WjAvBgkqhkiG9w0B -CQQxIgQgSbThfvXoc6INDxPzRtlUu0TTBjFLm4XmwuxXAzfsZmkwCgYIKoZIzj0E -AwIERzBFAiBeNZewVOFI5aa7bPUXa05HDgz5yevQ9aPclDX6U+koTAIhAMbyysil -7I/UWLzhwM+9iusn3JXy71akUTcrqi2MNPaO ------END SIGNED MESSAGE----- -` - commitSHA = "040b9af339e69d18848b7bbe05cb27ee42bb0161" +// SHAs of the objects in testdata, verified against git's stored hash: +// +// printf 'commit %d\0' $(wc -c < testdata/commit.txt) | cat - testdata/commit.txt | shasum +// printf 'tag %d\0' $(wc -c < testdata/tag.txt) | cat - testdata/tag.txt | shasum +const ( + commitSHA = "4954440f9953588782896a1a473d8968765db82b" + tagSHA = "a7c0f87e7d8f475cfe66d9c848b77cf3b85d860b" ) func TestObjectHash(t *testing.T) { for _, tc := range []struct { - name string - body string - sig string - sha string + name string + file string + split func([]byte) ([]byte, []byte, error) + sha string }{ { - name: "tag", - body: tagBody, - sig: tagSig, - sha: tagSHA, + name: "commit", + file: "commit.txt", + split: func(raw []byte) ([]byte, []byte, error) { return SplitCommit(bytes.NewReader(raw)) }, + sha: commitSHA, }, { - name: "commit", - body: commitBody, - sig: commitSig, - sha: commitSHA, + name: "tag", + file: "tag.txt", + split: func(raw []byte) ([]byte, []byte, error) { return SplitTag(bytes.NewReader(raw)) }, + sha: tagSHA, }, } { t.Run(tc.name, func(t *testing.T) { - got, err := ObjectHash([]byte(tc.body), []byte(tc.sig)) + raw := loadObject(t, tc.file) + body, sig, err := tc.split(raw) + if err != nil { + t.Fatalf("split: %v", err) + } + got, err := ObjectHash(body, sig) if err != nil { t.Fatal(err) } diff --git a/pkg/git/rawobj.go b/pkg/git/rawobj.go new file mode 100644 index 00000000..084f0f2a --- /dev/null +++ b/pkg/git/rawobj.go @@ -0,0 +1,216 @@ +// +// Copyright 2026 The Sigstore Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "unicode" +) + +var ( + // ErrMalformedObject is returned for structural ambiguities the + // downstream signature check can't catch on its own — currently just + // multiple gpgsig headers, where it's unclear which signature to extract. + ErrMalformedObject = errors.New("malformed git object") + // ErrUnsupportedSignatureType is returned when the embedded signature is + // not a format gitsign can verify. gitsign only accepts PEM blocks of type + // "SIGNED MESSAGE" (CMS/PKCS7). + ErrUnsupportedSignatureType = errors.New("unsupported signature type") +) + +const gpgsigPrefix = "gpgsig " + +// SplitCommit splits the raw bytes of a commit object (in object-database form, +// without the "commit \0" prefix) into the payload that was signed and +// the PEM-encoded signature. It operates purely on the raw bytes, mirroring +// what git-core feeds to its signature verifier, and does not go through +// go-git's object parser. +// +// The trust-confusion class of attack (GHSA-7rmh-48mx-2vwc) relied on gitsign +// re-encoding through go-git before verification — which normalized away +// duplicate headers and let a signature over the canonical form verify +// against attacker-controlled raw bytes. Verifying directly over the raw +// bytes blocks that: any structural divergence between what was signed and +// what's stored makes the signature fail to verify cryptographically. +// Consequently, SplitCommit doesn't reject merely "weird but git-valid" +// objects (e.g. duplicate tree headers); the signature check below is what +// catches them. The one structural thing we do reject is multiple gpgsig +// headers, since that's ambiguous about which signature to extract. +func SplitCommit(r io.Reader) (payload, sig []byte, err error) { + scanner := bufio.NewScanner(r) + + var ( + payloadBuf bytes.Buffer + sigBuf bytes.Buffer + inGpgsig bool + inBody bool + ) + + for scanner.Scan() { + line := scanner.Bytes() + + if inBody { + payloadBuf.Write(line) + payloadBuf.WriteByte('\n') + continue + } + + if len(line) == 0 { + // Blank line terminates the header section. + payloadBuf.WriteByte('\n') + inBody = true + inGpgsig = false + continue + } + + if inGpgsig { + // git-core requires exactly one leading space on a gpgsig + // continuation (see git/commit.c parse_buffer_signed_by_header). + // We accept any leading whitespace and strip it: the signature + // is cryptographically verified downstream, so leniency here + // can't cause trust confusion, and being permissive avoids + // rejecting signatures produced by tooling that wraps with + // slightly different indentation. + if trimmed := bytes.TrimLeftFunc(line, unicode.IsSpace); len(trimmed) < len(line) { + sigBuf.Write(trimmed) + sigBuf.WriteByte('\n') + continue + } + // Non-continuation line -> gpgsig block ended; fall through and + // process this line as a fresh header. + inGpgsig = false + } + + if bytes.HasPrefix(line, []byte(gpgsigPrefix)) { + if sigBuf.Len() > 0 { + return nil, nil, fmt.Errorf("%w: duplicate gpgsig header", ErrMalformedObject) + } + inGpgsig = true + sigBuf.Write(line[len(gpgsigPrefix):]) + sigBuf.WriteByte('\n') + continue + } + + payloadBuf.Write(line) + payloadBuf.WriteByte('\n') + } + if err := scanner.Err(); err != nil { + return nil, nil, err + } + + return payloadBuf.Bytes(), sigBuf.Bytes(), nil +} + +// JoinCommit is the inverse of SplitCommit. It inserts a gpgsig header +// containing sig into payload, immediately before the blank line separating +// headers from message body. Continuation lines are indented with a single +// space to match git-core's wire format. +// +// sig is the PEM-encoded signature with lines separated by "\n". A trailing +// newline on sig is ignored. +func JoinCommit(payload, sig []byte) ([]byte, error) { + hdrEnd := bytes.Index(payload, []byte("\n\n")) + if hdrEnd < 0 { + return nil, fmt.Errorf("%w: payload has no header terminator", ErrMalformedObject) + } + // Split signature into lines, drop a single trailing newline if any. + s := sig + if len(s) > 0 && s[len(s)-1] == '\n' { + s = s[:len(s)-1] + } + lines := bytes.Split(s, []byte{'\n'}) + + var hdr bytes.Buffer + hdr.WriteString(gpgsigPrefix) + hdr.Write(lines[0]) + hdr.WriteByte('\n') + for _, l := range lines[1:] { + hdr.WriteByte(' ') + hdr.Write(l) + hdr.WriteByte('\n') + } + + out := make([]byte, 0, len(payload)+hdr.Len()) + out = append(out, payload[:hdrEnd+1]...) // include the \n terminating the last header + out = append(out, hdr.Bytes()...) + out = append(out, payload[hdrEnd+1:]...) // the remaining \n + message body + return out, nil +} + +// SplitTag splits the raw bytes of a tag object into the payload that was +// signed and the trailing PEM signature block. Like SplitCommit, it works on +// raw bytes and does not invoke go-git's parser, so any divergence between +// what was signed and what's stored is caught by the cryptographic check +// downstream rather than by structural validation here. The signature is +// taken to start at the last line-anchored "-----BEGIN " marker, matching +// git-core's tag verification path. +func SplitTag(r io.Reader) (payload, sig []byte, err error) { + scanner := bufio.NewScanner(r) + + var ( + payloadBuf bytes.Buffer + sigBuf bytes.Buffer + inBody bool + ) + + for scanner.Scan() { + line := scanner.Bytes() + + if !inBody { + payloadBuf.Write(line) + payloadBuf.WriteByte('\n') + if len(line) == 0 { + inBody = true + } + continue + } + + // In body. Track only the last "-----BEGIN " block as the signature + // — anything before it (including any earlier PEM-looking lines in + // the message body) belongs in payload. + switch { + case bytes.HasPrefix(line, []byte("-----BEGIN ")): + payloadBuf.Write(sigBuf.Bytes()) + sigBuf.Reset() + sigBuf.Write(line) + sigBuf.WriteByte('\n') + case sigBuf.Len() > 0: + sigBuf.Write(line) + sigBuf.WriteByte('\n') + default: + payloadBuf.Write(line) + payloadBuf.WriteByte('\n') + } + } + if err := scanner.Err(); err != nil { + return nil, nil, err + } + + return payloadBuf.Bytes(), sigBuf.Bytes(), nil +} + +// JoinTag is the inverse of SplitTag. The signature is appended verbatim to +// the payload. +func JoinTag(payload, sig []byte) []byte { + out := make([]byte, 0, len(payload)+len(sig)) + out = append(out, payload...) + out = append(out, sig...) + return out +} diff --git a/pkg/git/rawobj_test.go b/pkg/git/rawobj_test.go new file mode 100644 index 00000000..af59b36b --- /dev/null +++ b/pkg/git/rawobj_test.go @@ -0,0 +1,336 @@ +// +// Copyright 2026 The Sigstore Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "bytes" + "errors" + "fmt" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" +) + +// fakeSig is a syntactically-valid PEM block used for tests that exercise +// the parser/joiner shape but don't need a real cryptographic signature. +const fakeSig = `-----BEGIN SIGNED MESSAGE----- +ZmFrZXBheWxvYWQ= +-----END SIGNED MESSAGE----- +` + +// gpgsigBlock formats a PEM signature as a git gpgsig header block: first +// line preceded by "gpgsig ", subsequent lines by a single space, ending +// with a trailing newline. +func gpgsigBlock(sig string) string { + sig = strings.TrimSuffix(sig, "\n") + lines := strings.Split(sig, "\n") + return "gpgsig " + strings.Join(lines, "\n ") + "\n" +} + +// TestSplitCommit_WellFormed runs against a real signed commit +// (testdata/commit.txt = `git cat-file -p HEAD` from a gitsign-signed commit +// in this repo) and confirms split + join round-trips byte-for-byte. +func TestSplitCommit_WellFormed(t *testing.T) { + raw := loadObject(t, "commit.txt") + + body, sig, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + if !bytes.HasPrefix(body, []byte("tree ")) { + t.Errorf("body should start with 'tree ', got %q", firstLine(body)) + } + if bytes.Contains(body, []byte("\ngpgsig ")) { + t.Errorf("body should not contain gpgsig header") + } + if !bytes.HasPrefix(sig, []byte("-----BEGIN ")) { + t.Errorf("sig should start with PEM marker, got %q", firstLine(sig)) + } + + rejoined, err := JoinCommit(body, sig) + if err != nil { + t.Fatalf("JoinCommit: %v", err) + } + if diff := cmp.Diff(raw, rejoined); diff != "" { + t.Errorf("round-trip mismatch (-want +got):\n%s", diff) + } +} + +func TestSplitCommit_DuplicateGpgsig(t *testing.T) { + raw := []byte(fmt.Sprintf(`tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 +author Alice 1700000000 +0000 +committer Alice 1700000000 +0000 +%s%s +foo +`, gpgsigBlock(fakeSig), gpgsigBlock(fakeSig))) + + _, _, err := SplitCommit(bytes.NewReader(raw)) + if !errors.Is(err, ErrMalformedObject) { + t.Fatalf("want ErrMalformedObject, got %v", err) + } +} + +// TestSplitCommit_MergeCommit confirms we don't over-reject: commits with +// multiple parent headers (merge commits) are valid and must parse cleanly. +func TestSplitCommit_MergeCommit(t *testing.T) { + raw := []byte(fmt.Sprintf(`tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 +parent 2dc0ab59d7f0a7a62423bd181d9e2ab3adb7b56d +parent 1111111111111111111111111111111111111111 +parent 2222222222222222222222222222222222222222 +author Alice 1700000000 +0000 +committer Alice 1700000000 +0000 +%s +merge commit +`, gpgsigBlock(fakeSig))) + + body, sig, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + for _, p := range []string{ + "parent 2dc0ab59d7f0a7a62423bd181d9e2ab3adb7b56d\n", + "parent 1111111111111111111111111111111111111111\n", + "parent 2222222222222222222222222222222222222222\n", + } { + if !bytes.Contains(body, []byte(p)) { + t.Errorf("body missing expected parent line %q", p) + } + } + if diff := cmp.Diff([]byte(fakeSig), sig); diff != "" { + t.Errorf("signature mismatch (-want +got):\n%s", diff) + } +} + +// TestSplitCommit_NoSignature documents that an unsigned commit splits without +// error: the entire input becomes payload and the signature is empty. The +// downstream PEM decode + cryptographic check is what surfaces the missing +// signature to the user. +func TestSplitCommit_NoSignature(t *testing.T) { + raw := []byte(`tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 +author Alice 1700000000 +0000 +committer Alice 1700000000 +0000 + +unsigned commit +`) + body, sig, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + if len(sig) != 0 { + t.Errorf("expected empty sig, got %q", sig) + } + if diff := cmp.Diff(raw, body); diff != "" { + t.Errorf("body should equal input (-want +got):\n%s", diff) + } +} + +// TestSplitCommit_NoHeaderTerminator documents that a malformed input with no +// blank line terminating the headers is not rejected — the bytes flow through +// to the verifier, which will reject them as not matching anything that was +// signed. +func TestSplitCommit_NoHeaderTerminator(t *testing.T) { + raw := []byte("tree b333504b8cf3d9c314fed2cc242c5c38e89534a5\n") + body, sig, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + if len(sig) != 0 { + t.Errorf("expected empty sig, got %q", sig) + } + if diff := cmp.Diff(raw, body); diff != "" { + t.Errorf("body should equal input (-want +got):\n%s", diff) + } +} + +// TestSplitCommit_LenientContinuation confirms gpgsig continuation lines with +// extra leading whitespace (or tabs) are accepted, with all leading whitespace +// stripped. git-core requires exactly one space, but the recovered signature +// is cryptographically verified downstream so leniency here is safe. +func TestSplitCommit_LenientContinuation(t *testing.T) { + raw := []byte("tree b333504b8cf3d9c314fed2cc242c5c38e89534a5\n" + + "author Alice 1700000000 +0000\n" + + "committer Alice 1700000000 +0000\n" + + "gpgsig -----BEGIN SIGNED MESSAGE-----\n" + + " two-space-continuation\n" + + "\textra-tab\n" + + " -----END SIGNED MESSAGE-----\n" + + "\n" + + "foo\n") + + _, sig, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + want := "-----BEGIN SIGNED MESSAGE-----\ntwo-space-continuation\nextra-tab\n-----END SIGNED MESSAGE-----\n" + if diff := cmp.Diff([]byte(want), sig); diff != "" { + t.Errorf("signature mismatch (-want +got):\n%s", diff) + } +} + +// TestJoinCommit_RoundTrip uses the real signed HEAD commit: split it, join +// it back, and confirm the bytes are identical. +func TestJoinCommit_RoundTrip(t *testing.T) { + raw := loadObject(t, "commit.txt") + body, sig, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + rejoined, err := JoinCommit(body, sig) + if err != nil { + t.Fatalf("JoinCommit: %v", err) + } + if diff := cmp.Diff(raw, rejoined); diff != "" { + t.Errorf("round-trip mismatch (-want +got):\n%s", diff) + } +} + +func TestJoinCommit_NoHeaderTerminator(t *testing.T) { + _, err := JoinCommit([]byte("not-a-commit"), []byte("sig")) + if !errors.Is(err, ErrMalformedObject) { + t.Fatalf("want ErrMalformedObject, got %v", err) + } +} + +// TestSplitTag_WellFormed runs against a real signed tag (testdata/tag.txt = +// `git cat-file -p v0.1.0`) and confirms split + join round-trips +// byte-for-byte. +func TestSplitTag_WellFormed(t *testing.T) { + raw := loadObject(t, "tag.txt") + + body, sig, err := SplitTag(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitTag: %v", err) + } + if !bytes.HasPrefix(body, []byte("object ")) { + t.Errorf("body should start with 'object ', got %q", firstLine(body)) + } + if !bytes.HasPrefix(sig, []byte("-----BEGIN ")) { + t.Errorf("sig should start with PEM marker, got %q", firstLine(sig)) + } + + rejoined := JoinTag(body, sig) + if diff := cmp.Diff(raw, rejoined); diff != "" { + t.Errorf("round-trip mismatch (-want +got):\n%s", diff) + } +} + +// TestSplitTag_MultipleSignatureBlocks confirms that when a tag body contains +// more than one "-----BEGIN " block — e.g. an attacker concatenates a +// fake/canonical signature into the message and appends their own signature +// at the end — only the *last* block is treated as the signature, matching +// git-core's tag verification path. Anything before the last block (including +// any earlier PEM-looking content) is part of the signed payload, so a +// signature over a different earlier block doesn't get treated as "the" +// signature. +func TestSplitTag_MultipleSignatureBlocks(t *testing.T) { + raw := []byte(`object 040b9af339e69d18848b7bbe05cb27ee42bb0161 +type commit +tag multi-pem +tagger Alice 1700000000 +0000 + +embedded earlier block in the message body: +-----BEGIN SIGNED MESSAGE----- +ZmFrZQ== +-----END SIGNED MESSAGE----- +-----BEGIN SIGNED MESSAGE----- +cmVhbA== +-----END SIGNED MESSAGE----- +`) + + body, sig, err := SplitTag(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitTag: %v", err) + } + + wantSig := []byte(`-----BEGIN SIGNED MESSAGE----- +cmVhbA== +-----END SIGNED MESSAGE----- +`) + if diff := cmp.Diff(wantSig, sig); diff != "" { + t.Errorf("sig mismatch (-want +got):\n%s", diff) + } + + // The earlier PEM block must remain in the body — it's part of what was + // signed, not the signature itself. + if !bytes.Contains(body, []byte("ZmFrZQ==")) { + t.Errorf("body should contain the earlier embedded PEM block") + } + if bytes.Contains(body, []byte("cmVhbA==")) { + t.Errorf("body should not contain the trailing signature payload") + } + + // Round-trip: body + last block reconstructs the original bytes. + if diff := cmp.Diff(raw, JoinTag(body, sig)); diff != "" { + t.Errorf("round-trip mismatch (-want +got):\n%s", diff) + } +} + +// TestSplitTag_NoSignature documents that an unsigned tag splits without +// error: body holds the input and sig is empty. +func TestSplitTag_NoSignature(t *testing.T) { + raw := []byte(`object 040b9af339e69d18848b7bbe05cb27ee42bb0161 +type commit +tag unsigned +tagger Alice 1700000000 +0000 + +unsigned tag +`) + body, sig, err := SplitTag(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitTag: %v", err) + } + if len(sig) != 0 { + t.Errorf("expected empty sig, got %q", sig) + } + if diff := cmp.Diff(raw, body); diff != "" { + t.Errorf("body should equal input (-want +got):\n%s", diff) + } +} + +// TestSplitTag_NoHeaderTerminator documents that a tag with no blank line is +// not rejected — the verifier downstream catches the mismatch. +func TestSplitTag_NoHeaderTerminator(t *testing.T) { + raw := []byte("object 040b9af339e69d18848b7bbe05cb27ee42bb0161\n") + body, sig, err := SplitTag(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitTag: %v", err) + } + if len(sig) != 0 { + t.Errorf("expected empty sig, got %q", sig) + } + if diff := cmp.Diff(raw, body); diff != "" { + t.Errorf("body should equal input (-want +got):\n%s", diff) + } +} + +func TestJoinTag_RoundTrip(t *testing.T) { + raw := loadObject(t, "tag.txt") + body, sig, err := SplitTag(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitTag: %v", err) + } + rejoined := JoinTag(body, sig) + if diff := cmp.Diff(raw, rejoined); diff != "" { + t.Errorf("round-trip mismatch (-want +got):\n%s", diff) + } +} + +// firstLine returns the first line of b for use in error messages. +func firstLine(b []byte) []byte { + first, _, _ := bytes.Cut(b, []byte{'\n'}) + return first +} diff --git a/pkg/git/testdata/commit.txt b/pkg/git/testdata/commit.txt new file mode 100644 index 00000000..568d3297 --- /dev/null +++ b/pkg/git/testdata/commit.txt @@ -0,0 +1,45 @@ +tree 3cced1b431769cf16b530b25162e23bb7545d79c +parent 4638d83b00e1119b9aab39ff747802a8ff63f530 +author Billy Lynch 1777409421 -0400 +committer Billy Lynch 1777414757 -0400 +gpgsig -----BEGIN SIGNED MESSAGE----- + MIIEMQYJKoZIhvcNAQcCoIIEIjCCBB4CAQExDTALBglghkgBZQMEAgEwCwYJKoZI + hvcNAQcBoIIC0TCCAs0wggJToAMCAQICFFXZxLeN2TVVA+zJekvjaHqjkE8sMAoG + CCqGSM49BAMDMDcxFTATBgNVBAoTDHNpZ3N0b3JlLmRldjEeMBwGA1UEAxMVc2ln + c3RvcmUtaW50ZXJtZWRpYXRlMB4XDTI2MDQyODIyMTkxOVoXDTI2MDQyODIyMjkx + OVowADBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABLSqaKRhP2YdGaAny/Q06etl + lnRenHZ3V/dd/Qrs+dFBql61ykcJxMkBIGVdlwNFBsq/bVdnT4SmLh4phchc5MOj + ggFyMIIBbjAOBgNVHQ8BAf8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUHAwMwHQYD + VR0OBBYEFH8O61w2Zecmc+V/maJiTyqSSK0wMB8GA1UdIwQYMBaAFN/T6c9WJBGW + +ajY6ShVosYuGGQ/MCIGA1UdEQEB/wQYMBaBFGJpbGx5QGNoYWluZ3VhcmQuZGV2 + MCkGCisGAQQBg78wAQEEG2h0dHBzOi8vYWNjb3VudHMuZ29vZ2xlLmNvbTArBgor + BgEEAYO/MAEIBB0MG2h0dHBzOi8vYWNjb3VudHMuZ29vZ2xlLmNvbTCBigYKKwYB + BAHWeQIEAgR8BHoAeAB2AN09MGrGxxEyYxkeHJlnNwKiSl643jyt/4eKcoAvKe6O + AAABndYs5ecAAAQDAEcwRQIgVr9D9jTvN6V/OHTju7uZ3ozQYzh42nvp1snRT6k9 + hRACIQD3cTr0I32kPAadwtikj8uYZpkqzOPvEAd4/HEy7HLe6zAKBggqhkjOPQQD + AwNoADBlAjAE+izbGwhBz67CE0Z+li7g/CMLfj+vvQHov+r759DHMxFAAZ0Fyz8d + VUhx3uGDwYYCMQDKRy/yLx4F5+TrvEXISSCXOEGi36eR7zwGeyX8iEh3HhuipnOt + 934wxIYdjzc+qxExggEmMIIBIgIBATBPMDcxFTATBgNVBAoTDHNpZ3N0b3JlLmRl + djEeMBwGA1UEAxMVc2lnc3RvcmUtaW50ZXJtZWRpYXRlAhRV2cS3jdk1VQPsyXpL + 42h6o5BPLDALBglghkgBZQMEAgGgaTAYBgkqhkiG9w0BCQMxCwYJKoZIhvcNAQcB + MBwGCSqGSIb3DQEJBTEPFw0yNjA0MjgyMjE5MTlaMC8GCSqGSIb3DQEJBDEiBCCF + Ehl2z0/PP3PtI7l3rt+50IjOaduv6YSaac3A1bNbmDAKBggqhkjOPQQDAgRIMEYC + IQDvpk8hV5WbBr/MbdDlSV58HrlV28ByANAboL82/KkK+AIhAJZ6CCS13PQS500u + AndjllmzU6kBCqWygwascQ73CZ9f + -----END SIGNED MESSAGE----- + +Verify against raw git object bytes to prevent parser trust-confusion + +go-git's loose object parser uses last-wins semantics for duplicate +singleton headers (tree, author, committer, etc.), while git-core uses +first-wins. An attacker can craft a commit or tag whose raw bytes hash +to one set of contents under git-core but re-encode through go-git to a +different signed payload, letting a legitimate signature verify against +attacker-controlled bytes. + +Replace the go-git decode + EncodeWithoutSignature path with SplitCommit +and SplitTag, which operate directly on the object-database bytes (the +same bytes git-core feeds its verifier) and reject objects with +structural ambiguities — duplicate singleton headers, duplicate gpgsig, +malformed gpgsig continuations. ObjectHash now reassembles via +JoinCommit/JoinTag so the recorded hash matches git-core. diff --git a/pkg/git/testdata/tag.txt b/pkg/git/testdata/tag.txt new file mode 100644 index 00000000..e13f8d9d --- /dev/null +++ b/pkg/git/testdata/tag.txt @@ -0,0 +1,30 @@ +object 2d9cff2bad7132c586e128bcc23322dbb5297e8e +type commit +tag v0.1.0 +tagger Billy Lynch 1654190833 -0400 + +v0.1.0 +-----BEGIN SIGNED MESSAGE----- +MIIEAQYJKoZIhvcNAQcCoIID8jCCA+4CAQExDTALBglghkgBZQMEAgEwCwYJKoZI +hvcNAQcBoIICozCCAp8wggIloAMCAQICFFjVwi84I8kTNqgZRmYKleVVm8NBMAoG +CCqGSM49BAMDMDcxFTATBgNVBAoTDHNpZ3N0b3JlLmRldjEeMBwGA1UEAxMVc2ln +c3RvcmUtaW50ZXJtZWRpYXRlMB4XDTIyMDYwMjE3MjcxN1oXDTIyMDYwMjE3Mzcx +N1owADBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABIK6JRkNRb0z1H3EmQa9zcgN +KdPblSoo1jw5+khs9nV3FPuxvnyJHlAvM4evIkRSRpw7I4LLYIOPlR7TBpQx7vWj +ggFEMIIBQDAOBgNVHQ8BAf8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUHAwMwHQYD +VR0OBBYEFKmLjG22l4wfrfLlDLg2G2sZW5C0MB8GA1UdIwQYMBaAFN/T6c9WJBGW ++ajY6ShVosYuGGQ/MCIGA1UdEQEB/wQYMBaBFGJpbGx5QGNoYWluZ3VhcmQuZGV2 +MCgGCisGAQQBg78wAQEEGmdpdGh1Yi1zaWdzdG9yZS1wcm9kdWN0aW9uMIGKBgor +BgEEAdZ5AgQCBHwEegB4AHYACGCS8ChS/2hF0dFrJ4ScRWcYrBY9wzjSbea8IgY2 +b3IAAAGBJXUQkwAABAMARzBFAiEApoKNdV/X2WpMufae6aoxViXF9h/FQfUPCPUo +15qiMlwCIE+98oDMJZjIJMEp9MtyYbIIhctm/FF7WJ6oV4AseuzDMAoGCCqGSM49 +BAMDA2gAMGUCMQDwONklh8R09WQJy0iJA16Qin6lXjixqNvkKW+1X8i5K2Aft2wZ ++eYZKyF1oMCFy0gCMH6/G9E77FgXiNWJEE559FF8eSjX4UpDCKNeHuiBmcDYSONw +ctsPJ48/40lmNJgWjzGCASQwggEgAgEBME8wNzEVMBMGA1UEChMMc2lnc3RvcmUu +ZGV2MR4wHAYDVQQDExVzaWdzdG9yZS1pbnRlcm1lZGlhdGUCFFjVwi84I8kTNqgZ +RmYKleVVm8NBMAsGCWCGSAFlAwQCAaBpMBgGCSqGSIb3DQEJAzELBgkqhkiG9w0B +BwEwHAYJKoZIhvcNAQkFMQ8XDTIyMDYwMjE3MjcxOFowLwYJKoZIhvcNAQkEMSIE +IJS1yaKNQbN+VIRZ8gHWI9+cLWx39QlpBM4T3ILbSsNBMAoGCCqGSM49BAMCBEYw +RAIgOFDlbTSxHflbq/IS73TWvGgUr7ieO1dlljIALkNbJ5UCIEG9TS7F+N1UdBin +jFqU99hj1obCogXt2+rwd/+8IyYt +-----END SIGNED MESSAGE----- diff --git a/pkg/git/verify.go b/pkg/git/verify.go index ce627da9..751b45e4 100644 --- a/pkg/git/verify.go +++ b/pkg/git/verify.go @@ -23,7 +23,6 @@ import ( "fmt" "github.com/go-git/go-git/v5/plumbing" - "github.com/go-git/go-git/v5/plumbing/object" "github.com/sigstore/gitsign/pkg/rekor" "github.com/sigstore/rekor/pkg/generated/models" ) @@ -118,76 +117,34 @@ func VerifySignature(data, sig []byte, detached bool, rootCerts, intermediates * return v.Verify(context.Background(), data, sig, detached) } -type encoder interface { - Encode(o plumbing.EncodedObject) error -} - -// ObjectHash is a string representation of an encoded Git object +// ObjectHash is a string representation of an encoded Git object. data is the +// signed payload (the bytes fed to the verifier); sig is the PEM-encoded +// signature that was embedded in the object. The returned hash matches what +// git-core computes for the reassembled raw object. func ObjectHash(data, sig []byte) (string, error) { - // Precompute commit hash to store in tlog - obj := &plumbing.MemoryObject{} - if _, err := obj.Write(data); err != nil { - return "", err - } - var ( - encoder encoder + raw []byte + objType plumbing.ObjectType err error ) - // We're making big assumptions here about the ordering of fields - // in Git objects. Unfortunately go-git does loose parsing of objects, - // so it will happily decode objects that don't match the unmarshal type. - // We should see if there's a better way to detect object types. switch { case bytes.HasPrefix(data, []byte("tree ")): - encoder, err = commit(obj, sig) + raw, err = JoinCommit(data, sig) + if err != nil { + return "", err + } + objType = plumbing.CommitObject case bytes.HasPrefix(data, []byte("object ")): - encoder, err = tag(obj, sig) + raw = JoinTag(data, sig) + objType = plumbing.TagObject default: return "", errors.New("could not determine Git object type") } - if err != nil { - return "", err - } - - // go-git will compute a hash on decode and preserve even if we alter the - // object data. To work around this, re-encode the object into a new object - // to force a new hash to be computed. - out := &plumbing.MemoryObject{} - err = encoder.Encode(out) - return out.Hash().String(), err -} - -func commit(obj *plumbing.MemoryObject, sig []byte) (*object.Commit, error) { - obj.SetType(plumbing.CommitObject) - - base := object.Commit{} - if err := base.Decode(obj); err != nil { - return nil, err - } - return &object.Commit{ - Author: base.Author, - Committer: base.Committer, - PGPSignature: string(sig), - Message: base.Message, - TreeHash: base.TreeHash, - ParentHashes: base.ParentHashes, - }, nil -} -func tag(obj *plumbing.MemoryObject, sig []byte) (*object.Tag, error) { - obj.SetType(plumbing.TagObject) - - base := object.Tag{} - if err := base.Decode(obj); err != nil { - return nil, err + obj := &plumbing.MemoryObject{} + obj.SetType(objType) + if _, err := obj.Write(raw); err != nil { + return "", err } - return &object.Tag{ - Tagger: base.Tagger, - Name: base.Name, - TargetType: base.TargetType, - Target: base.Target, - Message: base.Message, - PGPSignature: string(sig), - }, nil + return obj.Hash().String(), nil } From 4ea91f483b41b3bacdcc0ba20cc7d75722702abc Mon Sep 17 00:00:00 2001 From: Billy Lynch Date: Mon, 4 May 2026 13:31:03 -0400 Subject: [PATCH 2/2] Add support for sha256 headers. See https://git-scm.com/docs/hash-function-transition#_signed_commits Signed-off-by: Billy Lynch --- internal/commands/verify-tag/verify_tag.go | 13 +- internal/commands/verify/verify.go | 17 +- internal/git/git.go | 6 +- internal/gitsign/invalid_object_test.go | 8 +- pkg/git/git_test.go | 28 +- pkg/git/rawobj.go | 359 ++++++++++++----- pkg/git/rawobj_test.go | 439 +++++++++++++++++---- pkg/git/verify.go | 46 ++- 8 files changed, 702 insertions(+), 214 deletions(-) diff --git a/internal/commands/verify-tag/verify_tag.go b/internal/commands/verify-tag/verify_tag.go index e1a041e5..33523137 100644 --- a/internal/commands/verify-tag/verify_tag.go +++ b/internal/commands/verify-tag/verify_tag.go @@ -74,11 +74,20 @@ func (o *options) Run(_ io.Writer, args []string) error { } defer r.Close() // nolint:errcheck - data, sig, err := git.SplitTag(r) + t, err := git.SplitTag(r) if err != nil { return fmt.Errorf("error extracting tag signature: %w", err) } + // Per the SHA-256 transition spec, the in-body PEM block is the + // signature in the tag's current hash algorithm; gpgsig / + // gpgsig-sha256 headers carry signatures over the alternate-algorithm + // form. We verify the local-form signature. + sig := t.InBody + if sig == nil { + return fmt.Errorf("tag has no in-body signature") + } + p, _ := pem.Decode(sig) if p == nil { return fmt.Errorf("%w: not a PEM block", git.ErrUnsupportedSignatureType) @@ -92,7 +101,7 @@ func (o *options) Run(_ io.Writer, args []string) error { if err != nil { return err } - summary, err := v.Verify(ctx, data, sig, true) + summary, err := v.Verify(ctx, t.Payload, sig, true) if err != nil { return err } diff --git a/internal/commands/verify/verify.go b/internal/commands/verify/verify.go index 40fcc43a..6346e275 100644 --- a/internal/commands/verify/verify.go +++ b/internal/commands/verify/verify.go @@ -71,11 +71,24 @@ func (o *options) Run(_ io.Writer, args []string) error { } defer r.Close() // nolint:errcheck - data, sig, err := git.SplitCommit(r) + c, err := git.SplitCommit(r) if err != nil { return fmt.Errorf("error extracting commit signature: %w", err) } + // Per the SHA-256 transition spec a commit can carry gpgsig (SHA-1 + // form), gpgsig-sha256 (SHA-256 form), or both. Prefer gpgsig — every + // repo go-git can read today is SHA-1 form, so its gpgsig matches the + // stripped Payload. gpgsig-sha256 is the fallback for SHA-256-only + // signed commits. + sig := c.Gpgsig + if sig == nil { + sig = c.GpgsigSha256 + } + if sig == nil { + return fmt.Errorf("commit has no gpgsig or gpgsig-sha256 signature") + } + p, _ := pem.Decode(sig) if p == nil { return fmt.Errorf("%w: not a PEM block", git.ErrUnsupportedSignatureType) @@ -88,7 +101,7 @@ func (o *options) Run(_ io.Writer, args []string) error { if err != nil { return err } - summary, err := v.Verify(ctx, data, sig, true) + summary, err := v.Verify(ctx, c.Payload, sig, true) if err != nil { return err } diff --git a/internal/git/git.go b/internal/git/git.go index 2aa9294d..311321f4 100644 --- a/internal/git/git.go +++ b/internal/git/git.go @@ -52,7 +52,11 @@ func LegacySHASign(ctx context.Context, rekor rekor.Writer, ident *fulcio.Identi // using the same key, this is probably okay? e.g. even if you could cause a SHA1 collision, // you would still need the underlying commit to be valid and using the same key which seems hard. - commit, err := git.ObjectHash(data, resp.Signature) + raw, err := git.JoinCommit(&git.CommitSig{Payload: data, Gpgsig: resp.Signature}) + if err != nil { + return nil, fmt.Errorf("error reassembling commit: %w", err) + } + commit, err := git.ObjectHash(raw) if err != nil { return nil, fmt.Errorf("error generating commit hash: %w", err) } diff --git a/internal/gitsign/invalid_object_test.go b/internal/gitsign/invalid_object_test.go index f0b31c30..82dc0f08 100644 --- a/internal/gitsign/invalid_object_test.go +++ b/internal/gitsign/invalid_object_test.go @@ -112,11 +112,11 @@ func TestDuplicateTreeTrustConfusion(t *testing.T) { // tree headers intact) instead of the go-git-normalized form. The // signature was made over the canonical (single-tree) bytes, so the // cryptographic check rejects this. - body, sig, err := git.SplitCommit(bytes.NewReader(malformedRaw)) + c, err := git.SplitCommit(bytes.NewReader(malformedRaw)) if err != nil { t.Fatalf("SplitCommit: %v", err) } - if _, err := gv.Verify(context.Background(), body, sig, true); err == nil { + if _, err := gv.Verify(context.Background(), c.Payload, c.Gpgsig, true); err == nil { t.Fatalf("expected signature verification to fail against malformed raw bytes, got nil error") } }) @@ -133,11 +133,11 @@ func TestDuplicateTreeTrustConfusion(t *testing.T) { message, )) - data, sig, err := git.SplitCommit(bytes.NewReader(wellFormed)) + c, err := git.SplitCommit(bytes.NewReader(wellFormed)) if err != nil { t.Fatalf("SplitCommit: %v", err) } - if _, err := gv.Verify(context.Background(), data, sig, true); err != nil { + if _, err := gv.Verify(context.Background(), c.Payload, c.Gpgsig, true); err != nil { t.Errorf("signature verify over well-formed bytes: %v", err) } }) diff --git a/pkg/git/git_test.go b/pkg/git/git_test.go index 81f5f633..1564925c 100644 --- a/pkg/git/git_test.go +++ b/pkg/git/git_test.go @@ -15,7 +15,6 @@ package git import ( - "bytes" "os" "testing" ) @@ -43,31 +42,16 @@ const ( func TestObjectHash(t *testing.T) { for _, tc := range []struct { - name string - file string - split func([]byte) ([]byte, []byte, error) - sha string + name string + file string + sha string }{ - { - name: "commit", - file: "commit.txt", - split: func(raw []byte) ([]byte, []byte, error) { return SplitCommit(bytes.NewReader(raw)) }, - sha: commitSHA, - }, - { - name: "tag", - file: "tag.txt", - split: func(raw []byte) ([]byte, []byte, error) { return SplitTag(bytes.NewReader(raw)) }, - sha: tagSHA, - }, + {name: "commit", file: "commit.txt", sha: commitSHA}, + {name: "tag", file: "tag.txt", sha: tagSHA}, } { t.Run(tc.name, func(t *testing.T) { raw := loadObject(t, tc.file) - body, sig, err := tc.split(raw) - if err != nil { - t.Fatalf("split: %v", err) - } - got, err := ObjectHash(body, sig) + got, err := ObjectHash(raw) if err != nil { t.Fatal(err) } diff --git a/pkg/git/rawobj.go b/pkg/git/rawobj.go index 084f0f2a..53cb7925 100644 --- a/pkg/git/rawobj.go +++ b/pkg/git/rawobj.go @@ -26,8 +26,10 @@ import ( var ( // ErrMalformedObject is returned for structural ambiguities the - // downstream signature check can't catch on its own — currently just - // multiple gpgsig headers, where it's unclear which signature to extract. + // downstream signature check can't catch on its own — currently a + // duplicate gpgsig or gpgsig-sha256 header (where it's unclear which + // signature to extract) or, on the join side, a payload with no + // header/body separator. ErrMalformedObject = errors.New("malformed git object") // ErrUnsupportedSignatureType is returned when the embedded signature is // not a format gitsign can verify. gitsign only accepts PEM blocks of type @@ -35,13 +37,67 @@ var ( ErrUnsupportedSignatureType = errors.New("unsupported signature type") ) +// gpgsigPrefix is the SHA-1 signature header. The trailing space distinguishes +// it from gpgsig-sha256 and any future "gpgsig-*" extension headers, which +// are independently valid header fields. const gpgsigPrefix = "gpgsig " -// SplitCommit splits the raw bytes of a commit object (in object-database form, -// without the "commit \0" prefix) into the payload that was signed and -// the PEM-encoded signature. It operates purely on the raw bytes, mirroring -// what git-core feeds to its signature verifier, and does not go through -// go-git's object parser. +// gpgsigSha256Prefix is the SHA-256 transition compat header introduced by +// the hash-function-transition spec. When compatObjectFormat is set, git +// emits this alongside gpgsig — see +// https://git-scm.com/docs/hash-function-transition#_signed_commits. +const gpgsigSha256Prefix = "gpgsig-sha256 " + +// CommitSig holds signature material extracted from a commit's raw bytes. +// +// Per the SHA-256 transition spec, the signed payload for either gpgsig or +// gpgsig-sha256 is the commit content with BOTH header fields removed (in +// the commit's own hash algorithm). The two signatures sign different bytes +// — the SHA-1 form vs the SHA-256 form of the same logical commit — but +// they share the same removal rule. Payload here is whatever form the +// caller's input bytes are in, with both header fields stripped, so it +// matches whichever of Gpgsig / GpgsigSha256 corresponds to that form. +type CommitSig struct { + // Payload is the commit content with gpgsig and gpgsig-sha256 fields + // (and their continuation lines) removed. + Payload []byte + // Gpgsig is the PEM signature from the gpgsig header. It signs the + // SHA-1 form of the commit. Nil if the header is absent. + Gpgsig []byte + // GpgsigSha256 is the PEM signature from the gpgsig-sha256 header. It + // signs the SHA-256 form of the commit. Nil if the header is absent. + GpgsigSha256 []byte +} + +// TagSig holds signature material extracted from a tag's raw bytes. +// +// Per the SHA-256 transition spec, the in-body PEM block is the signature +// over the tag in its current hash algorithm; the gpgsig and gpgsig-sha256 +// headers are signatures over the alternate-algorithm form. All three sign +// the same shape: tag content with both header fields and the in-body PEM +// block removed. +type TagSig struct { + // Payload is the tag content with both header signature fields and the + // in-body PEM block removed. + Payload []byte + // InBody is the PEM signature appended after the tag message body. It + // signs the tag in its own (current) hash algorithm. Nil if absent. + InBody []byte + // Gpgsig is the PEM signature from the gpgsig header — the + // alternate-algorithm signature when the tag's stored form is SHA-256, + // or unused for a SHA-1 tag. Nil if absent. + Gpgsig []byte + // GpgsigSha256 is the PEM signature from the gpgsig-sha256 header — + // the alternate-algorithm signature when the tag's stored form is + // SHA-1. Nil if absent. + GpgsigSha256 []byte +} + +// SplitCommit parses the raw bytes of a commit object (object-database form, +// without the "commit \0" prefix) into payload + signatures. +// +// It operates purely on the raw bytes, mirroring what git-core feeds to its +// signature verifier, and does not go through go-git's object parser. // // The trust-confusion class of attack (GHSA-7rmh-48mx-2vwc) relied on gitsign // re-encoding through go-git before verification — which normalized away @@ -51,16 +107,21 @@ const gpgsigPrefix = "gpgsig " // what's stored makes the signature fail to verify cryptographically. // Consequently, SplitCommit doesn't reject merely "weird but git-valid" // objects (e.g. duplicate tree headers); the signature check below is what -// catches them. The one structural thing we do reject is multiple gpgsig -// headers, since that's ambiguous about which signature to extract. -func SplitCommit(r io.Reader) (payload, sig []byte, err error) { +// catches them. The structural things we *do* reject are duplicate gpgsig +// or gpgsig-sha256 headers, because either is ambiguous about which +// signature to extract. +func SplitCommit(r io.Reader) (*CommitSig, error) { scanner := bufio.NewScanner(r) var ( payloadBuf bytes.Buffer - sigBuf bytes.Buffer - inGpgsig bool - inBody bool + gpgsigBuf bytes.Buffer + sha256Buf bytes.Buffer + // activeSig points at whichever signature buffer is currently + // accepting continuation lines, or nil if we're in regular-header + // territory. + activeSig *bytes.Buffer + inBody bool ) for scanner.Scan() { @@ -76,12 +137,12 @@ func SplitCommit(r io.Reader) (payload, sig []byte, err error) { // Blank line terminates the header section. payloadBuf.WriteByte('\n') inBody = true - inGpgsig = false + activeSig = nil continue } - if inGpgsig { - // git-core requires exactly one leading space on a gpgsig + if activeSig != nil { + // git-core requires exactly one leading space on a signature // continuation (see git/commit.c parse_buffer_signed_by_header). // We accept any leading whitespace and strip it: the signature // is cryptographically verified downstream, so leniency here @@ -89,128 +150,242 @@ func SplitCommit(r io.Reader) (payload, sig []byte, err error) { // rejecting signatures produced by tooling that wraps with // slightly different indentation. if trimmed := bytes.TrimLeftFunc(line, unicode.IsSpace); len(trimmed) < len(line) { - sigBuf.Write(trimmed) - sigBuf.WriteByte('\n') + activeSig.Write(trimmed) + activeSig.WriteByte('\n') continue } - // Non-continuation line -> gpgsig block ended; fall through and - // process this line as a fresh header. - inGpgsig = false + // Non-continuation line -> signature block ended; fall through + // and process this line as a fresh header. + activeSig = nil } - if bytes.HasPrefix(line, []byte(gpgsigPrefix)) { - if sigBuf.Len() > 0 { - return nil, nil, fmt.Errorf("%w: duplicate gpgsig header", ErrMalformedObject) + // Note: check the longer prefix first so "gpgsig-sha256 " doesn't + // get misclassified by the "gpgsig " branch. (In practice the two + // don't overlap because the 7th byte differs, but ordering this way + // is robust against accidental future widenings of gpgsigPrefix.) + switch { + case bytes.HasPrefix(line, []byte(gpgsigSha256Prefix)): + if sha256Buf.Len() > 0 { + return nil, fmt.Errorf("%w: duplicate gpgsig-sha256 header", ErrMalformedObject) } - inGpgsig = true - sigBuf.Write(line[len(gpgsigPrefix):]) - sigBuf.WriteByte('\n') - continue + activeSig = &sha256Buf + activeSig.Write(line[len(gpgsigSha256Prefix):]) + activeSig.WriteByte('\n') + case bytes.HasPrefix(line, []byte(gpgsigPrefix)): + if gpgsigBuf.Len() > 0 { + return nil, fmt.Errorf("%w: duplicate gpgsig header", ErrMalformedObject) + } + activeSig = &gpgsigBuf + activeSig.Write(line[len(gpgsigPrefix):]) + activeSig.WriteByte('\n') + default: + payloadBuf.Write(line) + payloadBuf.WriteByte('\n') } - - payloadBuf.Write(line) - payloadBuf.WriteByte('\n') } if err := scanner.Err(); err != nil { - return nil, nil, err + return nil, err } - return payloadBuf.Bytes(), sigBuf.Bytes(), nil + return &CommitSig{ + Payload: payloadBuf.Bytes(), + Gpgsig: sigOrNil(&gpgsigBuf), + GpgsigSha256: sigOrNil(&sha256Buf), + }, nil } -// JoinCommit is the inverse of SplitCommit. It inserts a gpgsig header -// containing sig into payload, immediately before the blank line separating -// headers from message body. Continuation lines are indented with a single -// space to match git-core's wire format. +// JoinCommit is the inverse of SplitCommit. It re-inserts the gpgsig and +// gpgsig-sha256 headers (those that are non-nil) into c.Payload immediately +// before the blank line separating headers from the message body. +// Continuation lines are indented with a single space, matching git-core's +// wire format. // -// sig is the PEM-encoded signature with lines separated by "\n". A trailing -// newline on sig is ignored. -func JoinCommit(payload, sig []byte) ([]byte, error) { - hdrEnd := bytes.Index(payload, []byte("\n\n")) +// When both headers are present they are emitted in a fixed order: gpgsig +// first, then gpgsig-sha256. This matches git-core's emission order, so +// objects produced by git-core round-trip byte-for-byte through Split/Join. +func JoinCommit(c *CommitSig) ([]byte, error) { + if len(c.Gpgsig) == 0 && len(c.GpgsigSha256) == 0 { + // No signatures to insert; passthrough. + return c.Payload, nil + } + + hdrEnd := bytes.Index(c.Payload, []byte("\n\n")) if hdrEnd < 0 { return nil, fmt.Errorf("%w: payload has no header terminator", ErrMalformedObject) } - // Split signature into lines, drop a single trailing newline if any. - s := sig - if len(s) > 0 && s[len(s)-1] == '\n' { - s = s[:len(s)-1] - } - lines := bytes.Split(s, []byte{'\n'}) var hdr bytes.Buffer - hdr.WriteString(gpgsigPrefix) - hdr.Write(lines[0]) - hdr.WriteByte('\n') - for _, l := range lines[1:] { - hdr.WriteByte(' ') - hdr.Write(l) - hdr.WriteByte('\n') + if len(c.Gpgsig) > 0 { + writeSigHeader(&hdr, gpgsigPrefix, c.Gpgsig) + } + if len(c.GpgsigSha256) > 0 { + writeSigHeader(&hdr, gpgsigSha256Prefix, c.GpgsigSha256) } - out := make([]byte, 0, len(payload)+hdr.Len()) - out = append(out, payload[:hdrEnd+1]...) // include the \n terminating the last header + out := make([]byte, 0, len(c.Payload)+hdr.Len()) + out = append(out, c.Payload[:hdrEnd+1]...) // include the \n terminating the last header out = append(out, hdr.Bytes()...) - out = append(out, payload[hdrEnd+1:]...) // the remaining \n + message body + out = append(out, c.Payload[hdrEnd+1:]...) // the remaining \n + message body return out, nil } -// SplitTag splits the raw bytes of a tag object into the payload that was -// signed and the trailing PEM signature block. Like SplitCommit, it works on -// raw bytes and does not invoke go-git's parser, so any divergence between -// what was signed and what's stored is caught by the cryptographic check -// downstream rather than by structural validation here. The signature is -// taken to start at the last line-anchored "-----BEGIN " marker, matching -// git-core's tag verification path. -func SplitTag(r io.Reader) (payload, sig []byte, err error) { +// SplitTag parses the raw bytes of a tag object into payload + signatures. +// Like SplitCommit, it works on raw bytes and does not invoke go-git's +// parser, so any divergence between what was signed and what's stored is +// caught by the cryptographic check downstream rather than by structural +// validation here. +// +// The tag's in-body PEM block (current-algorithm signature) is taken to +// start at the *last* line-anchored "-----BEGIN " marker, matching +// git-core's tag verification path. Anything before it stays in the +// payload, including any earlier PEM-looking lines an attacker might have +// embedded in the message body. Header-style alternate-algorithm +// signatures (gpgsig, gpgsig-sha256) are stripped from the header section +// the same way SplitCommit does. +func SplitTag(r io.Reader) (*TagSig, error) { scanner := bufio.NewScanner(r) var ( - payloadBuf bytes.Buffer - sigBuf bytes.Buffer - inBody bool + payloadBuf bytes.Buffer + inBodySigBuf bytes.Buffer + gpgsigBuf bytes.Buffer + sha256Buf bytes.Buffer + activeSig *bytes.Buffer + inBody bool ) for scanner.Scan() { line := scanner.Bytes() - if !inBody { - payloadBuf.Write(line) - payloadBuf.WriteByte('\n') - if len(line) == 0 { - inBody = true + if inBody { + switch { + case bytes.HasPrefix(line, []byte("-----BEGIN ")): + // New PEM block opens — flush any earlier candidate to + // payload (it wasn't the trailing block) and start fresh. + payloadBuf.Write(inBodySigBuf.Bytes()) + inBodySigBuf.Reset() + inBodySigBuf.Write(line) + inBodySigBuf.WriteByte('\n') + case inBodySigBuf.Len() > 0: + inBodySigBuf.Write(line) + inBodySigBuf.WriteByte('\n') + default: + payloadBuf.Write(line) + payloadBuf.WriteByte('\n') } continue } - // In body. Track only the last "-----BEGIN " block as the signature - // — anything before it (including any earlier PEM-looking lines in - // the message body) belongs in payload. + if len(line) == 0 { + payloadBuf.WriteByte('\n') + inBody = true + activeSig = nil + continue + } + + if activeSig != nil { + if trimmed := bytes.TrimLeftFunc(line, unicode.IsSpace); len(trimmed) < len(line) { + activeSig.Write(trimmed) + activeSig.WriteByte('\n') + continue + } + activeSig = nil + } + switch { - case bytes.HasPrefix(line, []byte("-----BEGIN ")): - payloadBuf.Write(sigBuf.Bytes()) - sigBuf.Reset() - sigBuf.Write(line) - sigBuf.WriteByte('\n') - case sigBuf.Len() > 0: - sigBuf.Write(line) - sigBuf.WriteByte('\n') + case bytes.HasPrefix(line, []byte(gpgsigSha256Prefix)): + if sha256Buf.Len() > 0 { + return nil, fmt.Errorf("%w: duplicate gpgsig-sha256 header", ErrMalformedObject) + } + activeSig = &sha256Buf + activeSig.Write(line[len(gpgsigSha256Prefix):]) + activeSig.WriteByte('\n') + case bytes.HasPrefix(line, []byte(gpgsigPrefix)): + if gpgsigBuf.Len() > 0 { + return nil, fmt.Errorf("%w: duplicate gpgsig header", ErrMalformedObject) + } + activeSig = &gpgsigBuf + activeSig.Write(line[len(gpgsigPrefix):]) + activeSig.WriteByte('\n') default: payloadBuf.Write(line) payloadBuf.WriteByte('\n') } } if err := scanner.Err(); err != nil { - return nil, nil, err + return nil, err } - return payloadBuf.Bytes(), sigBuf.Bytes(), nil + return &TagSig{ + Payload: payloadBuf.Bytes(), + InBody: sigOrNil(&inBodySigBuf), + Gpgsig: sigOrNil(&gpgsigBuf), + GpgsigSha256: sigOrNil(&sha256Buf), + }, nil } -// JoinTag is the inverse of SplitTag. The signature is appended verbatim to -// the payload. -func JoinTag(payload, sig []byte) []byte { - out := make([]byte, 0, len(payload)+len(sig)) +// JoinTag is the inverse of SplitTag. It re-inserts gpgsig / gpgsig-sha256 +// headers into the header section (in fixed order: gpgsig, then +// gpgsig-sha256) and appends the in-body PEM block after the message body. +func JoinTag(t *TagSig) ([]byte, error) { + payload := t.Payload + + if len(t.Gpgsig) > 0 || len(t.GpgsigSha256) > 0 { + hdrEnd := bytes.Index(payload, []byte("\n\n")) + if hdrEnd < 0 { + return nil, fmt.Errorf("%w: payload has no header terminator", ErrMalformedObject) + } + + var hdr bytes.Buffer + if len(t.Gpgsig) > 0 { + writeSigHeader(&hdr, gpgsigPrefix, t.Gpgsig) + } + if len(t.GpgsigSha256) > 0 { + writeSigHeader(&hdr, gpgsigSha256Prefix, t.GpgsigSha256) + } + + out := make([]byte, 0, len(payload)+hdr.Len()+len(t.InBody)) + out = append(out, payload[:hdrEnd+1]...) + out = append(out, hdr.Bytes()...) + out = append(out, payload[hdrEnd+1:]...) + out = append(out, t.InBody...) + return out, nil + } + + out := make([]byte, 0, len(payload)+len(t.InBody)) out = append(out, payload...) - out = append(out, sig...) - return out + out = append(out, t.InBody...) + return out, nil +} + +// writeSigHeader emits a single header field of the form +// +// prefix line1\n line2\n line3\n... +// +// matching git-core's wire format. sig is a PEM-encoded signature with +// lines separated by "\n"; a trailing newline is ignored. +func writeSigHeader(w *bytes.Buffer, prefix string, sig []byte) { + s := sig + if len(s) > 0 && s[len(s)-1] == '\n' { + s = s[:len(s)-1] + } + lines := bytes.Split(s, []byte{'\n'}) + + w.WriteString(prefix) + w.Write(lines[0]) + w.WriteByte('\n') + for _, l := range lines[1:] { + w.WriteByte(' ') + w.Write(l) + w.WriteByte('\n') + } +} + +// sigOrNil returns nil for an empty buffer (so absent signatures surface as +// nil rather than zero-length slices). +func sigOrNil(b *bytes.Buffer) []byte { + if b.Len() == 0 { + return nil + } + return b.Bytes() } diff --git a/pkg/git/rawobj_test.go b/pkg/git/rawobj_test.go index af59b36b..631c5baa 100644 --- a/pkg/git/rawobj_test.go +++ b/pkg/git/rawobj_test.go @@ -32,13 +32,19 @@ ZmFrZXBheWxvYWQ= -----END SIGNED MESSAGE----- ` -// gpgsigBlock formats a PEM signature as a git gpgsig header block: first -// line preceded by "gpgsig ", subsequent lines by a single space, ending -// with a trailing newline. -func gpgsigBlock(sig string) string { +const fakeSig2 = `-----BEGIN SIGNED MESSAGE----- +b3RoZXJwYXlsb2Fk +-----END SIGNED MESSAGE----- +` + +// sigHeader formats a PEM signature as a wire-format header block: the first +// line preceded by `prefix` (e.g. "gpgsig "), subsequent lines indented by a +// single space. The result ends with a trailing newline so it can be +// concatenated directly into a header section. +func sigHeader(prefix, sig string) string { sig = strings.TrimSuffix(sig, "\n") lines := strings.Split(sig, "\n") - return "gpgsig " + strings.Join(lines, "\n ") + "\n" + return prefix + strings.Join(lines, "\n ") + "\n" } // TestSplitCommit_WellFormed runs against a real signed commit @@ -47,21 +53,24 @@ func gpgsigBlock(sig string) string { func TestSplitCommit_WellFormed(t *testing.T) { raw := loadObject(t, "commit.txt") - body, sig, err := SplitCommit(bytes.NewReader(raw)) + c, err := SplitCommit(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitCommit: %v", err) } - if !bytes.HasPrefix(body, []byte("tree ")) { - t.Errorf("body should start with 'tree ', got %q", firstLine(body)) + if !bytes.HasPrefix(c.Payload, []byte("tree ")) { + t.Errorf("payload should start with 'tree ', got %q", firstLine(c.Payload)) + } + if bytes.Contains(c.Payload, []byte("\ngpgsig ")) { + t.Errorf("payload should not contain gpgsig header") } - if bytes.Contains(body, []byte("\ngpgsig ")) { - t.Errorf("body should not contain gpgsig header") + if c.GpgsigSha256 != nil { + t.Errorf("expected no gpgsig-sha256 on a SHA-1 only commit, got %q", c.GpgsigSha256) } - if !bytes.HasPrefix(sig, []byte("-----BEGIN ")) { - t.Errorf("sig should start with PEM marker, got %q", firstLine(sig)) + if !bytes.HasPrefix(c.Gpgsig, []byte("-----BEGIN ")) { + t.Errorf("Gpgsig should start with PEM marker, got %q", firstLine(c.Gpgsig)) } - rejoined, err := JoinCommit(body, sig) + rejoined, err := JoinCommit(c) if err != nil { t.Fatalf("JoinCommit: %v", err) } @@ -71,23 +80,234 @@ func TestSplitCommit_WellFormed(t *testing.T) { } func TestSplitCommit_DuplicateGpgsig(t *testing.T) { - raw := []byte(fmt.Sprintf(`tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 + raw := fmt.Appendf(nil, `tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 author Alice 1700000000 +0000 committer Alice 1700000000 +0000 %s%s foo -`, gpgsigBlock(fakeSig), gpgsigBlock(fakeSig))) +`, sigHeader(gpgsigPrefix, fakeSig), sigHeader(gpgsigPrefix, fakeSig)) - _, _, err := SplitCommit(bytes.NewReader(raw)) + _, err := SplitCommit(bytes.NewReader(raw)) if !errors.Is(err, ErrMalformedObject) { t.Fatalf("want ErrMalformedObject, got %v", err) } } +func TestSplitCommit_DuplicateGpgsigSha256(t *testing.T) { + raw := fmt.Appendf(nil, `tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 +author Alice 1700000000 +0000 +committer Alice 1700000000 +0000 +%s%s +foo +`, sigHeader(gpgsigSha256Prefix, fakeSig), sigHeader(gpgsigSha256Prefix, fakeSig)) + + _, err := SplitCommit(bytes.NewReader(raw)) + if !errors.Is(err, ErrMalformedObject) { + t.Fatalf("want ErrMalformedObject, got %v", err) + } +} + +// TestSplitCommit_DualSigned exercises the SHA-256 transition compat shape +// — a commit carrying both gpgsig and gpgsig-sha256 +// (https://git-scm.com/docs/hash-function-transition#_signed_commits) — and +// confirms each signature is extracted into its own field with both header +// fields stripped from Payload (the spec says either signature's signed +// payload is the commit content with *both* fields removed). +// +// JoinCommit uses a fixed gpgsig-then-gpgsig-sha256 emission order, so the +// "gpgsig-first" subcase round-trips byte-for-byte while the +// "gpgsig-sha256-first" subcase only checks parse correctness. +func TestSplitCommit_DualSigned(t *testing.T) { + gpgsig := sigHeader(gpgsigPrefix, fakeSig) + sha256Sig := sigHeader(gpgsigSha256Prefix, fakeSig2) + + for _, tc := range []struct { + name string + first string + second string + roundTrip bool + }{ + {"gpgsig-first", gpgsig, sha256Sig, true}, + {"gpgsig-sha256-first", sha256Sig, gpgsig, false}, + } { + t.Run(tc.name, func(t *testing.T) { + raw := fmt.Appendf(nil, `tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 +author Alice 1700000000 +0000 +committer Alice 1700000000 +0000 +%s%s +dual-signed +`, tc.first, tc.second) + + c, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + if diff := cmp.Diff([]byte(fakeSig), c.Gpgsig); diff != "" { + t.Errorf("Gpgsig mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff([]byte(fakeSig2), c.GpgsigSha256); diff != "" { + t.Errorf("GpgsigSha256 mismatch (-want +got):\n%s", diff) + } + if bytes.Contains(c.Payload, []byte("\ngpgsig ")) { + t.Errorf("payload should not contain gpgsig header") + } + if bytes.Contains(c.Payload, []byte("\ngpgsig-sha256 ")) { + t.Errorf("payload should not contain gpgsig-sha256 header") + } + + if tc.roundTrip { + rejoined, err := JoinCommit(c) + if err != nil { + t.Fatalf("JoinCommit: %v", err) + } + if diff := cmp.Diff(raw, rejoined); diff != "" { + t.Errorf("round-trip mismatch (-want +got):\n%s", diff) + } + } + }) + } +} + +// TestSplitCommit_Sha256Only covers mode 3 of the transition spec: a commit +// signed only with gpgsig-sha256 (no gpgsig). gitsign should extract the +// signature and report Gpgsig as nil. +func TestSplitCommit_Sha256Only(t *testing.T) { + raw := fmt.Appendf(nil, `tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 +author Alice 1700000000 +0000 +committer Alice 1700000000 +0000 +%s +sha256 only +`, sigHeader(gpgsigSha256Prefix, fakeSig)) + + c, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + if c.Gpgsig != nil { + t.Errorf("expected Gpgsig nil, got %q", c.Gpgsig) + } + if diff := cmp.Diff([]byte(fakeSig), c.GpgsigSha256); diff != "" { + t.Errorf("GpgsigSha256 mismatch (-want +got):\n%s", diff) + } + + rejoined, err := JoinCommit(c) + if err != nil { + t.Fatalf("JoinCommit: %v", err) + } + if diff := cmp.Diff(raw, rejoined); diff != "" { + t.Errorf("round-trip mismatch (-want +got):\n%s", diff) + } +} + +// TestSplitCommit_GpgsigInBody confirms that gpgsig-looking lines in the +// commit message body are treated as plain text, not as a second gpgsig +// header. Once the blank line ends the header section, prefix detection and +// duplicate-header rejection are off — body content is opaque to the +// parser. +func TestSplitCommit_GpgsigInBody(t *testing.T) { + raw := fmt.Appendf(nil, `tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 +author Alice 1700000000 +0000 +committer Alice 1700000000 +0000 +%s +This commit message has lines that look like gpgsig headers: +gpgsig pretend-sig-here +gpgsig-sha256 pretend-other-sig + indented-continuation-shaped-line +gpgsig second-fake +`, sigHeader(gpgsigPrefix, fakeSig)) + + c, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + if diff := cmp.Diff([]byte(fakeSig), c.Gpgsig); diff != "" { + t.Errorf("Gpgsig mismatch (-want +got):\n%s", diff) + } + if c.GpgsigSha256 != nil { + t.Errorf("expected GpgsigSha256 nil, got %q", c.GpgsigSha256) + } + for _, want := range []string{ + "gpgsig pretend-sig-here", + "gpgsig-sha256 pretend-other-sig", + "gpgsig second-fake", + } { + if !bytes.Contains(c.Payload, []byte(want)) { + t.Errorf("body line %q should remain in payload", want) + } + } +} + +// TestSplitTag_GpgsigInBody confirms that gpgsig-looking lines in the tag +// message body are treated as plain text, not as duplicate header +// signatures. +func TestSplitTag_GpgsigInBody(t *testing.T) { + raw := fmt.Appendf(nil, `object 040b9af339e69d18848b7bbe05cb27ee42bb0161 +type commit +tag body-noise +tagger Alice 1700000000 +0000 +%s +Tag message with gpgsig-looking text: +gpgsig pretend-sig-here +gpgsig-sha256 pretend-other-sig + +%s`, sigHeader(gpgsigPrefix, fakeSig), fakeSig) + + tag, err := SplitTag(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitTag: %v", err) + } + if diff := cmp.Diff([]byte(fakeSig), tag.Gpgsig); diff != "" { + t.Errorf("Gpgsig mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff([]byte(fakeSig), tag.InBody); diff != "" { + t.Errorf("InBody mismatch (-want +got):\n%s", diff) + } + for _, want := range []string{ + "gpgsig pretend-sig-here", + "gpgsig-sha256 pretend-other-sig", + } { + if !bytes.Contains(tag.Payload, []byte(want)) { + t.Errorf("body line %q should remain in payload", want) + } + } +} + +// TestSplitCommit_GpgsigPrefixedHeaders confirms unrelated headers that share +// the "gpgsig" prefix (e.g. a hypothetical "gpgsig-key-id" extension) don't +// trip the duplicate-gpgsig check or get extracted as a signature: the +// trailing space in gpgsigPrefix / gpgsigSha256Prefix distinguishes them +// from "gpgsig-*". +func TestSplitCommit_GpgsigPrefixedHeaders(t *testing.T) { + raw := fmt.Appendf(nil, `tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 +author Alice 1700000000 +0000 +committer Alice 1700000000 +0000 +%sgpgsig-key-id ABCDEF0123456789 +gpgsig-fingerprint 0011223344556677 + +foo +`, sigHeader(gpgsigPrefix, fakeSig)) + + c, err := SplitCommit(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitCommit: %v", err) + } + if diff := cmp.Diff([]byte(fakeSig), c.Gpgsig); diff != "" { + t.Errorf("Gpgsig mismatch (-want +got):\n%s", diff) + } + if c.GpgsigSha256 != nil { + t.Errorf("expected GpgsigSha256 nil, got %q", c.GpgsigSha256) + } + for _, want := range []string{"gpgsig-key-id ABCDEF0123456789", "gpgsig-fingerprint 0011223344556677"} { + if !bytes.Contains(c.Payload, []byte(want)) { + t.Errorf("payload should contain %q", want) + } + } +} + // TestSplitCommit_MergeCommit confirms we don't over-reject: commits with // multiple parent headers (merge commits) are valid and must parse cleanly. func TestSplitCommit_MergeCommit(t *testing.T) { - raw := []byte(fmt.Sprintf(`tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 + raw := fmt.Appendf(nil, `tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 parent 2dc0ab59d7f0a7a62423bd181d9e2ab3adb7b56d parent 1111111111111111111111111111111111111111 parent 2222222222222222222222222222222222222222 @@ -95,9 +315,9 @@ author Alice 1700000000 +0000 committer Alice 1700000000 +0000 %s merge commit -`, gpgsigBlock(fakeSig))) +`, sigHeader(gpgsigPrefix, fakeSig)) - body, sig, err := SplitCommit(bytes.NewReader(raw)) + c, err := SplitCommit(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitCommit: %v", err) } @@ -106,19 +326,19 @@ merge commit "parent 1111111111111111111111111111111111111111\n", "parent 2222222222222222222222222222222222222222\n", } { - if !bytes.Contains(body, []byte(p)) { - t.Errorf("body missing expected parent line %q", p) + if !bytes.Contains(c.Payload, []byte(p)) { + t.Errorf("payload missing expected parent line %q", p) } } - if diff := cmp.Diff([]byte(fakeSig), sig); diff != "" { - t.Errorf("signature mismatch (-want +got):\n%s", diff) + if diff := cmp.Diff([]byte(fakeSig), c.Gpgsig); diff != "" { + t.Errorf("Gpgsig mismatch (-want +got):\n%s", diff) } } // TestSplitCommit_NoSignature documents that an unsigned commit splits without -// error: the entire input becomes payload and the signature is empty. The -// downstream PEM decode + cryptographic check is what surfaces the missing -// signature to the user. +// error: the entire input becomes payload and both signature fields are nil. +// The downstream PEM decode + cryptographic check is what surfaces the +// missing signature to the user. func TestSplitCommit_NoSignature(t *testing.T) { raw := []byte(`tree b333504b8cf3d9c314fed2cc242c5c38e89534a5 author Alice 1700000000 +0000 @@ -126,15 +346,15 @@ committer Alice 1700000000 +0000 unsigned commit `) - body, sig, err := SplitCommit(bytes.NewReader(raw)) + c, err := SplitCommit(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitCommit: %v", err) } - if len(sig) != 0 { - t.Errorf("expected empty sig, got %q", sig) + if c.Gpgsig != nil || c.GpgsigSha256 != nil { + t.Errorf("expected both signature fields nil, got Gpgsig=%q GpgsigSha256=%q", c.Gpgsig, c.GpgsigSha256) } - if diff := cmp.Diff(raw, body); diff != "" { - t.Errorf("body should equal input (-want +got):\n%s", diff) + if diff := cmp.Diff(raw, c.Payload); diff != "" { + t.Errorf("payload should equal input (-want +got):\n%s", diff) } } @@ -144,15 +364,15 @@ unsigned commit // signed. func TestSplitCommit_NoHeaderTerminator(t *testing.T) { raw := []byte("tree b333504b8cf3d9c314fed2cc242c5c38e89534a5\n") - body, sig, err := SplitCommit(bytes.NewReader(raw)) + c, err := SplitCommit(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitCommit: %v", err) } - if len(sig) != 0 { - t.Errorf("expected empty sig, got %q", sig) + if c.Gpgsig != nil || c.GpgsigSha256 != nil { + t.Errorf("expected no signatures, got Gpgsig=%q GpgsigSha256=%q", c.Gpgsig, c.GpgsigSha256) } - if diff := cmp.Diff(raw, body); diff != "" { - t.Errorf("body should equal input (-want +got):\n%s", diff) + if diff := cmp.Diff(raw, c.Payload); diff != "" { + t.Errorf("payload should equal input (-want +got):\n%s", diff) } } @@ -171,13 +391,13 @@ func TestSplitCommit_LenientContinuation(t *testing.T) { "\n" + "foo\n") - _, sig, err := SplitCommit(bytes.NewReader(raw)) + c, err := SplitCommit(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitCommit: %v", err) } want := "-----BEGIN SIGNED MESSAGE-----\ntwo-space-continuation\nextra-tab\n-----END SIGNED MESSAGE-----\n" - if diff := cmp.Diff([]byte(want), sig); diff != "" { - t.Errorf("signature mismatch (-want +got):\n%s", diff) + if diff := cmp.Diff([]byte(want), c.Gpgsig); diff != "" { + t.Errorf("Gpgsig mismatch (-want +got):\n%s", diff) } } @@ -185,11 +405,11 @@ func TestSplitCommit_LenientContinuation(t *testing.T) { // it back, and confirm the bytes are identical. func TestJoinCommit_RoundTrip(t *testing.T) { raw := loadObject(t, "commit.txt") - body, sig, err := SplitCommit(bytes.NewReader(raw)) + c, err := SplitCommit(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitCommit: %v", err) } - rejoined, err := JoinCommit(body, sig) + rejoined, err := JoinCommit(c) if err != nil { t.Fatalf("JoinCommit: %v", err) } @@ -199,30 +419,97 @@ func TestJoinCommit_RoundTrip(t *testing.T) { } func TestJoinCommit_NoHeaderTerminator(t *testing.T) { - _, err := JoinCommit([]byte("not-a-commit"), []byte("sig")) + _, err := JoinCommit(&CommitSig{Payload: []byte("not-a-commit"), Gpgsig: []byte("sig")}) if !errors.Is(err, ErrMalformedObject) { t.Fatalf("want ErrMalformedObject, got %v", err) } } +// TestJoinCommit_PassthroughNoSig documents that JoinCommit on a CommitSig +// with no signatures returns the payload unchanged — useful for callers that +// want a single code path regardless of whether the input was signed. +func TestJoinCommit_PassthroughNoSig(t *testing.T) { + in := []byte("not even a real commit, no terminator") + out, err := JoinCommit(&CommitSig{Payload: in}) + if err != nil { + t.Fatalf("JoinCommit: %v", err) + } + if diff := cmp.Diff(in, out); diff != "" { + t.Errorf("passthrough mismatch (-want +got):\n%s", diff) + } +} + // TestSplitTag_WellFormed runs against a real signed tag (testdata/tag.txt = // `git cat-file -p v0.1.0`) and confirms split + join round-trips // byte-for-byte. func TestSplitTag_WellFormed(t *testing.T) { raw := loadObject(t, "tag.txt") - body, sig, err := SplitTag(bytes.NewReader(raw)) + tag, err := SplitTag(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitTag: %v", err) } - if !bytes.HasPrefix(body, []byte("object ")) { - t.Errorf("body should start with 'object ', got %q", firstLine(body)) + if !bytes.HasPrefix(tag.Payload, []byte("object ")) { + t.Errorf("payload should start with 'object ', got %q", firstLine(tag.Payload)) } - if !bytes.HasPrefix(sig, []byte("-----BEGIN ")) { - t.Errorf("sig should start with PEM marker, got %q", firstLine(sig)) + if !bytes.HasPrefix(tag.InBody, []byte("-----BEGIN ")) { + t.Errorf("InBody should start with PEM marker, got %q", firstLine(tag.InBody)) + } + if tag.Gpgsig != nil || tag.GpgsigSha256 != nil { + t.Errorf("expected no header sigs, got Gpgsig=%q GpgsigSha256=%q", tag.Gpgsig, tag.GpgsigSha256) } - rejoined := JoinTag(body, sig) + rejoined, err := JoinTag(tag) + if err != nil { + t.Fatalf("JoinTag: %v", err) + } + if diff := cmp.Diff(raw, rejoined); diff != "" { + t.Errorf("round-trip mismatch (-want +got):\n%s", diff) + } +} + +// TestSplitTag_DualSigned exercises a tag with all three signature forms: +// the in-body PEM block (current-algorithm signature) plus gpgsig and +// gpgsig-sha256 headers (alternate-algorithm signatures, per the SHA-256 +// transition spec). All three must be extracted into their own fields with +// the payload stripped of all three. +func TestSplitTag_DualSigned(t *testing.T) { + raw := fmt.Appendf(nil, `object 040b9af339e69d18848b7bbe05cb27ee42bb0161 +type commit +tag dual-signed +tagger Alice 1700000000 +0000 +%s%s +dual signed tag + +%s`, + sigHeader(gpgsigPrefix, fakeSig), + sigHeader(gpgsigSha256Prefix, fakeSig2), + fakeSig) + + tag, err := SplitTag(bytes.NewReader(raw)) + if err != nil { + t.Fatalf("SplitTag: %v", err) + } + if diff := cmp.Diff([]byte(fakeSig), tag.InBody); diff != "" { + t.Errorf("InBody mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff([]byte(fakeSig), tag.Gpgsig); diff != "" { + t.Errorf("Gpgsig mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff([]byte(fakeSig2), tag.GpgsigSha256); diff != "" { + t.Errorf("GpgsigSha256 mismatch (-want +got):\n%s", diff) + } + if bytes.Contains(tag.Payload, []byte("\ngpgsig ")) { + t.Errorf("payload should not contain gpgsig header") + } + if bytes.Contains(tag.Payload, []byte("\ngpgsig-sha256 ")) { + t.Errorf("payload should not contain gpgsig-sha256 header") + } + + rejoined, err := JoinTag(tag) + if err != nil { + t.Fatalf("JoinTag: %v", err) + } if diff := cmp.Diff(raw, rejoined); diff != "" { t.Errorf("round-trip mismatch (-want +got):\n%s", diff) } @@ -251,7 +538,7 @@ cmVhbA== -----END SIGNED MESSAGE----- `) - body, sig, err := SplitTag(bytes.NewReader(raw)) + tag, err := SplitTag(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitTag: %v", err) } @@ -260,27 +547,30 @@ cmVhbA== cmVhbA== -----END SIGNED MESSAGE----- `) - if diff := cmp.Diff(wantSig, sig); diff != "" { - t.Errorf("sig mismatch (-want +got):\n%s", diff) + if diff := cmp.Diff(wantSig, tag.InBody); diff != "" { + t.Errorf("InBody mismatch (-want +got):\n%s", diff) } - // The earlier PEM block must remain in the body — it's part of what was - // signed, not the signature itself. - if !bytes.Contains(body, []byte("ZmFrZQ==")) { - t.Errorf("body should contain the earlier embedded PEM block") + // The earlier PEM block must remain in the payload — it's part of what + // was signed, not the signature itself. + if !bytes.Contains(tag.Payload, []byte("ZmFrZQ==")) { + t.Errorf("payload should contain the earlier embedded PEM block") } - if bytes.Contains(body, []byte("cmVhbA==")) { - t.Errorf("body should not contain the trailing signature payload") + if bytes.Contains(tag.Payload, []byte("cmVhbA==")) { + t.Errorf("payload should not contain the trailing signature payload") } - // Round-trip: body + last block reconstructs the original bytes. - if diff := cmp.Diff(raw, JoinTag(body, sig)); diff != "" { + rejoined, err := JoinTag(tag) + if err != nil { + t.Fatalf("JoinTag: %v", err) + } + if diff := cmp.Diff(raw, rejoined); diff != "" { t.Errorf("round-trip mismatch (-want +got):\n%s", diff) } } // TestSplitTag_NoSignature documents that an unsigned tag splits without -// error: body holds the input and sig is empty. +// error: payload holds the input and all signature fields are nil. func TestSplitTag_NoSignature(t *testing.T) { raw := []byte(`object 040b9af339e69d18848b7bbe05cb27ee42bb0161 type commit @@ -289,15 +579,16 @@ tagger Alice 1700000000 +0000 unsigned tag `) - body, sig, err := SplitTag(bytes.NewReader(raw)) + tag, err := SplitTag(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitTag: %v", err) } - if len(sig) != 0 { - t.Errorf("expected empty sig, got %q", sig) + if tag.InBody != nil || tag.Gpgsig != nil || tag.GpgsigSha256 != nil { + t.Errorf("expected all signatures nil, got InBody=%q Gpgsig=%q GpgsigSha256=%q", + tag.InBody, tag.Gpgsig, tag.GpgsigSha256) } - if diff := cmp.Diff(raw, body); diff != "" { - t.Errorf("body should equal input (-want +got):\n%s", diff) + if diff := cmp.Diff(raw, tag.Payload); diff != "" { + t.Errorf("payload should equal input (-want +got):\n%s", diff) } } @@ -305,25 +596,29 @@ unsigned tag // not rejected — the verifier downstream catches the mismatch. func TestSplitTag_NoHeaderTerminator(t *testing.T) { raw := []byte("object 040b9af339e69d18848b7bbe05cb27ee42bb0161\n") - body, sig, err := SplitTag(bytes.NewReader(raw)) + tag, err := SplitTag(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitTag: %v", err) } - if len(sig) != 0 { - t.Errorf("expected empty sig, got %q", sig) + if tag.InBody != nil || tag.Gpgsig != nil || tag.GpgsigSha256 != nil { + t.Errorf("expected all signatures nil, got InBody=%q Gpgsig=%q GpgsigSha256=%q", + tag.InBody, tag.Gpgsig, tag.GpgsigSha256) } - if diff := cmp.Diff(raw, body); diff != "" { - t.Errorf("body should equal input (-want +got):\n%s", diff) + if diff := cmp.Diff(raw, tag.Payload); diff != "" { + t.Errorf("payload should equal input (-want +got):\n%s", diff) } } func TestJoinTag_RoundTrip(t *testing.T) { raw := loadObject(t, "tag.txt") - body, sig, err := SplitTag(bytes.NewReader(raw)) + tag, err := SplitTag(bytes.NewReader(raw)) if err != nil { t.Fatalf("SplitTag: %v", err) } - rejoined := JoinTag(body, sig) + rejoined, err := JoinTag(tag) + if err != nil { + t.Fatalf("JoinTag: %v", err) + } if diff := cmp.Diff(raw, rejoined); diff != "" { t.Errorf("round-trip mismatch (-want +got):\n%s", diff) } diff --git a/pkg/git/verify.go b/pkg/git/verify.go index 751b45e4..00618832 100644 --- a/pkg/git/verify.go +++ b/pkg/git/verify.go @@ -81,8 +81,25 @@ func Verify(ctx context.Context, git Verifier, rekor rekor.Verifier, data, sig [ }, nil } - // Legacy commit based lookup. - commit, err := ObjectHash(data, sig) + // Legacy rekor lookup: reconstruct the raw object bytes so their hash + // matches what git-core (and the legacy rekor entries) recorded. We + // assume sig is the SHA-1-form signature (gpgsig for commits, in-body + // PEM for tags) — which matches how legacy rekor entries were + // generated. For new gpgsig-sha256 signatures there are no legacy + // entries to look up, so this reconstruction is best-effort. + var raw []byte + switch { + case bytes.HasPrefix(data, []byte("tree ")): + raw, err = JoinCommit(&CommitSig{Payload: data, Gpgsig: sig}) + case bytes.HasPrefix(data, []byte("object ")): + raw, err = JoinTag(&TagSig{Payload: data, InBody: sig}) + default: + return nil, errors.New("could not determine Git object type") + } + if err != nil { + return nil, err + } + commit, err := ObjectHash(raw) if err != nil { return nil, err } @@ -117,25 +134,16 @@ func VerifySignature(data, sig []byte, detached bool, rootCerts, intermediates * return v.Verify(context.Background(), data, sig, detached) } -// ObjectHash is a string representation of an encoded Git object. data is the -// signed payload (the bytes fed to the verifier); sig is the PEM-encoded -// signature that was embedded in the object. The returned hash matches what -// git-core computes for the reassembled raw object. -func ObjectHash(data, sig []byte) (string, error) { - var ( - raw []byte - objType plumbing.ObjectType - err error - ) +// ObjectHash returns the git-core hash of an object given its raw bytes +// (the same bytes git stores in the object database, without the +// " \0" prefix). The object type is inferred from the first +// header line: "tree " for commits, "object " for tags. +func ObjectHash(raw []byte) (string, error) { + var objType plumbing.ObjectType switch { - case bytes.HasPrefix(data, []byte("tree ")): - raw, err = JoinCommit(data, sig) - if err != nil { - return "", err - } + case bytes.HasPrefix(raw, []byte("tree ")): objType = plumbing.CommitObject - case bytes.HasPrefix(data, []byte("object ")): - raw = JoinTag(data, sig) + case bytes.HasPrefix(raw, []byte("object ")): objType = plumbing.TagObject default: return "", errors.New("could not determine Git object type")