diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5205745..61923c6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -128,9 +128,20 @@ jobs: | jq -e --arg f "${{ matrix.expect_file }}" \ 'any(.documents[]; .relative_path == $f)' + fingerprint-sync: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-go@v6 + with: + go-version: stable + cache-dependency-path: go.sum + - run: ./internal/implementations/fingerprint/sync.sh + - run: git diff --exit-code internal/implementations/fingerprint/fingerprint.go + ci-test: if: always() - needs: [build, docker, index] + needs: [build, docker, index, fingerprint-sync] runs-on: ubuntu-latest steps: - if: >- diff --git a/flake.nix b/flake.nix index c0ea0a3..51175bd 100644 --- a/flake.nix +++ b/flake.nix @@ -37,7 +37,8 @@ nativeCheckInputs = [ pkgs.git ]; }; default = self.packages.${system}.scip-go; - + } + // pkgs.lib.optionalAttrs pkgs.stdenv.isLinux { docker = pkgs.dockerTools.buildLayeredImage { name = "scip-go"; tag = version; diff --git a/internal/implementations/fingerprint/README.md b/internal/implementations/fingerprint/README.md new file mode 100644 index 0000000..cba0b75 --- /dev/null +++ b/internal/implementations/fingerprint/README.md @@ -0,0 +1,24 @@ +# fingerprint + +This package is a verbatim copy of +[`golang.org/x/tools/gopls/internal/util/fingerprint`], which is not importable +because it lives under an `internal/` path. + +It is used by the implementation-indexing code to produce canonical +method-signature strings (fingerprints) for the 64-bit bitmask filter that +quickly rejects non-matching type/interface pairs before calling the expensive +`types.Implements`. + +## Keeping in sync + +The copy is pinned to the `golang.org/x/tools` version declared in `go.mod` +(currently **v0.43.0**). Run the sync script whenever you bump that dependency: + +``` bash +./internal/implementations/fingerprint/sync.sh +``` + +The script fetches the file at the matching tag and overwrites the local copy. +Any diff will show up in `git status`. + + [`golang.org/x/tools/gopls/internal/util/fingerprint`]: https://github.com/golang/tools/tree/master/gopls/internal/util/fingerprint diff --git a/internal/implementations/fingerprint/fingerprint.go b/internal/implementations/fingerprint/fingerprint.go new file mode 100644 index 0000000..20517f1 --- /dev/null +++ b/internal/implementations/fingerprint/fingerprint.go @@ -0,0 +1,466 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package fingerprint defines a function to [Encode] types as strings +// with the property that identical types have equal string encodings, +// in most cases. In the remaining cases (mostly involving generic +// types), the encodings can be parsed using [Parse] into [Tree] form +// and matched using [Matches]. +package fingerprint + +import ( + "fmt" + "go/types" + "reflect" + "strconv" + "strings" + "text/scanner" +) + +// Encode returns an encoding of a [types.Type] such that, in +// most cases, Encode(x) == Encode(y) iff [types.Identical](x, y). +// +// For a minority of types, mostly involving type parameters, identity +// cannot be reduced to string comparison; these types are called +// "tricky", and are indicated by the boolean result. +// +// In general, computing identity correctly for tricky types requires +// the type checker. However, the fingerprint encoding can be parsed +// by [Parse] into a [Tree] form that permits simple matching sufficient +// to allow a type parameter to unify with any subtree; see [Match]. +// +// In the standard library, 99.8% of package-level types have a +// non-tricky method-set. The most common exceptions are due to type +// parameters. +// +// fingerprint.Encode is defined only for the signature types of functions +// and methods. It must not be called for "untyped" basic types, nor +// the type of a generic function. +func Encode(t types.Type) (_ string, tricky bool) { return fingerprint(t) } + +// A Tree is a parsed form of a fingerprint for use with [Matches]. +type Tree struct{ tree sexpr } + +// String returns the tree in an unspecified human-readable form. +func (tree Tree) String() string { + var out strings.Builder + writeSexpr(&out, tree.tree) + return out.String() +} + +// Parse parses a fingerprint into tree form. +// +// The input must have been produced by [Encode] at the same source +// version; parsing is thus infallible. +func Parse(fp string) Tree { + return Tree{parseFingerprint(fp)} +} + +// Matches reports whether two fingerprint trees match, meaning that +// under some conditions (for example, particular instantiations of +// type parameters) the two types may be identical. +func Matches(x, y Tree) bool { + return unify(x.tree, y.tree) +} + +// Fingerprint syntax +// +// The lexical syntax is essentially Lisp S-expressions: +// +// expr = STRING | INTEGER | IDENT | '(' expr... ')' +// +// where the tokens are as defined by text/scanner. +// +// The grammar of expression forms is: +// +// τ = IDENT -- named or basic type +// | (qual STRING IDENT) -- qualified named type +// | (array INTEGER τ) +// | (slice τ) +// | (ptr τ) +// | (chan IDENT τ) +// | (func τ v? τ) -- signature params, results, variadic? +// | (map τ τ) +// | (struct field*) +// | (tuple τ*) +// | (interface) -- nonempty interface (lossy) +// | (typeparam INTEGER) +// | (inst τ τ...) -- instantiation of a named type +// +// field = IDENT IDENT STRING τ -- name, embedded?, tag, type + +func fingerprint(t types.Type) (string, bool) { + var buf strings.Builder + tricky := false + var print func(t types.Type) + print = func(t types.Type) { + switch t := t.(type) { + case *types.Alias: + print(types.Unalias(t)) + + case *types.Named: + targs := t.TypeArgs() + if targs != nil { + buf.WriteString("(inst ") + } + tname := t.Obj() + if tname.Pkg() != nil { + fmt.Fprintf(&buf, "(qual %q %s)", tname.Pkg().Path(), tname.Name()) + } else if tname.Name() != "error" && tname.Name() != "comparable" { + panic(tname) // error and comparable the only named types with no package + } else { + buf.WriteString(tname.Name()) + } + if targs != nil { + for t0 := range targs.Types() { + buf.WriteByte(' ') + print(t0) + } + buf.WriteString(")") + } + + case *types.Array: + fmt.Fprintf(&buf, "(array %d ", t.Len()) + print(t.Elem()) + buf.WriteByte(')') + + case *types.Slice: + buf.WriteString("(slice ") + print(t.Elem()) + buf.WriteByte(')') + + case *types.Pointer: + buf.WriteString("(ptr ") + print(t.Elem()) + buf.WriteByte(')') + + case *types.Map: + buf.WriteString("(map ") + print(t.Key()) + buf.WriteByte(' ') + print(t.Elem()) + buf.WriteByte(')') + + case *types.Chan: + fmt.Fprintf(&buf, "(chan %d ", t.Dir()) + print(t.Elem()) + buf.WriteByte(')') + + case *types.Tuple: + buf.WriteString("(tuple") + for v := range t.Variables() { + buf.WriteByte(' ') + print(v.Type()) + } + buf.WriteByte(')') + + case *types.Basic: + // Print byte/uint8 as "byte" instead of calling + // BasicType.String, which prints the two distinctly + // (even though their Kinds are numerically equal). + // Ditto for rune/int32. + switch t.Kind() { + case types.Byte: + buf.WriteString("byte") + case types.Rune: + buf.WriteString("rune") + case types.UnsafePointer: + buf.WriteString(`(qual "unsafe" Pointer)`) + default: + if t.Info()&types.IsUntyped != 0 { + panic("fingerprint of untyped type") + } + buf.WriteString(t.String()) + } + + case *types.Signature: + buf.WriteString("(func ") + print(t.Params()) + if t.Variadic() { + buf.WriteString(" v") + } + buf.WriteByte(' ') + print(t.Results()) + buf.WriteByte(')') + + case *types.Struct: + // Non-empty unnamed struct types in method + // signatures are vanishingly rare. + buf.WriteString("(struct") + for i := range t.NumFields() { + f := t.Field(i) + name := f.Name() + if !f.Exported() { + name = fmt.Sprintf("(qual %q %s)", f.Pkg().Path(), name) + } + + // This isn't quite right for embedded type aliases. + // (See types.TypeString(StructType) and #44410 for context.) + // But this is vanishingly rare. + fmt.Fprintf(&buf, " %s %t %q ", name, f.Embedded(), t.Tag(i)) + print(f.Type()) + } + buf.WriteByte(')') + + case *types.Interface: + if t.NumMethods() == 0 { + buf.WriteString("any") // common case + } else { + // Interface assignability is particularly + // tricky due to the possibility of recursion. + // However, nontrivial interface type literals + // are exceedingly rare in function signatures. + // + // TODO(adonovan): add disambiguating precision + // (e.g. number of methods, their IDs and arities) + // as needs arise (i.e. collisions are observed). + tricky = true + buf.WriteString("(interface)") + } + + case *types.TypeParam: + // Matching of type parameters will require + // parsing fingerprints and unification. + tricky = true + fmt.Fprintf(&buf, "(%s %d)", symTypeparam, t.Index()) + + default: // incl. *types.Union + panic(t) + } + } + + print(t) + + return buf.String(), tricky +} + +// sexpr defines the representation of a fingerprint tree. +type ( + sexpr any // = string | int | symbol | *cons | nil + symbol string + cons struct{ car, cdr sexpr } +) + +// parseFingerprint returns the type encoded by fp in tree form. +// +// The input must have been produced by [fingerprint] at the same +// source version; parsing is thus infallible. +func parseFingerprint(fp string) sexpr { + var scan scanner.Scanner + scan.Error = func(scan *scanner.Scanner, msg string) { panic(msg) } + scan.Init(strings.NewReader(fp)) + + // next scans a token and updates tok. + var tok rune + next := func() { tok = scan.Scan() } + + next() + + // parse parses a fingerprint and returns its tree. + var parse func() sexpr + parse = func() sexpr { + if tok == '(' { + next() // consume '(' + var head sexpr // empty list + tailcdr := &head + for tok != ')' { + cell := &cons{car: parse()} + *tailcdr = cell + tailcdr = &cell.cdr + } + next() // consume ')' + return head + } + + s := scan.TokenText() + switch tok { + case scanner.Ident: + next() // consume IDENT + return symbol(s) + + case scanner.Int: + next() // consume INT + i, err := strconv.Atoi(s) + if err != nil { + panic(err) + } + return i + + case scanner.String: + next() // consume STRING + s, err := strconv.Unquote(s) + if err != nil { + panic(err) + } + return s + + default: + panic(tok) + } + } + + return parse() +} + +// writeSexpr formats an S-expression. +// It is provided for debugging. +func writeSexpr(out *strings.Builder, x sexpr) { + switch x := x.(type) { + case nil: + out.WriteString("()") + case string: + fmt.Fprintf(out, "%q", x) + case int: + fmt.Fprintf(out, "%d", x) + case symbol: + out.WriteString(string(x)) + case *cons: + out.WriteString("(") + for { + writeSexpr(out, x.car) + if x.cdr == nil { + break + } else if cdr, ok := x.cdr.(*cons); ok { + x = cdr + out.WriteByte(' ') + } else { + // Dotted list: should never happen, + // but support it for debugging. + out.WriteString(" . ") + print(x.cdr) + break + } + } + out.WriteString(")") + default: + panic(x) + } +} + +// unify reports whether x and y match, in the presence of type parameters. +// The constraints on type parameters are ignored, but each type parameter must +// have a consistent binding. +func unify(x, y sexpr) bool { + + // maxTypeParam returns the maximum type parameter index in x. + var maxTypeParam func(x sexpr) int + maxTypeParam = func(x sexpr) int { + if i := typeParamIndex(x); i >= 0 { + return i + } + if c, ok := x.(*cons); ok { + return max(maxTypeParam(c.car), maxTypeParam(c.cdr)) + } + return -1 + } + + // xBindings[i] is the binding for type parameter #i in x, and similarly for y. + // Although type parameters are nominally bound to sexprs, each bindings[i] + // is a *sexpr, so unbound variables can share a binding. + xBindings := make([]*sexpr, maxTypeParam(x)+1) + for i := range len(xBindings) { + xBindings[i] = new(sexpr) + } + yBindings := make([]*sexpr, maxTypeParam(y)+1) + for i := range len(yBindings) { + yBindings[i] = new(sexpr) + } + + // bind sets binding b to s from bindings if it does not occur in s. + bind := func(b *sexpr, s sexpr, bindings []*sexpr) bool { + // occurs reports whether b is present in s. + var occurs func(s sexpr) bool + occurs = func(s sexpr) bool { + if j := typeParamIndex(s); j >= 0 { + return b == bindings[j] + } + if c, ok := s.(*cons); ok { + return occurs(c.car) || occurs(c.cdr) + } + return false + } + + if occurs(s) { + return false + } + *b = s + return true + } + + var uni func(x, y sexpr) bool + uni = func(x, y sexpr) bool { + var bx, by *sexpr + ix := typeParamIndex(x) + if ix >= 0 { + bx = xBindings[ix] + } + iy := typeParamIndex(y) + if iy >= 0 { + by = yBindings[iy] + } + + if bx != nil || by != nil { + // If both args are type params and neither is bound, have them share a binding. + if bx != nil && by != nil && *bx == nil && *by == nil { + xBindings[ix] = yBindings[iy] + return true + } + // Treat param bindings like original args in what follows. + if bx != nil && *bx != nil { + x = *bx + } + if by != nil && *by != nil { + y = *by + } + // If the x param is unbound, bind it to y. + if bx != nil && *bx == nil { + return bind(bx, y, yBindings) + } + // If the y param is unbound, bind it to x. + if by != nil && *by == nil { + return bind(by, x, xBindings) + } + // Unify the binding of a bound parameter. + return uni(x, y) + } + + // Neither arg is a type param. + if reflect.TypeOf(x) != reflect.TypeOf(y) { + return false // type mismatch + } + switch x := x.(type) { + case nil, string, int, symbol: + return x == y + case *cons: + y := y.(*cons) + if !uni(x.car, y.car) { + return false + } + if x.cdr == nil { + return y.cdr == nil + } + if y.cdr == nil { + return false + } + return uni(x.cdr, y.cdr) + default: + panic(fmt.Sprintf("unify %T %T", x, y)) + } + } + // At least one param is bound. Unify its binding with the other. + return uni(x, y) +} + +// typeParamIndex returns the index of the type parameter, +// if x has the form "(typeparam INTEGER)", otherwise -1. +func typeParamIndex(x sexpr) int { + if x, ok := x.(*cons); ok { + if sym, ok := x.car.(symbol); ok && sym == symTypeparam { + return x.cdr.(*cons).car.(int) + } + } + return -1 +} + +const symTypeparam = "typeparam" diff --git a/internal/implementations/fingerprint/sync.sh b/internal/implementations/fingerprint/sync.sh new file mode 100755 index 0000000..28a3ebe --- /dev/null +++ b/internal/implementations/fingerprint/sync.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Sync fingerprint.go from the upstream golang.org/x/tools repository. +# +# The version tag is read from go.mod so the copy stays pinned to the +# same golang.org/x/tools release that scip-go depends on. +set -euo pipefail + +VERSION=$(go list -m -f '{{.Version}}' golang.org/x/tools) +SRC="https://raw.githubusercontent.com/golang/tools/${VERSION}/gopls/internal/util/fingerprint/fingerprint.go" +DST="$(dirname "$0")/fingerprint.go" + +echo "Fetching fingerprint.go at golang.org/x/tools ${VERSION}..." +curl -fsSL "$SRC" -o "$DST" +echo "Updated $DST" diff --git a/internal/implementations/implementations.go b/internal/implementations/implementations.go index f0f9569..ef3d586 100644 --- a/internal/implementations/implementations.go +++ b/internal/implementations/implementations.go @@ -4,8 +4,12 @@ import ( "fmt" "go/ast" "go/types" + "hash/crc32" "log/slog" + "sync" + "sync/atomic" + "github.com/scip-code/scip-go/internal/implementations/fingerprint" "github.com/scip-code/scip-go/internal/loader" "github.com/scip-code/scip-go/internal/lookup" "github.com/scip-code/scip-go/internal/output" @@ -26,9 +30,46 @@ type ImplDef struct { Ident *ast.Ident Named *types.Named Methods map[methodID]*scip.SymbolInformation + + // Precomputed fields for fast filtering before calling types.Implements. + Mask uint64 // bitmask of hashed method signatures + MethodCount int + HasUnexported bool // true if any method is unexported + PkgPath string // package path, used to skip cross-package checks for unexported methods +} + +// methodMask computes a 64-bit bitmask from a set of methods, using +// the same algorithm as gopls's methodsets package. Each method's +// identity (Id + fingerprint) is hashed and mapped to a single bit. +// The resulting mask enables O(1) rejection of non-matching +// type/interface pairs: if ty.Mask & iface.Mask != iface.Mask, +// the type is missing at least one method of the interface. +func methodMask(methods []*types.Selection) uint64 { + var mask uint64 + var buf []byte + for _, m := range methods { + fn := m.Obj().(*types.Func) + id := fn.Id() + fp, _ := fingerprint.Encode(fn.Signature()) + buf = append(buf[:0], id...) + buf = append(buf, fp...) + sum := crc32.ChecksumIEEE(buf) + mask |= 1 << uint64(((sum>>24)^(sum>>16)^(sum>>8)^sum)&0x3f) + } + return mask +} + +// hasUnexportedMethods returns true if any of the given methods is unexported. +func hasUnexportedMethods(methods []*types.Selection) bool { + for _, m := range methods { + if !m.Obj().Exported() { + return true + } + } + return false } -func findImplementations(concreteTypes map[string]ImplDef, interfaces map[string]ImplDef, symbols *lookup.Global) { +func findImplementations(concreteTypes map[string]ImplDef, interfaces map[string]ImplDef, symbols *lookup.Global, count *uint64) { for _, ty := range concreteTypes { pos := ty.Ident.Pos() sym, ok := symbols.GetSymbolInformation(ty.Pkg, pos) @@ -46,6 +87,25 @@ func findImplementations(concreteTypes map[string]ImplDef, interfaces map[string continue } + // Fast filter 1: if the interface has unexported methods, + // only types in the same package can implement it. + if iface.HasUnexported && ty.PkgPath != iface.PkgPath { + continue + } + + // Fast filter 2: the type must have at least as many methods + // as the interface requires. + if ty.MethodCount < iface.MethodCount { + continue + } + + // Fast filter 3: bitmask subset check. If the type's mask + // doesn't cover all bits of the interface's mask, the type + // is definitely missing at least one required method. + if ty.Mask&iface.Mask != iface.Mask { + continue + } + // Use types.Implements to correctly check interface satisfaction, // handling all edge cases (embedded types, generics, unexported methods). if !types.Implements(ty.Named, ifaceType) && @@ -72,6 +132,8 @@ func findImplementations(concreteTypes map[string]ImplDef, interfaces map[string }) } } + + atomic.AddUint64(count, 1) } } @@ -81,50 +143,60 @@ func AddImplementationRelationships( symbols *lookup.Global, ) ([]*scip.SymbolInformation, error) { var externalSymbols []*scip.SymbolInformation - err := output.WithProgress("Indexing Implementations", func() error { - var msCache typeutil.MethodSetCache - localInterfaces, localTypes, err := extractInterfacesAndConcreteTypes( - pkgs, symbols, &msCache) - if err != nil { - return err - } - remotePackages := make(loader.PackageLookup) - for pkgID, pkg := range allPackages { - if _, ok := pkgs[pkgID]; ok { - continue - } + var msCache typeutil.MethodSetCache + localInterfaces, localTypes, err := extractInterfacesAndConcreteTypes( + pkgs, symbols, &msCache) + if err != nil { + return nil, err + } - remotePackages[pkgID] = pkg - } - remoteInterfaces, remoteTypes, err := extractInterfacesAndConcreteTypes( - remotePackages, symbols, &msCache) - if err != nil { - return err + remotePackages := make(loader.PackageLookup) + for pkgID, pkg := range allPackages { + if _, ok := pkgs[pkgID]; ok { + continue } + remotePackages[pkgID] = pkg + } + remoteInterfaces, remoteTypes, err := extractInterfacesAndConcreteTypes( + remotePackages, symbols, &msCache) + if err != nil { + return nil, err + } + + // Total concrete types to check across the three passes. + total := uint64(len(localTypes)*2 + len(remoteTypes)) + var count uint64 + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + // local type -> local interface - findImplementations(localTypes, localInterfaces, symbols) + findImplementations(localTypes, localInterfaces, symbols, &count) // local type -> remote interface - findImplementations(localTypes, remoteInterfaces, symbols) + findImplementations(localTypes, remoteInterfaces, symbols, &count) // remote type -> local interface // We emit these as external symbols so index consumer can merge them. - findImplementations(remoteTypes, localInterfaces, symbols) + findImplementations(remoteTypes, localInterfaces, symbols, &count) + }() - // Collect remote type symbols that gained relationships - for _, typ := range remoteTypes { - if sym, ok := symbols.GetSymbolInformation(typ.Pkg, typ.Ident.Pos()); ok { - if len(sym.Relationships) > 0 { - externalSymbols = append(externalSymbols, sym) - } + output.WithProgressParallel(&wg, "Indexing Implementations", &count, total) + + // Collect remote type symbols that gained relationships + for _, typ := range remoteTypes { + if sym, ok := symbols.GetSymbolInformation(typ.Pkg, typ.Ident.Pos()); ok { + if len(sym.Relationships) > 0 { + externalSymbols = append(externalSymbols, sym) } } + } - return nil - }) - return externalSymbols, err + return externalSymbols, nil } func extractInterfacesAndConcreteTypes( @@ -209,11 +281,15 @@ func extractInterfacesAndConcreteTypes( } d := ImplDef{ - Symbol: symbol, - Pkg: pkg, - Ident: ident, - Named: objType, - Methods: methodIds, + Symbol: symbol, + Pkg: pkg, + Ident: ident, + Named: objType, + Methods: methodIds, + Mask: methodMask(methods), + MethodCount: len(methods), + HasUnexported: hasUnexportedMethods(methods), + PkgPath: pkg.PkgPath, } if types.IsInterface(objType) {