From e49cc43d95ad28762c9af114f480d85e96002d73 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Fri, 31 Mar 2023 04:14:33 +0200 Subject: [PATCH 1/4] feat(gateway): IPFSBackend tracing and metrics - Adds spans for every IPFSBackend API call - Adds success/failure histogram for each API call ipfs_gw_backend_api_call_duration_seconds --- gateway/handler.go | 134 +------------------ gateway/metrics.go | 312 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 318 insertions(+), 128 deletions(-) create mode 100644 gateway/metrics.go diff --git a/gateway/handler.go b/gateway/handler.go index 4c3fe29fd..6a5f17e87 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -20,7 +20,6 @@ import ( cid "github.com/ipfs/go-cid" logging "github.com/ipfs/go-log" prometheus "github.com/prometheus/client_golang/prometheus" - "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" @@ -81,6 +80,12 @@ type handler struct { ipnsRecordGetMetric *prometheus.HistogramVec } +// NewHandler returns an http.Handler that can act as a gateway to IPFS content +// offlineApi is a version of the API that should not make network requests for missing data +func NewHandler(c Config, api IPFSBackend) http.Handler { + return newHandlerWithMetrics(c, api) +} + // StatusResponseWriter enables us to override HTTP Status Code passed to // WriteHeader function inside of http.ServeContent. Decision is based on // presence of HTTP Headers such as Location. @@ -149,128 +154,6 @@ func (w *errRecordingResponseWriter) ReadFrom(r io.Reader) (n int64, err error) return n, err } -func newSummaryMetric(name string, help string) *prometheus.SummaryVec { - summaryMetric := prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Namespace: "ipfs", - Subsystem: "http", - Name: name, - Help: help, - }, - []string{"gateway"}, - ) - if err := prometheus.Register(summaryMetric); err != nil { - if are, ok := err.(prometheus.AlreadyRegisteredError); ok { - summaryMetric = are.ExistingCollector.(*prometheus.SummaryVec) - } else { - log.Errorf("failed to register ipfs_http_%s: %v", name, err) - } - } - return summaryMetric -} - -func newHistogramMetric(name string, help string) *prometheus.HistogramVec { - // We can add buckets as a parameter in the future, but for now using static defaults - // suggested in https://github.com/ipfs/kubo/issues/8441 - defaultBuckets := []float64{0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60} - histogramMetric := prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Namespace: "ipfs", - Subsystem: "http", - Name: name, - Help: help, - Buckets: defaultBuckets, - }, - []string{"gateway"}, - ) - if err := prometheus.Register(histogramMetric); err != nil { - if are, ok := err.(prometheus.AlreadyRegisteredError); ok { - histogramMetric = are.ExistingCollector.(*prometheus.HistogramVec) - } else { - log.Errorf("failed to register ipfs_http_%s: %v", name, err) - } - } - return histogramMetric -} - -// NewHandler returns an http.Handler that can act as a gateway to IPFS content -// offlineApi is a version of the API that should not make network requests for missing data -func NewHandler(c Config, api IPFSBackend) http.Handler { - return newHandler(c, api) -} - -func newHandler(c Config, api IPFSBackend) *handler { - i := &handler{ - config: c, - api: api, - // Improved Metrics - // ---------------------------- - // Time till the first content block (bar in /ipfs/cid/foo/bar) - // (format-agnostic, across all response types) - firstContentBlockGetMetric: newHistogramMetric( - "gw_first_content_block_get_latency_seconds", - "The time till the first content block is received on GET from the gateway.", - ), - - // Response-type specific metrics - // ---------------------------- - // Generic: time it takes to execute a successful gateway request (all request types) - getMetric: newHistogramMetric( - "gw_get_duration_seconds", - "The time to GET a successful response to a request (all content types).", - ), - // UnixFS: time it takes to return a file - unixfsFileGetMetric: newHistogramMetric( - "gw_unixfs_file_get_duration_seconds", - "The time to serve an entire UnixFS file from the gateway.", - ), - // UnixFS: time it takes to find and serve an index.html file on behalf of a directory. - unixfsDirIndexGetMetric: newHistogramMetric( - "gw_unixfs_dir_indexhtml_get_duration_seconds", - "The time to serve an index.html file on behalf of a directory from the gateway. This is a subset of gw_unixfs_file_get_duration_seconds.", - ), - // UnixFS: time it takes to generate static HTML with directory listing - unixfsGenDirListingGetMetric: newHistogramMetric( - "gw_unixfs_gen_dir_listing_get_duration_seconds", - "The time to serve a generated UnixFS HTML directory listing from the gateway.", - ), - // CAR: time it takes to return requested CAR stream - carStreamGetMetric: newHistogramMetric( - "gw_car_stream_get_duration_seconds", - "The time to GET an entire CAR stream from the gateway.", - ), - // Block: time it takes to return requested Block - rawBlockGetMetric: newHistogramMetric( - "gw_raw_block_get_duration_seconds", - "The time to GET an entire raw Block from the gateway.", - ), - // TAR: time it takes to return requested TAR stream - tarStreamGetMetric: newHistogramMetric( - "gw_tar_stream_get_duration_seconds", - "The time to GET an entire TAR stream from the gateway.", - ), - // JSON/CBOR: time it takes to return requested DAG-JSON/-CBOR document - jsoncborDocumentGetMetric: newHistogramMetric( - "gw_jsoncbor_get_duration_seconds", - "The time to GET an entire DAG-JSON/CBOR block from the gateway.", - ), - // IPNS Record: time it takes to return IPNS record - ipnsRecordGetMetric: newHistogramMetric( - "gw_ipns_record_get_duration_seconds", - "The time to GET an entire IPNS Record from the gateway.", - ), - - // Legacy Metrics - // ---------------------------- - unixfsGetMetric: newSummaryMetric( // TODO: remove? - // (deprecated, use firstContentBlockGetMetric instead) - "unixfs_get_latency_seconds", - "DEPRECATED: does not do what you think, use gw_first_content_block_get_latency_seconds instead.", - ), - } - return i -} - func (i *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { defer panicHandler(w) @@ -887,8 +770,3 @@ func handleSuperfluousNamespace(w http.ResponseWriter, r *http.Request, contentP return true } - -// spanTrace starts a new span using the standard IPFS tracing conventions. -func spanTrace(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { - return otel.Tracer("boxo").Start(ctx, fmt.Sprintf("%s.%s", " Gateway", spanName), opts...) -} diff --git a/gateway/metrics.go b/gateway/metrics.go new file mode 100644 index 000000000..22f0b5f3a --- /dev/null +++ b/gateway/metrics.go @@ -0,0 +1,312 @@ +package gateway + +import ( + "context" + "fmt" + "io" + "time" + + "github.com/ipfs/boxo/coreiface/path" + "github.com/ipfs/boxo/files" + "github.com/ipfs/go-cid" + prometheus "github.com/prometheus/client_golang/prometheus" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" +) + +type ipfsBackendWithMetrics struct { + api IPFSBackend + apiCallMetric *prometheus.HistogramVec +} + +func newIPFSBackendWithMetrics(api IPFSBackend) *ipfsBackendWithMetrics { + // We can add buckets as a parameter in the future, but for now using static defaults + // suggested in https://github.com/ipfs/kubo/issues/8441 + + apiCallMetric := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "ipfs", + Subsystem: "gw_backend", + Name: "api_call_duration_seconds", + Help: "The time spent in IPFSBackend API calls that returned success.", + Buckets: []float64{0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60}, + }, + []string{"name", "result"}, + ) + + if err := prometheus.Register(apiCallMetric); err != nil { + if are, ok := err.(prometheus.AlreadyRegisteredError); ok { + apiCallMetric = are.ExistingCollector.(*prometheus.HistogramVec) + } else { + log.Errorf("failed to register ipfs_gw_backend_api_call_duration_seconds: %v", err) + } + } + + return &ipfsBackendWithMetrics{api, apiCallMetric} +} + +func (b *ipfsBackendWithMetrics) updateApiCallMetric(name string, err error, begin time.Time) { + end := time.Since(begin).Seconds() + if err == nil { + b.apiCallMetric.WithLabelValues(name, "success").Observe(end) + } else { + b.apiCallMetric.WithLabelValues(name, "failure").Observe(end) + } +} + +func (b *ipfsBackendWithMetrics) Get(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *GetResponse, error) { + begin := time.Now() + name := "IPFSBackend.Get" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + defer span.End() + + md, n, err := b.api.Get(ctx, path) + + b.updateApiCallMetric(name, err, begin) + return md, n, err +} + +func (b *ipfsBackendWithMetrics) GetRange(ctx context.Context, path ImmutablePath, ranges ...GetRange) (ContentPathMetadata, files.File, error) { + begin := time.Now() + name := "IPFSBackend.GetRange" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + defer span.End() + + md, f, err := b.api.GetRange(ctx, path, ranges...) + + b.updateApiCallMetric(name, err, begin) + return md, f, err +} + +func (b *ipfsBackendWithMetrics) GetAll(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) { + begin := time.Now() + name := "IPFSBackend.GetAll" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + defer span.End() + + md, n, err := b.api.GetAll(ctx, path) + + b.updateApiCallMetric(name, err, begin) + return md, n, err +} + +func (b *ipfsBackendWithMetrics) GetBlock(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.File, error) { + begin := time.Now() + name := "IPFSBackend.GetBlock" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + defer span.End() + + md, n, err := b.api.GetBlock(ctx, path) + + b.updateApiCallMetric(name, err, begin) + return md, n, err +} + +func (b *ipfsBackendWithMetrics) Head(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) { + begin := time.Now() + name := "IPFSBackend.Head" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + defer span.End() + + md, n, err := b.api.Head(ctx, path) + + b.updateApiCallMetric(name, err, begin) + return md, n, err +} + +func (b *ipfsBackendWithMetrics) ResolvePath(ctx context.Context, path ImmutablePath) (ContentPathMetadata, error) { + begin := time.Now() + name := "IPFSBackend.ResolvePath" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + defer span.End() + + md, err := b.api.ResolvePath(ctx, path) + + b.updateApiCallMetric(name, err, begin) + return md, err +} + +func (b *ipfsBackendWithMetrics) GetCAR(ctx context.Context, path ImmutablePath) (ContentPathMetadata, io.ReadCloser, <-chan error, error) { + begin := time.Now() + name := "IPFSBackend.GetCAR" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + defer span.End() + + md, rc, errCh, err := b.api.GetCAR(ctx, path) + + // TODO: handle errCh + b.updateApiCallMetric(name, err, begin) + return md, rc, errCh, err +} + +func (b *ipfsBackendWithMetrics) IsCached(ctx context.Context, path path.Path) bool { + begin := time.Now() + name := "IPFSBackend.IsCached" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + defer span.End() + + bln := b.api.IsCached(ctx, path) + + b.updateApiCallMetric(name, nil, begin) + return bln +} + +func (b *ipfsBackendWithMetrics) GetIPNSRecord(ctx context.Context, cid cid.Cid) ([]byte, error) { + begin := time.Now() + name := "IPFSBackend.GetIPNSRecord" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("cid", cid.String()))) + defer span.End() + + r, err := b.api.GetIPNSRecord(ctx, cid) + + b.updateApiCallMetric(name, err, begin) + return r, err +} + +func (b *ipfsBackendWithMetrics) ResolveMutable(ctx context.Context, path path.Path) (ImmutablePath, error) { + begin := time.Now() + name := "IPFSBackend.ResolveMutable" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + defer span.End() + + p, err := b.api.ResolveMutable(ctx, path) + + b.updateApiCallMetric(name, err, begin) + return p, err +} + +func (b *ipfsBackendWithMetrics) GetDNSLinkRecord(ctx context.Context, fqdn string) (path.Path, error) { + begin := time.Now() + name := "IPFSBackend.GetDNSLinkRecord" + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("fqdn", fqdn))) + defer span.End() + + p, err := b.api.GetDNSLinkRecord(ctx, fqdn) + + b.updateApiCallMetric(name, err, begin) + return p, err +} + +var _ IPFSBackend = (*ipfsBackendWithMetrics)(nil) + +func newHandlerWithMetrics(c Config, api IPFSBackend) *handler { + i := &handler{ + config: c, + api: newIPFSBackendWithMetrics(api), + // Improved Metrics + // ---------------------------- + // Time till the first content block (bar in /ipfs/cid/foo/bar) + // (format-agnostic, across all response types) + firstContentBlockGetMetric: newHistogramMetric( + "gw_first_content_block_get_latency_seconds", + "The time till the first content block is received on GET from the gateway.", + ), + + // Response-type specific metrics + // ---------------------------- + // Generic: time it takes to execute a successful gateway request (all request types) + getMetric: newHistogramMetric( + "gw_get_duration_seconds", + "The time to GET a successful response to a request (all content types).", + ), + // UnixFS: time it takes to return a file + unixfsFileGetMetric: newHistogramMetric( + "gw_unixfs_file_get_duration_seconds", + "The time to serve an entire UnixFS file from the gateway.", + ), + // UnixFS: time it takes to find and serve an index.html file on behalf of a directory. + unixfsDirIndexGetMetric: newHistogramMetric( + "gw_unixfs_dir_indexhtml_get_duration_seconds", + "The time to serve an index.html file on behalf of a directory from the gateway. This is a subset of gw_unixfs_file_get_duration_seconds.", + ), + // UnixFS: time it takes to generate static HTML with directory listing + unixfsGenDirListingGetMetric: newHistogramMetric( + "gw_unixfs_gen_dir_listing_get_duration_seconds", + "The time to serve a generated UnixFS HTML directory listing from the gateway.", + ), + // CAR: time it takes to return requested CAR stream + carStreamGetMetric: newHistogramMetric( + "gw_car_stream_get_duration_seconds", + "The time to GET an entire CAR stream from the gateway.", + ), + // Block: time it takes to return requested Block + rawBlockGetMetric: newHistogramMetric( + "gw_raw_block_get_duration_seconds", + "The time to GET an entire raw Block from the gateway.", + ), + // TAR: time it takes to return requested TAR stream + tarStreamGetMetric: newHistogramMetric( + "gw_tar_stream_get_duration_seconds", + "The time to GET an entire TAR stream from the gateway.", + ), + // JSON/CBOR: time it takes to return requested DAG-JSON/-CBOR document + jsoncborDocumentGetMetric: newHistogramMetric( + "gw_jsoncbor_get_duration_seconds", + "The time to GET an entire DAG-JSON/CBOR block from the gateway.", + ), + // IPNS Record: time it takes to return IPNS record + ipnsRecordGetMetric: newHistogramMetric( + "gw_ipns_record_get_duration_seconds", + "The time to GET an entire IPNS Record from the gateway.", + ), + + // Legacy Metrics + // ---------------------------- + unixfsGetMetric: newSummaryMetric( // TODO: remove? + // (deprecated, use firstContentBlockGetMetric instead) + "unixfs_get_latency_seconds", + "DEPRECATED: does not do what you think, use gw_first_content_block_get_latency_seconds instead.", + ), + } + return i +} + +func newSummaryMetric(name string, help string) *prometheus.SummaryVec { + summaryMetric := prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: "ipfs", + Subsystem: "http", + Name: name, + Help: help, + }, + []string{"gateway"}, + ) + if err := prometheus.Register(summaryMetric); err != nil { + if are, ok := err.(prometheus.AlreadyRegisteredError); ok { + summaryMetric = are.ExistingCollector.(*prometheus.SummaryVec) + } else { + log.Errorf("failed to register ipfs_http_%s: %v", name, err) + } + } + return summaryMetric +} + +func newHistogramMetric(name string, help string) *prometheus.HistogramVec { + // We can add buckets as a parameter in the future, but for now using static defaults + // suggested in https://github.com/ipfs/kubo/issues/8441 + defaultBuckets := []float64{0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60} + histogramMetric := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "ipfs", + Subsystem: "http", + Name: name, + Help: help, + Buckets: defaultBuckets, + }, + []string{"gateway"}, + ) + if err := prometheus.Register(histogramMetric); err != nil { + if are, ok := err.(prometheus.AlreadyRegisteredError); ok { + histogramMetric = are.ExistingCollector.(*prometheus.HistogramVec) + } else { + log.Errorf("failed to register ipfs_http_%s: %v", name, err) + } + } + return histogramMetric +} + +// spanTrace starts a new span using the standard IPFS tracing conventions. +func spanTrace(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { + return otel.Tracer("boxo").Start(ctx, fmt.Sprintf("%s.%s", " Gateway", spanName), opts...) +} From b77525945b8f92698a7cf0d16e9fbda1d8e9c47b Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Thu, 30 Mar 2023 12:06:15 -0400 Subject: [PATCH 2/4] fix: switch gateway GetRange API to match Get Also fixes serving range requests for /ipfs/bafydir when we know we'll get the index.html or _redirect. --- gateway/blocks_gateway.go | 19 +------- gateway/gateway.go | 9 ++-- gateway/gateway_test.go | 2 +- gateway/handler_defaults.go | 81 +++++++++++++++-------------------- gateway/handler_test.go | 4 +- gateway/handler_unixfs_dir.go | 8 +++- gateway/metrics.go | 2 +- 7 files changed, 52 insertions(+), 73 deletions(-) diff --git a/gateway/blocks_gateway.go b/gateway/blocks_gateway.go index ce18bfd9d..9b70ab51c 100644 --- a/gateway/blocks_gateway.go +++ b/gateway/blocks_gateway.go @@ -180,23 +180,8 @@ func (api *BlocksGateway) Get(ctx context.Context, path ImmutablePath) (ContentP return ContentPathMetadata{}, nil, fmt.Errorf("data was not a valid file or directory: %w", ErrInternalServerError) // TODO: should there be a gateway invalid content type to abstract over the various IPLD error types? } -func (api *BlocksGateway) GetRange(ctx context.Context, path ImmutablePath, ranges ...GetRange) (ContentPathMetadata, files.File, error) { - md, nd, err := api.getNode(ctx, path) - if err != nil { - return md, nil, err - } - - // This code path covers full graph, single file/directory, and range requests - n, err := ufile.NewUnixfsFile(ctx, api.dagService, nd) - if err != nil { - return md, nil, err - } - f, ok := n.(files.File) - if !ok { - return ContentPathMetadata{}, nil, NewErrorResponse(fmt.Errorf("can only do range requests on files, but did not get a file"), http.StatusBadRequest) - } - - return md, f, nil +func (api *BlocksGateway) GetRange(ctx context.Context, path ImmutablePath, ranges ...GetRange) (ContentPathMetadata, *GetResponse, error) { + return api.Get(ctx, path) } func (api *BlocksGateway) GetAll(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) { diff --git a/gateway/gateway.go b/gateway/gateway.go index 86d2db8fb..9f198d3b5 100644 --- a/gateway/gateway.go +++ b/gateway/gateway.go @@ -91,10 +91,11 @@ type IPFSBackend interface { // Size, and Cid. Get(context.Context, ImmutablePath) (ContentPathMetadata, *GetResponse, error) - // GetRange returns a full UnixFS file object. Ranges passed in are advisory for pre-fetching data, however - // consumers of this function may require extra data beyond the passed ranges (e.g. the initial bit of the file - // might be used for content type sniffing even if only the end of the file is requested). - GetRange(context.Context, ImmutablePath, ...GetRange) (ContentPathMetadata, files.File, error) + // GetRange returns a full UnixFS file object, or a UnixFS directory. Ranges passed in are advisory for pre-fetching + // data, however consumers of this function may require extra data beyond the passed ranges (e.g. the initial bit of + // the file might be used for content type sniffing even if only the end of the file is requested). + // Note: there is currently no semantic meaning attached to a range request for a directory + GetRange(context.Context, ImmutablePath, ...GetRange) (ContentPathMetadata, *GetResponse, error) // GetAll returns a UnixFS file or directory depending on what the path is that has been requested. Directories should // include all content recursively. diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index aa36b47e9..9a1e4b5f8 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -111,7 +111,7 @@ func (api *mockAPI) Get(ctx context.Context, immutablePath ImmutablePath) (Conte return api.gw.Get(ctx, immutablePath) } -func (api *mockAPI) GetRange(ctx context.Context, immutablePath ImmutablePath, ranges ...GetRange) (ContentPathMetadata, files.File, error) { +func (api *mockAPI) GetRange(ctx context.Context, immutablePath ImmutablePath, ranges ...GetRange) (ContentPathMetadata, *GetResponse, error) { return api.gw.GetRange(ctx, immutablePath, ranges...) } diff --git a/gateway/handler_defaults.go b/gateway/handler_defaults.go index 205ac6064..88fe1ed5b 100644 --- a/gateway/handler_defaults.go +++ b/gateway/handler_defaults.go @@ -29,6 +29,7 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h isDirectoryHeadRequest bool directoryMetadata *directoryMetadata err error + ranges []GetRange ) switch r.Method { @@ -48,63 +49,51 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h return false } case http.MethodGet: - // TODO: refactor below: we should not have 2x20 duplicated flow control when the only difference is ranges. rangeHeader := r.Header.Get("Range") - if rangeHeader == "" { - var getResp *GetResponse - // TODO: passing resolved path here, instead of contentPath is harming content routing. Knowing original immutableContentPath will allow backend to find providers for parents, even when internal CIDs are not announced, and will provide better key for caching related DAGs. - pathMetadata, getResp, err = i.api.Get(ctx, maybeResolvedImPath) - if err != nil { - if isWebRequest(requestedContentType) { - forwardedPath, continueProcessing := i.handleWebRequestErrors(w, r, maybeResolvedImPath, immutableContentPath, contentPath, err, logger) - if !continueProcessing { - return false - } - pathMetadata, getResp, err = i.api.Get(ctx, forwardedPath) - if err != nil { - err = fmt.Errorf("failed to resolve %s: %w", debugStr(contentPath.String()), err) - webError(w, err, http.StatusInternalServerError) - } - } else { - if !i.handleRequestErrors(w, contentPath, err) { - return false - } - } - } - if getResp.bytes != nil { - bytesResponse = getResp.bytes - defer bytesResponse.Close() - } else { - directoryMetadata = getResp.directoryMetadata - } - } else { + if rangeHeader != "" { // TODO: Add tests for range parsing - var ranges []GetRange ranges, err = parseRange(rangeHeader) if err != nil { webError(w, fmt.Errorf("invalid range request: %w", err), http.StatusBadRequest) return false } - pathMetadata, bytesResponse, err = i.api.GetRange(ctx, maybeResolvedImPath, ranges...) - if err != nil { - if isWebRequest(requestedContentType) { - forwardedPath, continueProcessing := i.handleWebRequestErrors(w, r, maybeResolvedImPath, immutableContentPath, contentPath, err, logger) - if !continueProcessing { - return false - } - pathMetadata, bytesResponse, err = i.api.GetRange(ctx, forwardedPath, ranges...) - if err != nil { - err = fmt.Errorf("failed to resolve %s: %w", debugStr(contentPath.String()), err) - webError(w, err, http.StatusInternalServerError) - } + } + + var getResp *GetResponse + // TODO: passing resolved path here, instead of contentPath is harming content routing. Knowing original immutableContentPath will allow backend to find providers for parents, even when internal CIDs are not announced, and will provide better key for caching related DAGs. + if len(ranges) == 0 { + pathMetadata, getResp, err = i.api.Get(ctx, maybeResolvedImPath) + } else { + pathMetadata, getResp, err = i.api.GetRange(ctx, maybeResolvedImPath, ranges...) + } + if err != nil { + if isWebRequest(requestedContentType) { + forwardedPath, continueProcessing := i.handleWebRequestErrors(w, r, maybeResolvedImPath, immutableContentPath, contentPath, err, logger) + if !continueProcessing { + return false + } + if len(ranges) == 0 { + pathMetadata, getResp, err = i.api.Get(ctx, forwardedPath) } else { - if !i.handleRequestErrors(w, contentPath, err) { - return false - } + pathMetadata, getResp, err = i.api.GetRange(ctx, forwardedPath, ranges...) + } + if err != nil { + err = fmt.Errorf("failed to resolve %s: %w", debugStr(contentPath.String()), err) + webError(w, err, http.StatusInternalServerError) + } + } else { + if !i.handleRequestErrors(w, contentPath, err) { + return false } } + } + if getResp.bytes != nil { + bytesResponse = getResp.bytes defer bytesResponse.Close() + } else { + directoryMetadata = getResp.directoryMetadata } + default: // This shouldn't be possible to reach which is why it is a 500 rather than 4XX error webError(w, fmt.Errorf("invalid method: cannot use this HTTP method with the given request"), http.StatusInternalServerError) @@ -140,7 +129,7 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h // Handling Unixfs directory if directoryMetadata != nil || isDirectoryHeadRequest { logger.Debugw("serving unixfs directory", "path", contentPath) - return i.serveDirectory(ctx, w, r, resolvedPath, contentPath, isDirectoryHeadRequest, directoryMetadata, begin, logger) + return i.serveDirectory(ctx, w, r, resolvedPath, contentPath, isDirectoryHeadRequest, directoryMetadata, ranges, begin, logger) } webError(w, fmt.Errorf("unsupported UnixFS type"), http.StatusInternalServerError) diff --git a/gateway/handler_test.go b/gateway/handler_test.go index 0d158042d..bf488d03b 100644 --- a/gateway/handler_test.go +++ b/gateway/handler_test.go @@ -49,7 +49,7 @@ func (api *errorMockAPI) Get(ctx context.Context, path ImmutablePath) (ContentPa return ContentPathMetadata{}, nil, api.err } -func (api *errorMockAPI) GetRange(ctx context.Context, path ImmutablePath, getRange ...GetRange) (ContentPathMetadata, files.File, error) { +func (api *errorMockAPI) GetRange(ctx context.Context, path ImmutablePath, getRange ...GetRange) (ContentPathMetadata, *GetResponse, error) { return ContentPathMetadata{}, nil, api.err } @@ -165,7 +165,7 @@ func (api *panicMockAPI) Get(ctx context.Context, immutablePath ImmutablePath) ( panic("i am panicking") } -func (api *panicMockAPI) GetRange(ctx context.Context, immutablePath ImmutablePath, ranges ...GetRange) (ContentPathMetadata, files.File, error) { +func (api *panicMockAPI) GetRange(ctx context.Context, immutablePath ImmutablePath, ranges ...GetRange) (ContentPathMetadata, *GetResponse, error) { panic("i am panicking") } diff --git a/gateway/handler_unixfs_dir.go b/gateway/handler_unixfs_dir.go index 6c5317246..423af8285 100644 --- a/gateway/handler_unixfs_dir.go +++ b/gateway/handler_unixfs_dir.go @@ -23,7 +23,7 @@ import ( // serveDirectory returns the best representation of UnixFS directory // // It will return index.html if present, or generate directory listing otherwise. -func (i *handler) serveDirectory(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, isHeadRequest bool, directoryMetadata *directoryMetadata, begin time.Time, logger *zap.SugaredLogger) bool { +func (i *handler) serveDirectory(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, isHeadRequest bool, directoryMetadata *directoryMetadata, ranges []GetRange, begin time.Time, logger *zap.SugaredLogger) bool { ctx, span := spanTrace(ctx, "ServeDirectory", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() @@ -81,7 +81,11 @@ func (i *handler) serveDirectory(ctx context.Context, w http.ResponseWriter, r * } } else { var getResp *GetResponse - _, getResp, err = i.api.Get(ctx, imIndexPath) + if len(ranges) == 0 { + _, getResp, err = i.api.Get(ctx, imIndexPath) + } else { + _, getResp, err = i.api.GetRange(ctx, imIndexPath, ranges...) + } if err == nil { if getResp.bytes == nil { webError(w, fmt.Errorf("%q could not be read: %w", imIndexPath, files.ErrNotReader), http.StatusUnprocessableEntity) diff --git a/gateway/metrics.go b/gateway/metrics.go index 22f0b5f3a..ada179e57 100644 --- a/gateway/metrics.go +++ b/gateway/metrics.go @@ -67,7 +67,7 @@ func (b *ipfsBackendWithMetrics) Get(ctx context.Context, path ImmutablePath) (C return md, n, err } -func (b *ipfsBackendWithMetrics) GetRange(ctx context.Context, path ImmutablePath, ranges ...GetRange) (ContentPathMetadata, files.File, error) { +func (b *ipfsBackendWithMetrics) GetRange(ctx context.Context, path ImmutablePath, ranges ...GetRange) (ContentPathMetadata, *GetResponse, error) { begin := time.Now() name := "IPFSBackend.GetRange" ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) From ecf32dcdbb9823925ebdabefa874961e62120425 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 1 Apr 2023 00:14:26 +0200 Subject: [PATCH 3/4] refactor(gw): merge IPFSBackend.Get and GetRange https://github.com/ipfs/boxo/pull/240#pullrequestreview-1365554977 --- gateway/blocks_gateway.go | 6 +----- gateway/gateway.go | 34 ++++++++++++++++++++++------------ gateway/gateway_test.go | 8 ++------ gateway/handler_defaults.go | 28 ++++++++++++---------------- gateway/handler_test.go | 12 ++---------- gateway/handler_unixfs_dir.go | 8 ++------ gateway/metrics.go | 18 +++--------------- 7 files changed, 44 insertions(+), 70 deletions(-) diff --git a/gateway/blocks_gateway.go b/gateway/blocks_gateway.go index 9b70ab51c..1b47525c3 100644 --- a/gateway/blocks_gateway.go +++ b/gateway/blocks_gateway.go @@ -139,7 +139,7 @@ func NewBlocksGateway(blockService blockservice.BlockService, opts ...BlockGatew }, nil } -func (api *BlocksGateway) Get(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *GetResponse, error) { +func (api *BlocksGateway) Get(ctx context.Context, path ImmutablePath, ranges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { md, nd, err := api.getNode(ctx, path) if err != nil { return md, nil, err @@ -180,10 +180,6 @@ func (api *BlocksGateway) Get(ctx context.Context, path ImmutablePath) (ContentP return ContentPathMetadata{}, nil, fmt.Errorf("data was not a valid file or directory: %w", ErrInternalServerError) // TODO: should there be a gateway invalid content type to abstract over the various IPLD error types? } -func (api *BlocksGateway) GetRange(ctx context.Context, path ImmutablePath, ranges ...GetRange) (ContentPathMetadata, *GetResponse, error) { - return api.Get(ctx, path) -} - func (api *BlocksGateway) GetAll(ctx context.Context, path ImmutablePath) (ContentPathMetadata, files.Node, error) { md, nd, err := api.getNode(ctx, path) if err != nil { diff --git a/gateway/gateway.go b/gateway/gateway.go index 9f198d3b5..b6f33da64 100644 --- a/gateway/gateway.go +++ b/gateway/gateway.go @@ -54,11 +54,13 @@ type ContentPathMetadata struct { ContentType string // Only used for UnixFS requests } -// GetRange describes a range request within a UnixFS file. From and To mostly follow HTTP Range Request semantics. +// ByteRange describes a range request within a UnixFS file. From and To mostly follow [HTTP Byte Range] Request semantics. // From >= 0 and To = nil: Get the file (From, Length) // From >= 0 and To >= 0: Get the range (From, To) // From >= 0 and To <0: Get the range (From, Length - To) -type GetRange struct { +// +// [HTTP Byte Range]: https://httpwg.org/specs/rfc9110.html#rfc.section.14.1.2 +type ByteRange struct { From uint64 To *int64 } @@ -86,16 +88,24 @@ func NewGetResponseFromDirectoryListing(dagSize uint64, entries <-chan unixfs.Li // There are also some existing error types that the gateway code knows how to handle (e.g. context.DeadlineExceeded // and various IPLD pathing related errors). type IPFSBackend interface { - // Get returns a UnixFS file, UnixFS directory, or an IPLD block depending on what the path is that has been - // requested. Directories' files.DirEntry objects do not need to contain content, but must contain Name, - // Size, and Cid. - Get(context.Context, ImmutablePath) (ContentPathMetadata, *GetResponse, error) - - // GetRange returns a full UnixFS file object, or a UnixFS directory. Ranges passed in are advisory for pre-fetching - // data, however consumers of this function may require extra data beyond the passed ranges (e.g. the initial bit of - // the file might be used for content type sniffing even if only the end of the file is requested). - // Note: there is currently no semantic meaning attached to a range request for a directory - GetRange(context.Context, ImmutablePath, ...GetRange) (ContentPathMetadata, *GetResponse, error) + + // Get returns a GetResponse with UnixFS file, directory or a block in IPLD + // format e.g., (DAG-)CBOR/JSON. + // + // Returned Directories are preferably a minimum info required for enumeration: Name, Size, and Cid. + // + // Optional ranges follow [HTTP Byte Ranges] notation and can be used for + // pre-fetching specific sections of a file or a block. + // + // Range notes: + // - Generating response to a range request may require additional data + // beyond the passed ranges (e.g. a single byte range from the middle of a + // file will still need magic bytes from the very beginning for content + // type sniffing). + // - A range request for a directory currently holds no semantic meaning. + // + // [HTTP Byte Ranges]: https://httpwg.org/specs/rfc9110.html#rfc.section.14.1.2 + Get(context.Context, ImmutablePath, ...ByteRange) (ContentPathMetadata, *GetResponse, error) // GetAll returns a UnixFS file or directory depending on what the path is that has been requested. Directories should // include all content recursively. diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index 9a1e4b5f8..e6f96858a 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -107,12 +107,8 @@ func newMockAPI(t *testing.T) (*mockAPI, cid.Cid) { }, cids[0] } -func (api *mockAPI) Get(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, *GetResponse, error) { - return api.gw.Get(ctx, immutablePath) -} - -func (api *mockAPI) GetRange(ctx context.Context, immutablePath ImmutablePath, ranges ...GetRange) (ContentPathMetadata, *GetResponse, error) { - return api.gw.GetRange(ctx, immutablePath, ranges...) +func (api *mockAPI) Get(ctx context.Context, immutablePath ImmutablePath, ranges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { + return api.gw.Get(ctx, immutablePath, ranges...) } func (api *mockAPI) GetAll(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, files.Node, error) { diff --git a/gateway/handler_defaults.go b/gateway/handler_defaults.go index 88fe1ed5b..c1efdfba7 100644 --- a/gateway/handler_defaults.go +++ b/gateway/handler_defaults.go @@ -29,7 +29,7 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h isDirectoryHeadRequest bool directoryMetadata *directoryMetadata err error - ranges []GetRange + ranges []ByteRange ) switch r.Method { @@ -60,23 +60,19 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h } var getResp *GetResponse - // TODO: passing resolved path here, instead of contentPath is harming content routing. Knowing original immutableContentPath will allow backend to find providers for parents, even when internal CIDs are not announced, and will provide better key for caching related DAGs. - if len(ranges) == 0 { - pathMetadata, getResp, err = i.api.Get(ctx, maybeResolvedImPath) - } else { - pathMetadata, getResp, err = i.api.GetRange(ctx, maybeResolvedImPath, ranges...) - } + // TODO: passing only resolved path here, instead of contentPath is + // harming content routing. Knowing original immutableContentPath will + // allow backend to find providers for parents, even when internal + // CIDs are not announced, and will provide better key for caching + // related DAGs. + pathMetadata, getResp, err = i.api.Get(ctx, maybeResolvedImPath, ranges...) if err != nil { if isWebRequest(requestedContentType) { forwardedPath, continueProcessing := i.handleWebRequestErrors(w, r, maybeResolvedImPath, immutableContentPath, contentPath, err, logger) if !continueProcessing { return false } - if len(ranges) == 0 { - pathMetadata, getResp, err = i.api.Get(ctx, forwardedPath) - } else { - pathMetadata, getResp, err = i.api.GetRange(ctx, forwardedPath, ranges...) - } + pathMetadata, getResp, err = i.api.Get(ctx, forwardedPath, ranges...) if err != nil { err = fmt.Errorf("failed to resolve %s: %w", debugStr(contentPath.String()), err) webError(w, err, http.StatusInternalServerError) @@ -100,7 +96,7 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h return false } - // TODO: check if we have a bug when maybeResolvedImPath is resolved and i.setIpfsRootsHeader works with pathMetadata returned by GetRange(maybeResolvedImPath) + // TODO: check if we have a bug when maybeResolvedImPath is resolved and i.setIpfsRootsHeader works with pathMetadata returned by Get(maybeResolvedImPath) if err := i.setIpfsRootsHeader(w, pathMetadata); err != nil { webRequestError(w, err) return false @@ -138,7 +134,7 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h } // parseRange parses a Range header string as per RFC 7233. -func parseRange(s string) ([]GetRange, error) { +func parseRange(s string) ([]ByteRange, error) { if s == "" { return nil, nil // header not present } @@ -146,7 +142,7 @@ func parseRange(s string) ([]GetRange, error) { if !strings.HasPrefix(s, b) { return nil, errors.New("invalid range") } - var ranges []GetRange + var ranges []ByteRange for _, ra := range strings.Split(s[len(b):], ",") { ra = textproto.TrimString(ra) if ra == "" { @@ -157,7 +153,7 @@ func parseRange(s string) ([]GetRange, error) { return nil, errors.New("invalid range") } start, end = textproto.TrimString(start), textproto.TrimString(end) - var r GetRange + var r ByteRange if start == "" { r.From = 0 // If no start is specified, end specifies the diff --git a/gateway/handler_test.go b/gateway/handler_test.go index bf488d03b..1e97f6e9f 100644 --- a/gateway/handler_test.go +++ b/gateway/handler_test.go @@ -45,11 +45,7 @@ type errorMockAPI struct { err error } -func (api *errorMockAPI) Get(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *GetResponse, error) { - return ContentPathMetadata{}, nil, api.err -} - -func (api *errorMockAPI) GetRange(ctx context.Context, path ImmutablePath, getRange ...GetRange) (ContentPathMetadata, *GetResponse, error) { +func (api *errorMockAPI) Get(ctx context.Context, path ImmutablePath, getRange ...ByteRange) (ContentPathMetadata, *GetResponse, error) { return ContentPathMetadata{}, nil, api.err } @@ -161,11 +157,7 @@ type panicMockAPI struct { panicOnHostnameHandler bool } -func (api *panicMockAPI) Get(ctx context.Context, immutablePath ImmutablePath) (ContentPathMetadata, *GetResponse, error) { - panic("i am panicking") -} - -func (api *panicMockAPI) GetRange(ctx context.Context, immutablePath ImmutablePath, ranges ...GetRange) (ContentPathMetadata, *GetResponse, error) { +func (api *panicMockAPI) Get(ctx context.Context, immutablePath ImmutablePath, ranges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { panic("i am panicking") } diff --git a/gateway/handler_unixfs_dir.go b/gateway/handler_unixfs_dir.go index 423af8285..d7648d9c6 100644 --- a/gateway/handler_unixfs_dir.go +++ b/gateway/handler_unixfs_dir.go @@ -23,7 +23,7 @@ import ( // serveDirectory returns the best representation of UnixFS directory // // It will return index.html if present, or generate directory listing otherwise. -func (i *handler) serveDirectory(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, isHeadRequest bool, directoryMetadata *directoryMetadata, ranges []GetRange, begin time.Time, logger *zap.SugaredLogger) bool { +func (i *handler) serveDirectory(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, isHeadRequest bool, directoryMetadata *directoryMetadata, ranges []ByteRange, begin time.Time, logger *zap.SugaredLogger) bool { ctx, span := spanTrace(ctx, "ServeDirectory", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() @@ -81,11 +81,7 @@ func (i *handler) serveDirectory(ctx context.Context, w http.ResponseWriter, r * } } else { var getResp *GetResponse - if len(ranges) == 0 { - _, getResp, err = i.api.Get(ctx, imIndexPath) - } else { - _, getResp, err = i.api.GetRange(ctx, imIndexPath, ranges...) - } + _, getResp, err = i.api.Get(ctx, imIndexPath, ranges...) if err == nil { if getResp.bytes == nil { webError(w, fmt.Errorf("%q could not be read: %w", imIndexPath, files.ErrNotReader), http.StatusUnprocessableEntity) diff --git a/gateway/metrics.go b/gateway/metrics.go index ada179e57..bebd1d58f 100644 --- a/gateway/metrics.go +++ b/gateway/metrics.go @@ -55,25 +55,13 @@ func (b *ipfsBackendWithMetrics) updateApiCallMetric(name string, err error, beg } } -func (b *ipfsBackendWithMetrics) Get(ctx context.Context, path ImmutablePath) (ContentPathMetadata, *GetResponse, error) { +func (b *ipfsBackendWithMetrics) Get(ctx context.Context, path ImmutablePath, ranges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { begin := time.Now() name := "IPFSBackend.Get" - ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) - defer span.End() - - md, n, err := b.api.Get(ctx, path) - - b.updateApiCallMetric(name, err, begin) - return md, n, err -} - -func (b *ipfsBackendWithMetrics) GetRange(ctx context.Context, path ImmutablePath, ranges ...GetRange) (ContentPathMetadata, *GetResponse, error) { - begin := time.Now() - name := "IPFSBackend.GetRange" - ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()))) + ctx, span := spanTrace(ctx, name, trace.WithAttributes(attribute.String("path", path.String()), attribute.Int("ranges", len(ranges)))) defer span.End() - md, f, err := b.api.GetRange(ctx, path, ranges...) + md, f, err := b.api.Get(ctx, path, ranges...) b.updateApiCallMetric(name, err, begin) return md, f, err From de9daf5fc542083159408ece51e63da2ee85cd9d Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 1 Apr 2023 02:00:28 +0200 Subject: [PATCH 4/4] chore: remove bitswap from debug log blockstore can be used in contexts where there is no bitswap, and this log message will me EXTREMELY confusing, wasting people's time on debugging invalid side of system --- blockservice/blockservice.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/blockservice/blockservice.go b/blockservice/blockservice.go index f1a450834..773fb5303 100644 --- a/blockservice/blockservice.go +++ b/blockservice/blockservice.go @@ -244,7 +244,7 @@ func getBlock(ctx context.Context, c cid.Cid, bs blockstore.Blockstore, fget fun // TODO be careful checking ErrNotFound. If the underlying // implementation changes, this will break. - logger.Debug("Blockservice: Searching bitswap") + logger.Debug("BlockService: Searching") blk, err := f.GetBlock(ctx, c) if err != nil { return nil, err @@ -262,7 +262,7 @@ func getBlock(ctx context.Context, c cid.Cid, bs blockstore.Blockstore, fget fun return blk, nil } - logger.Debug("Blockservice GetBlock: Not found") + logger.Debug("BlockService GetBlock: Not found") return nil, err }