From 1eda842859b802b0f667871b731a62a24131b25e Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Mon, 27 Apr 2026 21:22:25 +0200 Subject: [PATCH 1/7] feat(graph): add MS Graph colon-syntax path lookup middleware Adds a chi middleware that detects MS Graph colon-syntax URLs and rewrites them to the canonical /items/{itemID}/... form before chi performs route matching. Existing handlers, routes, and GetDriveAndItemIDParam stay unchanged. Two URL shapes are recognized at both /v1.0 and /v1beta1: /drives/{driveID}/root:/[:/][:] /drives/{driveID}/items/{itemID}:/[:/][:] Path resolution runs as the request user via CS3 Stat. Both NOT_FOUND and PERMISSION_DENIED collapse to a 404 response so existence isn't disclosed to unauthorized callers. URLs without colon syntax fast-path through with a single substring check. The original URL is stashed in request context under OriginalPathContextKey for downstream tracing/logging. The middleware is registered as a top-level mux.Use so it runs before any route matching: chi middleware on a sub-router runs after the prefix is matched but cannot redirect to a different leaf route. Top-level middleware lets URL rewriting actually re-route the request. Tests cover regex matching across versions, all rewrite variants (root/items anchored, with/without suffix, with/without trailing colon, deep paths), NOT_FOUND -> 404, PERMISSION_DENIED -> 404 (security: no existence disclosure), and original-URL preservation in request context. --- services/graph/pkg/middleware/path_lookup.go | 197 ++++++++++++++++ .../graph/pkg/middleware/path_lookup_test.go | 219 ++++++++++++++++++ services/graph/pkg/service/v0/service.go | 6 + 3 files changed, 422 insertions(+) create mode 100644 services/graph/pkg/middleware/path_lookup.go create mode 100644 services/graph/pkg/middleware/path_lookup_test.go diff --git a/services/graph/pkg/middleware/path_lookup.go b/services/graph/pkg/middleware/path_lookup.go new file mode 100644 index 0000000000..ff634ca81a --- /dev/null +++ b/services/graph/pkg/middleware/path_lookup.go @@ -0,0 +1,197 @@ +package middleware + +import ( + "context" + "net/http" + "net/url" + "regexp" + "strings" + + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + cs3rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + storageprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + + "github.com/rs/zerolog" + + "github.com/opencloud-eu/opencloud/pkg/log" + "github.com/opencloud-eu/opencloud/services/graph/pkg/errorcode" + "github.com/opencloud-eu/reva/v2/pkg/rgrpc/todo/pool" + "github.com/opencloud-eu/reva/v2/pkg/storagespace" + "github.com/opencloud-eu/reva/v2/pkg/utils" +) + +// {HTTP.Root}/{version}/drives/{driveID}/root:/[:/][:] +// where {version} is "v1.0" or "v1beta1" — preserves the requested version +// so it lands on a registered route. +// group 1: prefix up to and including the driveID +// group 2: driveID (URL-encoded) +// group 3: path (with leading slash) +// group 4: suffix (with leading slash) — empty for direct item lookup +var rootColonRe = regexp.MustCompile(`^(.*?/v1(?:\.0|beta1)/drives/([^/]+))/root:(/.+?)(?::(/[^:]+))?:?$`) + +// {HTTP.Root}/{version}/drives/{driveID}/items/{itemID}:/[:/][:] +// group 1: prefix up to and including the driveID +// group 2: driveID (URL-encoded) +// group 3: anchor itemID (URL-encoded) +// group 4: path (with leading slash) +// group 5: suffix (with leading slash) +var itemColonRe = regexp.MustCompile(`^(.*?/v1(?:\.0|beta1)/drives/([^/]+))/items/([^/]+):(/.+?)(?::(/[^:]+))?:?$`) + +type contextKey string + +// OriginalPathContextKey holds the pre-rewrite request path for downstream +// tracing/logging consumers. +const OriginalPathContextKey contextKey = "graph.original_path" + +// ResolveGraphPath returns middleware that detects MS Graph colon-syntax +// path lookup URLs and rewrites them to the canonical +// /v1beta1/drives/{driveID}/items/{resolvedItemID}/{suffix} form before +// chi performs route matching. +// +// Two URL shapes are recognized: +// +// /v1beta1/drives/{driveID}/root:/[:/][:] +// /v1beta1/drives/{driveID}/items/{itemID}:/[:/][:] +// +// Path resolution runs as the request user via CS3 Stat. Both NOT_FOUND +// and PERMISSION_DENIED collapse to a 404 response so existence isn't +// disclosed to unauthorized callers. +// +// URLs without colon syntax fast-path through the middleware with a +// single substring check. +func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log.Logger) func(http.Handler) http.Handler { + l := logger.With().Str("middleware", "graphPathLookup").Logger() + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Fast-path: skip URLs that can't be colon-syntax + if !strings.Contains(r.URL.Path, ":") { + next.ServeHTTP(w, r) + return + } + + original := r.URL.Path + rewritten, matched := rewriteColonPath(r.Context(), gws, l, original) + if !matched { + next.ServeHTTP(w, r) + return + } + if rewritten == "" { + // Resolution failed: not found, permission denied, or invalid input. + // Collapse to 404 to avoid disclosing existence. + l.Debug().Str("original", original).Msg("colon-path resolution failed; returning 404") + errorcode.ItemNotFound.Render(w, r, http.StatusNotFound, "item not found") + return + } + + l.Debug(). + Str("original", original). + Str("rewritten", rewritten). + Msg("rewrote MS Graph colon-syntax path") + + // Stash original URL for downstream logging/tracing. + ctx := context.WithValue(r.Context(), OriginalPathContextKey, original) + r = r.WithContext(ctx) + r.URL.Path = rewritten + r.URL.RawPath = "" + next.ServeHTTP(w, r) + }) + } +} + +// rewriteColonPath returns: +// - rewritten URL + true when the URL matched a colon-syntax pattern and resolution succeeded +// - "" + true when the URL matched a colon-syntax pattern but resolution failed (caller should 404) +// - "" + false when the URL did not match any colon-syntax pattern +func rewriteColonPath(ctx context.Context, gws pool.Selectable[gateway.GatewayAPIClient], logger zerolog.Logger, urlPath string) (string, bool) { + if m := rootColonRe.FindStringSubmatch(urlPath); m != nil { + prefix, driveIDStr, relPath, suffix := m[1], m[2], m[3], m[4] + driveID, err := decodeAndParseID(driveIDStr) + if err != nil { + logger.Debug().Err(err).Str("driveID", driveIDStr).Msg("invalid driveID in colon path") + return "", true + } + itemID, ok := resolvePath(ctx, gws, logger, &driveID, relPath) + if !ok { + return "", true + } + return buildCanonicalPath(prefix, itemID, suffix), true + } + + if m := itemColonRe.FindStringSubmatch(urlPath); m != nil { + prefix, _, anchorIDStr, relPath, suffix := m[1], m[2], m[3], m[4], m[5] + anchorID, err := decodeAndParseID(anchorIDStr) + if err != nil { + logger.Debug().Err(err).Str("itemID", anchorIDStr).Msg("invalid item anchor in colon path") + return "", true + } + itemID, ok := resolvePath(ctx, gws, logger, &anchorID, relPath) + if !ok { + return "", true + } + return buildCanonicalPath(prefix, itemID, suffix), true + } + + return "", false +} + +func resolvePath(ctx context.Context, gws pool.Selectable[gateway.GatewayAPIClient], logger zerolog.Logger, anchor *storageprovider.ResourceId, rawPath string) (string, bool) { + gw, err := gws.Next() + if err != nil { + logger.Error().Err(err).Msg("could not select gateway client") + return "", false + } + + decoded, err := url.PathUnescape(rawPath) + if err != nil { + logger.Debug().Err(err).Str("path", rawPath).Msg("failed to URL-decode path segment") + return "", false + } + + statRes, err := gw.Stat(ctx, &storageprovider.StatRequest{ + Ref: &storageprovider.Reference{ + ResourceId: anchor, + Path: utils.MakeRelativePath(decoded), + }, + }) + if err != nil { + logger.Error().Err(err).Msg("Stat call failed during colon-path resolution") + return "", false + } + + switch statRes.GetStatus().GetCode() { + case cs3rpc.Code_CODE_OK: + // fall through + case cs3rpc.Code_CODE_NOT_FOUND, cs3rpc.Code_CODE_PERMISSION_DENIED: + // Both collapse to "not found" — never tell unauthorized callers + // that the resource exists. + return "", false + default: + logger.Debug(). + Str("code", statRes.GetStatus().GetCode().String()). + Str("message", statRes.GetStatus().GetMessage()). + Msg("unexpected Stat status during colon-path resolution") + return "", false + } + + id := statRes.GetInfo().GetId() + if id == nil { + return "", false + } + return storagespace.FormatResourceID(id), true +} + +func decodeAndParseID(s string) (storageprovider.ResourceId, error) { + decoded, err := url.PathUnescape(s) + if err != nil { + return storageprovider.ResourceId{}, err + } + return storagespace.ParseID(decoded) +} + +func buildCanonicalPath(prefix, itemID, suffix string) string { + // r.URL.Path is the decoded form per Go's net/http convention; chi tree + // matching reads it directly. Insert the raw itemID without escaping so + // chi's {itemID} param captures the same string the handler will see + // after parseIDParam unescapes (no-op for already-decoded chars). + return prefix + "/items/" + itemID + suffix +} diff --git a/services/graph/pkg/middleware/path_lookup_test.go b/services/graph/pkg/middleware/path_lookup_test.go new file mode 100644 index 0000000000..11ffe87801 --- /dev/null +++ b/services/graph/pkg/middleware/path_lookup_test.go @@ -0,0 +1,219 @@ +package middleware_test + +import ( + "net/http" + "net/http/httptest" + "testing" + + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + cs3rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + storageprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "google.golang.org/grpc" + + "github.com/opencloud-eu/opencloud/pkg/log" + "github.com/opencloud-eu/opencloud/services/graph/pkg/middleware" + "github.com/opencloud-eu/reva/v2/pkg/rgrpc/todo/pool" + cs3mocks "github.com/opencloud-eu/reva/v2/tests/cs3mocks/mocks" +) + +const ( + testDriveID = "storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd" + testItemID = "storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d" +) + +func newTestSelector(t *testing.T, gw *cs3mocks.GatewayAPIClient) pool.Selectable[gateway.GatewayAPIClient] { + t.Helper() + // Unique key per test so pool's selector cache doesn't hand back a stale gw. + svcName := "TestGatewaySelector" + key := "test.gateway." + t.Name() + pool.RemoveSelector(svcName + key) + return pool.GetSelector[gateway.GatewayAPIClient]( + svcName, + key, + func(cc grpc.ClientConnInterface) gateway.GatewayAPIClient { return gw }, + ) +} + +// statResponse builds a CS3 StatResponse with the given status code, optionally +// returning a resource info populated with testItemID for OK responses. +func statResponse(code cs3rpc.Code, withInfo bool) *storageprovider.StatResponse { + res := &storageprovider.StatResponse{Status: &cs3rpc.Status{Code: code}} + if withInfo { + res.Info = &storageprovider.ResourceInfo{ + Id: &storageprovider.ResourceId{ + StorageId: "storage-users-1", + SpaceId: "f503f6fe-2656-4b0f-8289-fb3184962dfd", + OpaqueId: "f0e20017-9cba-498a-87e5-3467b976604d", + }, + } + } + return res +} + +// recordingHandler captures the request URL path it receives so tests can assert +// the middleware rewrote (or passed through) correctly. +func recordingHandler() (http.Handler, *string) { + var seen string + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + seen = r.URL.Path + w.WriteHeader(http.StatusOK) + }), &seen +} + +func TestResolveGraphPath(t *testing.T) { + tests := []struct { + name string + urlPath string + statCode cs3rpc.Code + statErr error + expectStatCalled bool + expectStatus int + expectURLPath string // empty = handler not invoked + }{ + { + name: "non-colon URL passes through unchanged", + urlPath: "/graph/v1.0/me/drives", + expectStatCalled: false, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/me/drives", + }, + { + name: "URL with colon but not matching pattern passes through", + urlPath: "/graph/v1.0/some/other/route:/foo", + expectStatCalled: false, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/some/other/route:/foo", + }, + { + name: "v1.0 root-anchored with /children rewrites", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents:/children", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d/children", + }, + { + name: "v1beta1 root-anchored with /createLink rewrites", + urlPath: "/graph/v1beta1/drives/" + testDriveID + "/root:/Documents:/createLink", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1beta1/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d/createLink", + }, + { + name: "trailing colon (no suffix) rewrites to bare item URL", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents:", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d", + }, + { + name: "no trailing colon, no suffix rewrites to bare item URL", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d", + }, + { + name: "deep path rewrites correctly", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents/Reports:/children", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d/children", + }, + { + name: "item-anchored colon syntax rewrites", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/items/" + testItemID + ":/notes.txt:/children", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d/children", + }, + { + name: "Stat NOT_FOUND returns 404 without invoking handler", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Missing:", + statCode: cs3rpc.Code_CODE_NOT_FOUND, + expectStatCalled: true, + expectStatus: http.StatusNotFound, + expectURLPath: "", // handler must NOT be called + }, + { + // CRITICAL security test: PERMISSION_DENIED must not leak existence. + // We collapse it to 404, identical to NOT_FOUND, so an unauthorized + // caller can't distinguish "doesn't exist" from "exists but you can't see it". + name: "Stat PERMISSION_DENIED returns 404 (not 403) — don't disclose existence", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Restricted:", + statCode: cs3rpc.Code_CODE_PERMISSION_DENIED, + expectStatCalled: true, + expectStatus: http.StatusNotFound, + expectURLPath: "", + }, + { + name: "Stat unexpected status returns 404", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Anything:", + statCode: cs3rpc.Code_CODE_INTERNAL, + expectStatCalled: true, + expectStatus: http.StatusNotFound, + expectURLPath: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gw := &cs3mocks.GatewayAPIClient{} + if tt.expectStatCalled { + gw.On("Stat", mock.Anything, mock.Anything). + Return(statResponse(tt.statCode, tt.statCode == cs3rpc.Code_CODE_OK), tt.statErr) + } + + selector := newTestSelector(t, gw) + handler, seen := recordingHandler() + mw := middleware.ResolveGraphPath(selector, log.NewLogger()) + + req := httptest.NewRequest(http.MethodGet, "http://localhost"+tt.urlPath, nil) + rr := httptest.NewRecorder() + + mw(handler).ServeHTTP(rr, req) + + assert.Equal(t, tt.expectStatus, rr.Code, "status code") + assert.Equal(t, tt.expectURLPath, *seen, "URL seen by next handler") + + if tt.expectStatCalled { + gw.AssertCalled(t, "Stat", mock.Anything, mock.Anything) + } else { + gw.AssertNotCalled(t, "Stat", mock.Anything, mock.Anything) + } + }) + } +} + +// TestResolveGraphPath_OriginalPathContext verifies the rewrite preserves the +// original URL in request context for downstream tracing/logging. +func TestResolveGraphPath_OriginalPathContext(t *testing.T) { + gw := &cs3mocks.GatewayAPIClient{} + gw.On("Stat", mock.Anything, mock.Anything). + Return(statResponse(cs3rpc.Code_CODE_OK, true), nil) + + selector := newTestSelector(t, gw) + original := "/graph/v1.0/drives/" + testDriveID + "/root:/Documents:/children" + + var capturedOriginal interface{} + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedOriginal = r.Context().Value(middleware.OriginalPathContextKey) + w.WriteHeader(http.StatusOK) + }) + + mw := middleware.ResolveGraphPath(selector, log.NewLogger()) + req := httptest.NewRequest(http.MethodGet, "http://localhost"+original, nil) + rr := httptest.NewRecorder() + + mw(handler).ServeHTTP(rr, req) + + assert.Equal(t, http.StatusOK, rr.Code) + assert.Equal(t, original, capturedOriginal, "original URL must be available via OriginalPathContextKey") +} diff --git a/services/graph/pkg/service/v0/service.go b/services/graph/pkg/service/v0/service.go index 6c9880ba28..e71e0dd7d9 100644 --- a/services/graph/pkg/service/v0/service.go +++ b/services/graph/pkg/service/v0/service.go @@ -131,6 +131,12 @@ func NewService(opts ...Option) (Graph, error) { //nolint:maintidx m := chi.NewMux() m.Use(options.Middleware...) + // Must be top-level mux.Use, not r.Use inside a Route block: chi + // sub-router middleware runs after the prefix has already been matched, + // so URL rewriting at that point can't redirect to a different leaf + // route. Top-level middleware runs before any matching, so the rewritten + // URL is what chi walks the radix tree against. + m.Use(graphm.ResolveGraphPath(options.GatewaySelector, options.Logger)) m.Use( otelchi.Middleware( "graph", From 2d5693f1bf5421f687f96bc24706d2f34500fd57 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Mon, 27 Apr 2026 22:53:51 +0200 Subject: [PATCH 2/7] fix(graph): address Copilot review feedback on path lookup middleware - Drop double-decoding of URL path components. r.URL.Path is already decoded by net/http; calling url.PathUnescape again would let crafted inputs like "%252F" become "/", changing path semantics. Path and anchor-id strings now go straight to utils.MakeRelativePath / storagespace.ParseID. - Distinguish operational errors from "not found". Gateway selection failure, RPC transport errors, and unexpected CS3 status codes now surface as 500 (don't mask outages); only NOT_FOUND and PERMISSION_DENIED collapse to 404 (no existence disclosure). Implemented via a sentinel errPathNotFound + a 500 fall-through. - Refactor the two regex-handling branches in rewriteColonPath to share a resolution path via a small colonMatch struct + matchInto/extract helpers. Removes the SonarCloud duplication finding without changing behavior. - Update the docstring on ResolveGraphPath to reflect that the rewrite preserves the requested API version (/{version}/...) rather than hard-coding /v1beta1/... - Test cleanup: register t.Cleanup to remove the per-subtest selector entry from pool's global selectors map after the subtest, and update the "unexpected status" case to expect 500 (matches the new error semantics). --- services/graph/pkg/middleware/path_lookup.go | 200 +++++++++++------- .../graph/pkg/middleware/path_lookup_test.go | 9 +- 2 files changed, 125 insertions(+), 84 deletions(-) diff --git a/services/graph/pkg/middleware/path_lookup.go b/services/graph/pkg/middleware/path_lookup.go index ff634ca81a..79d4b3e3ca 100644 --- a/services/graph/pkg/middleware/path_lookup.go +++ b/services/graph/pkg/middleware/path_lookup.go @@ -2,8 +2,9 @@ package middleware import ( "context" + "errors" + "fmt" "net/http" - "net/url" "regexp" "strings" @@ -24,15 +25,15 @@ import ( // where {version} is "v1.0" or "v1beta1" — preserves the requested version // so it lands on a registered route. // group 1: prefix up to and including the driveID -// group 2: driveID (URL-encoded) +// group 2: driveID // group 3: path (with leading slash) // group 4: suffix (with leading slash) — empty for direct item lookup var rootColonRe = regexp.MustCompile(`^(.*?/v1(?:\.0|beta1)/drives/([^/]+))/root:(/.+?)(?::(/[^:]+))?:?$`) // {HTTP.Root}/{version}/drives/{driveID}/items/{itemID}:/[:/][:] // group 1: prefix up to and including the driveID -// group 2: driveID (URL-encoded) -// group 3: anchor itemID (URL-encoded) +// group 2: driveID +// group 3: anchor itemID // group 4: path (with leading slash) // group 5: suffix (with leading slash) var itemColonRe = regexp.MustCompile(`^(.*?/v1(?:\.0|beta1)/drives/([^/]+))/items/([^/]+):(/.+?)(?::(/[^:]+))?:?$`) @@ -43,44 +44,57 @@ type contextKey string // tracing/logging consumers. const OriginalPathContextKey contextKey = "graph.original_path" +// errPathNotFound is the sentinel returned when a colon-syntax URL matches +// the pattern but the path either doesn't exist or the user lacks permission +// to see it. Both cases collapse to a single 404 — never disclose existence +// to unauthorized callers. Distinct from operational errors (gateway, +// transport, unexpected status), which surface as 5xx. +var errPathNotFound = errors.New("path not found") + // ResolveGraphPath returns middleware that detects MS Graph colon-syntax // path lookup URLs and rewrites them to the canonical -// /v1beta1/drives/{driveID}/items/{resolvedItemID}/{suffix} form before -// chi performs route matching. +// /{version}/drives/{driveID}/items/{resolvedItemID}{suffix} form before +// chi performs route matching. The requested API version is preserved +// (for example, "v1.0" or "v1beta1"). // // Two URL shapes are recognized: // -// /v1beta1/drives/{driveID}/root:/[:/][:] -// /v1beta1/drives/{driveID}/items/{itemID}:/[:/][:] +// /{version}/drives/{driveID}/root:/[:/][:] +// /{version}/drives/{driveID}/items/{itemID}:/[:/][:] // -// Path resolution runs as the request user via CS3 Stat. Both NOT_FOUND -// and PERMISSION_DENIED collapse to a 404 response so existence isn't -// disclosed to unauthorized callers. +// Path resolution runs as the request user via CS3 Stat. NOT_FOUND and +// PERMISSION_DENIED collapse to 404 (no existence disclosure); operational +// failures (gateway selection, RPC transport, unexpected status) surface +// as 5xx so outages aren't masked. // -// URLs without colon syntax fast-path through the middleware with a -// single substring check. +// URLs without colon syntax fast-path through with a single substring check. func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log.Logger) func(http.Handler) http.Handler { l := logger.With().Str("middleware", "graphPathLookup").Logger() return func(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // Fast-path: skip URLs that can't be colon-syntax + // Fast-path: skip URLs that can't be colon-syntax. if !strings.Contains(r.URL.Path, ":") { next.ServeHTTP(w, r) return } original := r.URL.Path - rewritten, matched := rewriteColonPath(r.Context(), gws, l, original) - if !matched { - next.ServeHTTP(w, r) - return - } - if rewritten == "" { - // Resolution failed: not found, permission denied, or invalid input. - // Collapse to 404 to avoid disclosing existence. - l.Debug().Str("original", original).Msg("colon-path resolution failed; returning 404") + rewritten, err := rewriteColonPath(r.Context(), gws, l, original) + switch { + case errors.Is(err, errPathNotFound): + l.Debug().Str("original", original).Msg("colon-path resolution: not found") errorcode.ItemNotFound.Render(w, r, http.StatusNotFound, "item not found") return + case err != nil: + l.Error().Err(err).Str("original", original).Msg("colon-path resolution: internal error") + errorcode.GeneralException.Render( + w, r, http.StatusInternalServerError, "internal error resolving path", + ) + return + case rewritten == "": + // No colon-syntax match — pass through untouched. + next.ServeHTTP(w, r) + return } l.Debug(). @@ -88,7 +102,6 @@ func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log. Str("rewritten", rewritten). Msg("rewrote MS Graph colon-syntax path") - // Stash original URL for downstream logging/tracing. ctx := context.WithValue(r.Context(), OriginalPathContextKey, original) r = r.WithContext(ctx) r.URL.Path = rewritten @@ -98,94 +111,117 @@ func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log. } } +// colonMatch is the normalized result of matching either the root- or +// item-anchored colon-syntax regex. The two regexes capture different +// groups; this struct erases that difference for downstream resolution. +type colonMatch struct { + prefix string // canonical prefix up to and including /drives/{driveID} + anchorIDStr string // resource id to anchor path resolution (driveID for root, itemID for item-anchored) + relPath string // relative path with leading slash + suffix string // suffix with leading slash (e.g. "/children"); may be empty +} + // rewriteColonPath returns: -// - rewritten URL + true when the URL matched a colon-syntax pattern and resolution succeeded -// - "" + true when the URL matched a colon-syntax pattern but resolution failed (caller should 404) -// - "" + false when the URL did not match any colon-syntax pattern -func rewriteColonPath(ctx context.Context, gws pool.Selectable[gateway.GatewayAPIClient], logger zerolog.Logger, urlPath string) (string, bool) { - if m := rootColonRe.FindStringSubmatch(urlPath); m != nil { - prefix, driveIDStr, relPath, suffix := m[1], m[2], m[3], m[4] - driveID, err := decodeAndParseID(driveIDStr) - if err != nil { - logger.Debug().Err(err).Str("driveID", driveIDStr).Msg("invalid driveID in colon path") - return "", true - } - itemID, ok := resolvePath(ctx, gws, logger, &driveID, relPath) - if !ok { - return "", true - } - return buildCanonicalPath(prefix, itemID, suffix), true +// - "" + nil — no colon-syntax pattern matched (passthrough) +// - rewritten + nil — matched and resolved to a canonical URL +// - "" + errPathNotFound — matched but path doesn't exist or user lacks permission (caller renders 404) +// - "" + other error — matched but operational/internal error (caller renders 5xx) +func rewriteColonPath( + ctx context.Context, + gws pool.Selectable[gateway.GatewayAPIClient], + logger zerolog.Logger, + urlPath string, +) (string, error) { + var match colonMatch + switch { + case matchInto(rootColonRe, urlPath, &match, rootMatchExtract): + case matchInto(itemColonRe, urlPath, &match, itemMatchExtract): + default: + return "", nil } - if m := itemColonRe.FindStringSubmatch(urlPath); m != nil { - prefix, _, anchorIDStr, relPath, suffix := m[1], m[2], m[3], m[4], m[5] - anchorID, err := decodeAndParseID(anchorIDStr) - if err != nil { - logger.Debug().Err(err).Str("itemID", anchorIDStr).Msg("invalid item anchor in colon path") - return "", true - } - itemID, ok := resolvePath(ctx, gws, logger, &anchorID, relPath) - if !ok { - return "", true - } - return buildCanonicalPath(prefix, itemID, suffix), true + // r.URL.Path is already URL-decoded by net/http; do NOT call url.PathUnescape + // here, that would double-decode and let crafted inputs like "%252F" become + // "/", changing path semantics. + anchor, err := storagespace.ParseID(match.anchorIDStr) + if err != nil { + // An unparseable anchor ID can't reference a real resource — collapse + // to "not found" rather than leaking parser internals via 5xx. + logger.Debug().Err(err).Str("anchor", match.anchorIDStr).Msg("invalid anchor id in colon path") + return "", errPathNotFound } - return "", false + itemID, err := resolvePath(ctx, gws, logger, &anchor, match.relPath) + if err != nil { + return "", err + } + return buildCanonicalPath(match.prefix, itemID, match.suffix), nil } -func resolvePath(ctx context.Context, gws pool.Selectable[gateway.GatewayAPIClient], logger zerolog.Logger, anchor *storageprovider.ResourceId, rawPath string) (string, bool) { - gw, err := gws.Next() - if err != nil { - logger.Error().Err(err).Msg("could not select gateway client") - return "", false +// matchInto runs the regex; on a hit, populates *out via extract and returns true. +// A small indirection that lets the switch in rewriteColonPath stay flat. +func matchInto(re *regexp.Regexp, s string, out *colonMatch, extract func([]string) colonMatch) bool { + m := re.FindStringSubmatch(s) + if m == nil { + return false } + *out = extract(m) + return true +} + +func rootMatchExtract(m []string) colonMatch { + return colonMatch{prefix: m[1], anchorIDStr: m[2], relPath: m[3], suffix: m[4]} +} + +func itemMatchExtract(m []string) colonMatch { + return colonMatch{prefix: m[1], anchorIDStr: m[3], relPath: m[4], suffix: m[5]} +} - decoded, err := url.PathUnescape(rawPath) +// resolvePath translates a relative filesystem path (anchored at the given +// CS3 resource id) into the resolved item's id, running with the request +// user's permissions via CS3 Stat. +func resolvePath( + ctx context.Context, + gws pool.Selectable[gateway.GatewayAPIClient], + logger zerolog.Logger, + anchor *storageprovider.ResourceId, + rawPath string, +) (string, error) { + gw, err := gws.Next() if err != nil { - logger.Debug().Err(err).Str("path", rawPath).Msg("failed to URL-decode path segment") - return "", false + return "", fmt.Errorf("gateway selector: %w", err) } + // rawPath comes from r.URL.Path which is already decoded by net/http — + // no extra unescape (would double-decode crafted "%25xx" inputs). statRes, err := gw.Stat(ctx, &storageprovider.StatRequest{ Ref: &storageprovider.Reference{ ResourceId: anchor, - Path: utils.MakeRelativePath(decoded), + Path: utils.MakeRelativePath(rawPath), }, }) if err != nil { - logger.Error().Err(err).Msg("Stat call failed during colon-path resolution") - return "", false + return "", fmt.Errorf("CS3 Stat: %w", err) } switch statRes.GetStatus().GetCode() { case cs3rpc.Code_CODE_OK: // fall through case cs3rpc.Code_CODE_NOT_FOUND, cs3rpc.Code_CODE_PERMISSION_DENIED: - // Both collapse to "not found" — never tell unauthorized callers - // that the resource exists. - return "", false + return "", errPathNotFound default: - logger.Debug(). - Str("code", statRes.GetStatus().GetCode().String()). - Str("message", statRes.GetStatus().GetMessage()). - Msg("unexpected Stat status during colon-path resolution") - return "", false + return "", fmt.Errorf( + "CS3 Stat returned %s: %s", + statRes.GetStatus().GetCode(), + statRes.GetStatus().GetMessage(), + ) } id := statRes.GetInfo().GetId() if id == nil { - return "", false - } - return storagespace.FormatResourceID(id), true -} - -func decodeAndParseID(s string) (storageprovider.ResourceId, error) { - decoded, err := url.PathUnescape(s) - if err != nil { - return storageprovider.ResourceId{}, err + return "", fmt.Errorf("CS3 Stat returned OK but missing Info.Id") } - return storagespace.ParseID(decoded) + return storagespace.FormatResourceID(id), nil } func buildCanonicalPath(prefix, itemID, suffix string) string { diff --git a/services/graph/pkg/middleware/path_lookup_test.go b/services/graph/pkg/middleware/path_lookup_test.go index 11ffe87801..e12df34c5b 100644 --- a/services/graph/pkg/middleware/path_lookup_test.go +++ b/services/graph/pkg/middleware/path_lookup_test.go @@ -26,9 +26,12 @@ const ( func newTestSelector(t *testing.T, gw *cs3mocks.GatewayAPIClient) pool.Selectable[gateway.GatewayAPIClient] { t.Helper() // Unique key per test so pool's selector cache doesn't hand back a stale gw. + // t.Cleanup removes the entry from pool's global selectors map after the + // subtest, so global state doesn't grow across the suite. svcName := "TestGatewaySelector" key := "test.gateway." + t.Name() pool.RemoveSelector(svcName + key) + t.Cleanup(func() { pool.RemoveSelector(svcName + key) }) return pool.GetSelector[gateway.GatewayAPIClient]( svcName, key, @@ -154,11 +157,13 @@ func TestResolveGraphPath(t *testing.T) { expectURLPath: "", }, { - name: "Stat unexpected status returns 404", + // Operational/unexpected CS3 statuses must NOT be collapsed to 404 — + // that would mask outages. Surface as 500 like other graph handlers do. + name: "Stat unexpected status returns 500 (not 404 — don't mask outages)", urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Anything:", statCode: cs3rpc.Code_CODE_INTERNAL, expectStatCalled: true, - expectStatus: http.StatusNotFound, + expectStatus: http.StatusInternalServerError, expectURLPath: "", }, } From 1fc1bd4ebc728d13b5bf6acc7b86bfe850ec4b4f Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Tue, 28 Apr 2026 10:30:16 +0200 Subject: [PATCH 3/7] fix(graph): preserve RawPath workaround + use NopLogger in tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-up Copilot review nits on the colon-syntax middleware: - Don't blank r.URL.RawPath after the rewrite. Graph.ServeHTTP sets RawPath = EscapedPath() as a workaround for chi's parameter-binding quirks with `$`/`!` in IDs (see go-chi/chi#641). Clearing RawPath for rewritten requests negates that workaround. Re-establish the same invariant after the rewrite. - Tests previously used log.NewLogger(), which mutates global zerolog state. Switch to log.NopLogger() — order-independent, no global side effects. --- services/graph/pkg/middleware/path_lookup.go | 13 ++++++++++++- services/graph/pkg/middleware/path_lookup_test.go | 4 ++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/services/graph/pkg/middleware/path_lookup.go b/services/graph/pkg/middleware/path_lookup.go index 79d4b3e3ca..0c11ddc411 100644 --- a/services/graph/pkg/middleware/path_lookup.go +++ b/services/graph/pkg/middleware/path_lookup.go @@ -105,7 +105,18 @@ func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log. ctx := context.WithValue(r.Context(), OriginalPathContextKey, original) r = r.WithContext(ctx) r.URL.Path = rewritten - r.URL.RawPath = "" + // Match the chi-escaping workaround in Graph.ServeHTTP — RawPath + // must be a properly-escaped form of Path so chi's parameter + // binding works for rewritten requests too. Drive/item IDs + // contain `$` and `!`, both of which need percent-encoding for + // chi to round-trip them through its tree match. + // (See services/graph/pkg/service/v0/graph.go ServeHTTP and + // https://github.com/go-chi/chi/issues/641#issuecomment-883156692.) + // + // EscapedPath() re-encodes Path because the existing RawPath + // (set by Graph.ServeHTTP for the original URL) no longer + // unescapes to our rewritten Path. + r.URL.RawPath = r.URL.EscapedPath() next.ServeHTTP(w, r) }) } diff --git a/services/graph/pkg/middleware/path_lookup_test.go b/services/graph/pkg/middleware/path_lookup_test.go index e12df34c5b..bc1093ed57 100644 --- a/services/graph/pkg/middleware/path_lookup_test.go +++ b/services/graph/pkg/middleware/path_lookup_test.go @@ -178,7 +178,7 @@ func TestResolveGraphPath(t *testing.T) { selector := newTestSelector(t, gw) handler, seen := recordingHandler() - mw := middleware.ResolveGraphPath(selector, log.NewLogger()) + mw := middleware.ResolveGraphPath(selector, log.NopLogger()) req := httptest.NewRequest(http.MethodGet, "http://localhost"+tt.urlPath, nil) rr := httptest.NewRecorder() @@ -213,7 +213,7 @@ func TestResolveGraphPath_OriginalPathContext(t *testing.T) { w.WriteHeader(http.StatusOK) }) - mw := middleware.ResolveGraphPath(selector, log.NewLogger()) + mw := middleware.ResolveGraphPath(selector, log.NopLogger()) req := httptest.NewRequest(http.MethodGet, "http://localhost"+original, nil) rr := httptest.NewRecorder() From d177df00f69f756d4acc8f4893293c8c253559e1 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Tue, 28 Apr 2026 10:55:39 +0200 Subject: [PATCH 4/7] fix(graph): map UNAUTHENTICATED to 401, validate drive/item id pair MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three more Copilot review nits on the colon-syntax middleware: - CS3 Stat returning UNAUTHENTICATED now surfaces as HTTP 401 (was 500 via the default-case fallback). Distinct sentinel + handler branch. - Item-anchored form (/drives/{driveID}/items/{itemID}:/...) now validates that driveID's storage/space prefix matches the itemID's, short-circuiting to 400 InvalidRequest on mismatch instead of doing a CS3 Stat that would only fail at the handler layer. - ParseID failures on the anchor id now surface as 400 (was 404). In practice this branch is defensive — storagespace.ParseID is lenient and only errors on empty input, which the regex already filters out — but the right semantic for malformed client input is 400, not 404. Tests: added two new cases (UNAUTHENTICATED → 401, drive/item id mismatch → 400). The "malformed drive id" case Copilot suggested isn't reachable in practice given ParseID's leniency, so it's not covered. --- services/graph/pkg/middleware/path_lookup.go | 71 +++++++++++++++---- .../graph/pkg/middleware/path_lookup_test.go | 20 ++++++ 2 files changed, 77 insertions(+), 14 deletions(-) diff --git a/services/graph/pkg/middleware/path_lookup.go b/services/graph/pkg/middleware/path_lookup.go index 0c11ddc411..a13c099d3e 100644 --- a/services/graph/pkg/middleware/path_lookup.go +++ b/services/graph/pkg/middleware/path_lookup.go @@ -44,12 +44,20 @@ type contextKey string // tracing/logging consumers. const OriginalPathContextKey contextKey = "graph.original_path" -// errPathNotFound is the sentinel returned when a colon-syntax URL matches -// the pattern but the path either doesn't exist or the user lacks permission -// to see it. Both cases collapse to a single 404 — never disclose existence -// to unauthorized callers. Distinct from operational errors (gateway, -// transport, unexpected status), which surface as 5xx. -var errPathNotFound = errors.New("path not found") +// Sentinels distinguishing the resolution outcomes that map to specific HTTP +// statuses. Anything else surfaces as 500. +// +// errPathNotFound — path doesn't exist or the user lacks permission to +// see it. Both collapse to 404 (no existence disclosure). +// errInvalidRequest — client sent a malformed input (unparseable drive/item +// id, drive/item mismatch in item-anchored form). 400. +// errUnauthenticated — the gateway said the caller isn't authenticated for +// the lookup (token expired, cross-storage auth, etc.). 401. +var ( + errPathNotFound = errors.New("path not found") + errInvalidRequest = errors.New("invalid request") + errUnauthenticated = errors.New("unauthenticated") +) // ResolveGraphPath returns middleware that detects MS Graph colon-syntax // path lookup URLs and rewrites them to the canonical @@ -85,6 +93,14 @@ func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log. l.Debug().Str("original", original).Msg("colon-path resolution: not found") errorcode.ItemNotFound.Render(w, r, http.StatusNotFound, "item not found") return + case errors.Is(err, errInvalidRequest): + l.Debug().Str("original", original).Msg("colon-path resolution: invalid request") + errorcode.InvalidRequest.Render(w, r, http.StatusBadRequest, "invalid request") + return + case errors.Is(err, errUnauthenticated): + l.Debug().Str("original", original).Msg("colon-path resolution: unauthenticated") + errorcode.Unauthenticated.Render(w, r, http.StatusUnauthorized, "unauthenticated") + return case err != nil: l.Error().Err(err).Str("original", original).Msg("colon-path resolution: internal error") errorcode.GeneralException.Render( @@ -127,16 +143,19 @@ func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log. // groups; this struct erases that difference for downstream resolution. type colonMatch struct { prefix string // canonical prefix up to and including /drives/{driveID} + driveIDStr string // captured driveID for item-anchored form (empty for root-anchored, where anchorIDStr already IS the driveID) anchorIDStr string // resource id to anchor path resolution (driveID for root, itemID for item-anchored) relPath string // relative path with leading slash suffix string // suffix with leading slash (e.g. "/children"); may be empty } // rewriteColonPath returns: -// - "" + nil — no colon-syntax pattern matched (passthrough) -// - rewritten + nil — matched and resolved to a canonical URL -// - "" + errPathNotFound — matched but path doesn't exist or user lacks permission (caller renders 404) -// - "" + other error — matched but operational/internal error (caller renders 5xx) +// - "" + nil — no colon-syntax pattern matched (passthrough) +// - rewritten + nil — matched and resolved to a canonical URL +// - "" + errPathNotFound — path doesn't exist or user lacks permission (404) +// - "" + errInvalidRequest — malformed input (400) +// - "" + errUnauthenticated — gateway said caller isn't authenticated (401) +// - "" + other error — operational / internal failure (5xx) func rewriteColonPath( ctx context.Context, gws pool.Selectable[gateway.GatewayAPIClient], @@ -156,10 +175,28 @@ func rewriteColonPath( // "/", changing path semantics. anchor, err := storagespace.ParseID(match.anchorIDStr) if err != nil { - // An unparseable anchor ID can't reference a real resource — collapse - // to "not found" rather than leaking parser internals via 5xx. + // Unparseable input is malformed by the client, not "not found". logger.Debug().Err(err).Str("anchor", match.anchorIDStr).Msg("invalid anchor id in colon path") - return "", errPathNotFound + return "", errInvalidRequest + } + + // Item-anchored form ("/drives/{driveID}/items/{itemID}:/...") captures + // driveID separately. Validate the itemID belongs to the given driveID + // (storage + space prefix) — otherwise the request is malformed and we + // short-circuit instead of doing an unnecessary CS3 Stat. + if match.driveIDStr != "" { + drive, err := storagespace.ParseID(match.driveIDStr) + if err != nil { + logger.Debug().Err(err).Str("driveID", match.driveIDStr).Msg("invalid drive id in colon path") + return "", errInvalidRequest + } + if drive.GetStorageId() != anchor.GetStorageId() || drive.GetSpaceId() != anchor.GetSpaceId() { + logger.Debug(). + Str("driveID", match.driveIDStr). + Str("itemID", match.anchorIDStr). + Msg("drive id does not match item id storage/space") + return "", errInvalidRequest + } } itemID, err := resolvePath(ctx, gws, logger, &anchor, match.relPath) @@ -181,11 +218,15 @@ func matchInto(re *regexp.Regexp, s string, out *colonMatch, extract func([]stri } func rootMatchExtract(m []string) colonMatch { + // Root-anchored: anchorIDStr IS the driveID, so leave driveIDStr empty + // to skip the drive/item match check (it would compare driveID to itself). return colonMatch{prefix: m[1], anchorIDStr: m[2], relPath: m[3], suffix: m[4]} } func itemMatchExtract(m []string) colonMatch { - return colonMatch{prefix: m[1], anchorIDStr: m[3], relPath: m[4], suffix: m[5]} + // Item-anchored: capture driveID (m[2]) so the resolver can validate it + // matches the itemID's storage/space. + return colonMatch{prefix: m[1], driveIDStr: m[2], anchorIDStr: m[3], relPath: m[4], suffix: m[5]} } // resolvePath translates a relative filesystem path (anchored at the given @@ -220,6 +261,8 @@ func resolvePath( // fall through case cs3rpc.Code_CODE_NOT_FOUND, cs3rpc.Code_CODE_PERMISSION_DENIED: return "", errPathNotFound + case cs3rpc.Code_CODE_UNAUTHENTICATED: + return "", errUnauthenticated default: return "", fmt.Errorf( "CS3 Stat returned %s: %s", diff --git a/services/graph/pkg/middleware/path_lookup_test.go b/services/graph/pkg/middleware/path_lookup_test.go index bc1093ed57..768bbccd52 100644 --- a/services/graph/pkg/middleware/path_lookup_test.go +++ b/services/graph/pkg/middleware/path_lookup_test.go @@ -166,6 +166,26 @@ func TestResolveGraphPath(t *testing.T) { expectStatus: http.StatusInternalServerError, expectURLPath: "", }, + { + // UNAUTHENTICATED is its own distinct outcome — must surface as 401, + // not 500, so clients can detect "your token is bad" vs "server error". + name: "Stat UNAUTHENTICATED returns 401", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents:", + statCode: cs3rpc.Code_CODE_UNAUTHENTICATED, + expectStatCalled: true, + expectStatus: http.StatusUnauthorized, + expectURLPath: "", + }, + { + // Item-anchored form with a driveID that doesn't match the itemID's + // storage/space — the request is malformed; short-circuit to 400 + // instead of doing a Stat that would only fail downstream. + name: "drive id and item id storage/space mismatch returns 400", + urlPath: "/graph/v1.0/drives/storage-users-2$other-space-id/items/" + testItemID + ":/notes.txt:/children", + expectStatCalled: false, + expectStatus: http.StatusBadRequest, + expectURLPath: "", + }, } for _, tt := range tests { From b920282ad30308a6b75c90cb0dc325174c9f38bd Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Tue, 5 May 2026 20:10:48 +0200 Subject: [PATCH 5/7] refactor(graph): drop MS Graph framing from colon-path middleware logs --- services/graph/pkg/middleware/path_lookup.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/graph/pkg/middleware/path_lookup.go b/services/graph/pkg/middleware/path_lookup.go index a13c099d3e..dab46fee9d 100644 --- a/services/graph/pkg/middleware/path_lookup.go +++ b/services/graph/pkg/middleware/path_lookup.go @@ -59,8 +59,8 @@ var ( errUnauthenticated = errors.New("unauthenticated") ) -// ResolveGraphPath returns middleware that detects MS Graph colon-syntax -// path lookup URLs and rewrites them to the canonical +// ResolveGraphPath returns middleware that detects colon-syntax path +// lookup URLs and rewrites them to the canonical // /{version}/drives/{driveID}/items/{resolvedItemID}{suffix} form before // chi performs route matching. The requested API version is preserved // (for example, "v1.0" or "v1beta1"). @@ -116,7 +116,7 @@ func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log. l.Debug(). Str("original", original). Str("rewritten", rewritten). - Msg("rewrote MS Graph colon-syntax path") + Msg("colon-path resolution: rewrote") ctx := context.WithValue(r.Context(), OriginalPathContextKey, original) r = r.WithContext(ctx) From adce092ba1ae92caad7ef337a28662393a6ef086 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Wed, 6 May 2026 12:46:42 +0200 Subject: [PATCH 6/7] test(graph): acceptance tests for MS Graph colon-syntax path lookup Cover the rewrite shapes the middleware handles end-to-end against a real OpenCloud server: root-anchored, item-anchored, deep paths, trailing colon, and the "/:/" sub-route form. Also assert that NOT_FOUND and PERMISSION_DENIED both collapse to 404. The /permissions sub-route is registered only at /v1beta1, and the v1beta1 GetDriveItem handler is share-jail-only, so the v1beta1 mount of the middleware is exercised through the permissions scenario, since there is no other v1beta1 endpoint that works for regular personal-drive items. --- tests/acceptance/bootstrap/GraphContext.php | 165 ++++++++++++++++++ .../apiGraph/colonSyntaxPathLookup.feature | 114 ++++++++++++ 2 files changed, 279 insertions(+) create mode 100644 tests/acceptance/features/apiGraph/colonSyntaxPathLookup.feature diff --git a/tests/acceptance/bootstrap/GraphContext.php b/tests/acceptance/bootstrap/GraphContext.php index 49782fa137..3619169d50 100644 --- a/tests/acceptance/bootstrap/GraphContext.php +++ b/tests/acceptance/bootstrap/GraphContext.php @@ -3336,4 +3336,169 @@ public function userHasUnmarkedItemFromSpaceAsFavoriteUsingTheGraphApi( $response = $this->unmarkFavorite($user, $itemId); $this->featureContext->theHTTPStatusCodeShouldBe(204, '', $response); } + + /** + * Encode a colon-syntax path segment so slashes survive but the + * structural ":" delimiters of the Graph URL remain literal. + * + * @param string $path + * + * @return string + */ + private function encodeColonPathSegment(string $path): string { + $path = \ltrim($path, '/'); + $parts = \explode('/', $path); + $encoded = \array_map('rawurlencode', $parts); + return \implode('/', $encoded); + } + + /** + * Send a Graph API request and capture the response on FeatureContext. + * + * @param string $user + * @param string $method + * @param string $relativeUrl + * + * @return void + */ + private function sendGraphRequestAndCaptureResponse( + string $user, + string $method, + string $relativeUrl + ): void { + $response = $this->featureContext->sendingToWithDirectUrl($user, $method, $relativeUrl); + $this->featureContext->setResponse($response); + } + + /** + * @When /^user "([^"]*)" gets the drive item with colon path "([^"]*)" of space "([^"]*)" using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Hits /graph/{version}/drives/{driveID}/root:/{path}, exercising the + * colon-syntax path lookup middleware (root-anchored, no suffix). + * + * @param string $user + * @param string $path + * @param string $spaceName + * @param string $apiVersion + * + * @return void + */ + public function userGetsDriveItemWithColonPathOfSpace( + string $user, + string $path, + string $spaceName, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($user, $spaceName); + $encoded = $this->encodeColonPathSegment($path); + $url = "/graph/$apiVersion/drives/$driveId/root:/$encoded"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } + + /** + * @When /^user "([^"]*)" gets the drive item with colon path "([^"]*)" of space "([^"]*)" with trailing colon using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Hits /graph/{version}/drives/{driveID}/root:/{path}: with the optional + * trailing ":" — verifies the middleware accepts both forms. + * + * @param string $user + * @param string $path + * @param string $spaceName + * @param string $apiVersion + * + * @return void + */ + public function userGetsDriveItemWithColonPathOfSpaceWithTrailingColon( + string $user, + string $path, + string $spaceName, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($user, $spaceName); + $encoded = $this->encodeColonPathSegment($path); + $url = "/graph/$apiVersion/drives/$driveId/root:/$encoded:"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } + + /** + * @When /^user "([^"]*)" gets the drive item with colon path "([^"]*)" relative to folder "([^"]*)" of space "([^"]*)" using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Hits /graph/{version}/drives/{driveID}/items/{itemID}:/{relPath}, the + * item-anchored colon-syntax form. + * + * @param string $user + * @param string $relPath + * @param string $folderName + * @param string $spaceName + * @param string $apiVersion + * + * @return void + */ + public function userGetsDriveItemWithColonPathRelativeToFolderOfSpace( + string $user, + string $relPath, + string $folderName, + string $spaceName, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($user, $spaceName); + $folderId = $this->spacesContext->getResourceId($user, $spaceName, $folderName); + $encoded = $this->encodeColonPathSegment($relPath); + $url = "/graph/$apiVersion/drives/$driveId/items/$folderId:/$encoded"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } + + /** + * @When /^user "([^"]*)" lists permissions of the drive item with colon path "([^"]*)" of space "([^"]*)" using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Hits /graph/{version}/drives/{driveID}/root:/{path}:/permissions, the + * "colon path with suffix" form (root-anchored colon path + canonical + * sub-route). + * + * @param string $user + * @param string $path + * @param string $spaceName + * @param string $apiVersion + * + * @return void + */ + public function userListsPermissionsOfDriveItemWithColonPathOfSpace( + string $user, + string $path, + string $spaceName, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($user, $spaceName); + $encoded = $this->encodeColonPathSegment($path); + $url = "/graph/$apiVersion/drives/$driveId/root:/$encoded:/permissions"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } + + /** + * @When /^user "([^"]*)" gets the drive item with colon path "([^"]*)" of the personal space of "([^"]*)" using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Same as userGetsDriveItemWithColonPathOfSpace, but the request is + * issued by :user against the personal space drive ID of :owner. Used + * for security tests where one user attempts to reach another user's + * resource via colon syntax — should be indistinguishable from a + * not-found result. + * + * @param string $user + * @param string $path + * @param string $owner + * @param string $apiVersion + * + * @return void + */ + public function userGetsDriveItemWithColonPathOfPersonalSpaceOf( + string $user, + string $path, + string $owner, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($owner, "Personal"); + $encoded = $this->encodeColonPathSegment($path); + $url = "/graph/$apiVersion/drives/$driveId/root:/$encoded"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } } diff --git a/tests/acceptance/features/apiGraph/colonSyntaxPathLookup.feature b/tests/acceptance/features/apiGraph/colonSyntaxPathLookup.feature new file mode 100644 index 0000000000..d8e445f769 --- /dev/null +++ b/tests/acceptance/features/apiGraph/colonSyntaxPathLookup.feature @@ -0,0 +1,114 @@ +Feature: colon-syntax path lookup on the Graph API + As a client + I want to address drive items by path using colon-syntax URLs on the Graph API + So that I do not have to walk the path with successive lookups before issuing a request + + The colon-syntax shapes recognised by the path-lookup middleware are: + + /graph/{version}/drives/{driveID}/root:/[:/][:] + /graph/{version}/drives/{driveID}/items/{itemID}:/[:/][:] + + Both /v1.0 and /v1beta1 are supported. NOT_FOUND and PERMISSION_DENIED + collapse to 404 so the middleware never discloses the existence of + resources the caller is not allowed to see. + + Background: + Given user "Alice" has been created with default attributes + And user "Alice" has created folder "folder1" + And user "Alice" has created folder "folder1/sub" + And user "Alice" has uploaded file with content "hello" to "folder1/file.txt" + And user "Alice" has uploaded file with content "deep" to "folder1/sub/deep.txt" + + + Scenario: get a drive item by root-anchored colon path + When user "Alice" gets the drive item with colon path "folder1/file.txt" of space "Personal" using the Graph API version "v1.0" + Then the HTTP status code should be "200" + And the JSON data of the response should match + """ + { + "type": "object", + "required": ["id", "name", "parentReference"], + "properties": { + "id": { + "type": "string", + "pattern": "^%file_id_pattern%$" + }, + "name": { + "const": "file.txt" + } + } + } + """ + + + Scenario: get a drive item by root-anchored colon path with a deep path + When user "Alice" gets the drive item with colon path "folder1/sub/deep.txt" of space "Personal" using the Graph API version "v1.0" + Then the HTTP status code should be "200" + And the JSON data of the response should match + """ + { + "type": "object", + "required": ["id", "name"], + "properties": { + "name": { + "const": "deep.txt" + } + } + } + """ + + + Scenario: get a drive item by root-anchored colon path with a trailing colon + When user "Alice" gets the drive item with colon path "folder1/file.txt" of space "Personal" with trailing colon using the Graph API version "v1.0" + Then the HTTP status code should be "200" + And the JSON data of the response should match + """ + { + "type": "object", + "required": ["id", "name"], + "properties": { + "name": { + "const": "file.txt" + } + } + } + """ + + + Scenario: get a drive item by item-anchored colon path + When user "Alice" gets the drive item with colon path "file.txt" relative to folder "folder1" of space "Personal" using the Graph API version "v1.0" + Then the HTTP status code should be "200" + And the JSON data of the response should match + """ + { + "type": "object", + "required": ["id", "name"], + "properties": { + "name": { + "const": "file.txt" + } + } + } + """ + + + Scenario: list permissions of a drive item via colon path with a sub-route suffix on v1beta1 + # Exercises the "/:/" rewrite shape and, at the same time, + # the v1beta1 mount of the middleware. The /permissions sub-route is only + # registered at /v1beta1/, and the canonical /v1beta1 GetDriveItem + # handler is share-jail-only, so this is the cleanest way to assert that + # the v1beta1 colon-syntax path actually reaches a working handler for + # regular drive items. + When user "Alice" lists permissions of the drive item with colon path "folder1" of space "Personal" using the Graph API version "v1beta1" + Then the HTTP status code should be "200" + + + Scenario: non-existent colon path returns 404 + When user "Alice" gets the drive item with colon path "folder1/does-not-exist.txt" of space "Personal" using the Graph API version "v1.0" + Then the HTTP status code should be "404" + + + Scenario: another user cannot disclose existence of a resource via colon path + Given user "Brian" has been created with default attributes + When user "Brian" gets the drive item with colon path "folder1/file.txt" of the personal space of "Alice" using the Graph API version "v1.0" + Then the HTTP status code should be "404" From fa4f72d582b6bfa334a094aceccd2b9374c7b6b2 Mon Sep 17 00:00:00 2001 From: Dominik Schmidt Date: Tue, 12 May 2026 09:16:46 +0200 Subject: [PATCH 7/7] test(graph): pin chi URLParam round-trip for IDs with `!` sub-delim Codacy flagged the colon-path middleware comment claiming both `$` and `!` need percent-encoding for chi's tree match, while the implementation only calls r.URL.RawPath = r.URL.EscapedPath() which does not encode either character per the suggestion's reading. In practice EscapedPath does encode `!` as `%21` (only `?` is the hardcoded escape, `!` is escaped because Go's net/url treats it as needing encoding outside specific contexts). It leaves `$` literal, which chi handles fine. chi.URLParam returns the encoded segment verbatim, and downstream OpenCloud handlers (parseIDParam, GetDriveAndItemIDParam) already PathUnescape before parsing IDs, so the round-trip works end-to-end. The acceptance tests on this branch already exercise this with real `$`/`!`-containing IDs. Add a focused unit test that mounts the middleware behind chi, sends a colon URL, and asserts the actual contract: - driveID (only `$`): chi.URLParam returns it literal - itemID (with `!`): PathUnescape(chi.URLParam(...)) == original Update the misleading comment so future readers (and reviewers) see what the encoding actually does and which downstream contract it relies on. --- services/graph/pkg/middleware/path_lookup.go | 24 ++++---- .../graph/pkg/middleware/path_lookup_test.go | 59 +++++++++++++++++++ 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/services/graph/pkg/middleware/path_lookup.go b/services/graph/pkg/middleware/path_lookup.go index dab46fee9d..514a0f6402 100644 --- a/services/graph/pkg/middleware/path_lookup.go +++ b/services/graph/pkg/middleware/path_lookup.go @@ -121,17 +121,21 @@ func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log. ctx := context.WithValue(r.Context(), OriginalPathContextKey, original) r = r.WithContext(ctx) r.URL.Path = rewritten - // Match the chi-escaping workaround in Graph.ServeHTTP — RawPath - // must be a properly-escaped form of Path so chi's parameter - // binding works for rewritten requests too. Drive/item IDs - // contain `$` and `!`, both of which need percent-encoding for - // chi to round-trip them through its tree match. - // (See services/graph/pkg/service/v0/graph.go ServeHTTP and - // https://github.com/go-chi/chi/issues/641#issuecomment-883156692.) + // Match the chi-escaping workaround in Graph.ServeHTTP: RawPath + // must be a valid encoded form of Path so chi's tree match (which + // reads RawPath when non-empty) routes the rewritten URL the same + // way it routes the original. The previous RawPath (set by + // Graph.ServeHTTP) was for the pre-rewrite Path and no longer + // matches; EscapedPath recomputes a canonical encoding for the + // new Path. // - // EscapedPath() re-encodes Path because the existing RawPath - // (set by Graph.ServeHTTP for the original URL) no longer - // unescapes to our rewritten Path. + // Drive/item IDs in this codebase have the shape + // {storage}${space}!{opaque}. EscapedPath leaves `$` literal (not + // escaped in path segments per RFC 3986) but encodes `!` as `%21`. + // chi.URLParam therefore returns the param with `%21`; downstream + // handlers (e.g. parseIDParam, GetDriveAndItemIDParam) already + // call url.PathUnescape before parsing the ID, so the round-trip + // works. See go-chi/chi#641 for the underlying chi behavior. r.URL.RawPath = r.URL.EscapedPath() next.ServeHTTP(w, r) }) diff --git a/services/graph/pkg/middleware/path_lookup_test.go b/services/graph/pkg/middleware/path_lookup_test.go index 768bbccd52..b4509550de 100644 --- a/services/graph/pkg/middleware/path_lookup_test.go +++ b/services/graph/pkg/middleware/path_lookup_test.go @@ -3,11 +3,13 @@ package middleware_test import ( "net/http" "net/http/httptest" + "net/url" "testing" gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" cs3rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" storageprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/go-chi/chi/v5" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "google.golang.org/grpc" @@ -217,6 +219,63 @@ func TestResolveGraphPath(t *testing.T) { } } +// TestResolveGraphPath_ChiParamRoundTripWithSubDelims pins down what chi +// actually returns from URLParam for rewritten URLs whose IDs contain `$` +// and `!`, and verifies the PathUnescape round-trip downstream handlers +// rely on still recovers the original ID. +// +// Specifically: r.URL.RawPath = r.URL.EscapedPath() leaves `$` literal but +// encodes `!` as `%21` (net/url's encodePath behavior). chi.URLParam returns +// the matched RawPath segment as-is, so the bound param contains `%21`. +// Existing handlers (parseIDParam, GetDriveAndItemIDParam) call +// url.PathUnescape on the param before parsing the ID, which recovers `!`. +// +// This test guards against any future change to the encoding strategy that +// would silently break that round-trip, for example, switching to +// url.PathEscape(itemID) and stuffing the result into r.URL.Path (which +// expects the decoded form) and thereby double-encoding `!` to `%2521`. +func TestResolveGraphPath_ChiParamRoundTripWithSubDelims(t *testing.T) { + gw := &cs3mocks.GatewayAPIClient{} + gw.On("Stat", mock.Anything, mock.Anything). + Return(statResponse(cs3rpc.Code_CODE_OK, true), nil) + + selector := newTestSelector(t, gw) + + var gotDriveID, gotItemID string + r := chi.NewRouter() + r.Use(middleware.ResolveGraphPath(selector, log.NopLogger())) + r.Route("/graph/v1.0/drives/{driveID}", func(r chi.Router) { + r.Get("/items/{itemID}/children", func(w http.ResponseWriter, r *http.Request) { + gotDriveID = chi.URLParam(r, "driveID") + gotItemID = chi.URLParam(r, "itemID") + w.WriteHeader(http.StatusOK) + }) + }) + + req := httptest.NewRequest( + http.MethodGet, + "http://localhost/graph/v1.0/drives/"+testDriveID+"/root:/Documents:/children", + nil, + ) + rr := httptest.NewRecorder() + r.ServeHTTP(rr, req) + + assert.Equal(t, http.StatusOK, rr.Code, "chi must route the rewritten URL to the handler") + + // driveID has only `$` (a sub-delim left literal by EscapedPath), so chi + // returns it without any percent-encoding. + assert.Equal(t, testDriveID, gotDriveID, + "chi.URLParam should return driveID unchanged: only `$` sub-delim, left literal by EscapedPath") + + // itemID has both `$` and `!`. EscapedPath encodes `!` as `%21`, so chi + // returns the param with `%21` literal; PathUnescape recovers the + // original ID. This is the round-trip downstream handlers rely on. + unescaped, err := url.PathUnescape(gotItemID) + assert.NoError(t, err, "URLParam value must be a valid percent-encoded string") + assert.Equal(t, testItemID, unescaped, + "PathUnescape(chi.URLParam(itemID)) must recover the original ID") +} + // TestResolveGraphPath_OriginalPathContext verifies the rewrite preserves the // original URL in request context for downstream tracing/logging. func TestResolveGraphPath_OriginalPathContext(t *testing.T) {