diff --git a/services/graph/pkg/middleware/path_lookup.go b/services/graph/pkg/middleware/path_lookup.go new file mode 100644 index 0000000000..514a0f6402 --- /dev/null +++ b/services/graph/pkg/middleware/path_lookup.go @@ -0,0 +1,291 @@ +package middleware + +import ( + "context" + "errors" + "fmt" + "net/http" + "regexp" + "strings" + + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + cs3rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + storageprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + + "github.com/rs/zerolog" + + "github.com/opencloud-eu/opencloud/pkg/log" + "github.com/opencloud-eu/opencloud/services/graph/pkg/errorcode" + "github.com/opencloud-eu/reva/v2/pkg/rgrpc/todo/pool" + "github.com/opencloud-eu/reva/v2/pkg/storagespace" + "github.com/opencloud-eu/reva/v2/pkg/utils" +) + +// {HTTP.Root}/{version}/drives/{driveID}/root:/[:/][:] +// where {version} is "v1.0" or "v1beta1" — preserves the requested version +// so it lands on a registered route. +// group 1: prefix up to and including the driveID +// group 2: driveID +// group 3: path (with leading slash) +// group 4: suffix (with leading slash) — empty for direct item lookup +var rootColonRe = regexp.MustCompile(`^(.*?/v1(?:\.0|beta1)/drives/([^/]+))/root:(/.+?)(?::(/[^:]+))?:?$`) + +// {HTTP.Root}/{version}/drives/{driveID}/items/{itemID}:/[:/][:] +// group 1: prefix up to and including the driveID +// group 2: driveID +// group 3: anchor itemID +// group 4: path (with leading slash) +// group 5: suffix (with leading slash) +var itemColonRe = regexp.MustCompile(`^(.*?/v1(?:\.0|beta1)/drives/([^/]+))/items/([^/]+):(/.+?)(?::(/[^:]+))?:?$`) + +type contextKey string + +// OriginalPathContextKey holds the pre-rewrite request path for downstream +// tracing/logging consumers. +const OriginalPathContextKey contextKey = "graph.original_path" + +// Sentinels distinguishing the resolution outcomes that map to specific HTTP +// statuses. Anything else surfaces as 500. +// +// errPathNotFound — path doesn't exist or the user lacks permission to +// see it. Both collapse to 404 (no existence disclosure). +// errInvalidRequest — client sent a malformed input (unparseable drive/item +// id, drive/item mismatch in item-anchored form). 400. +// errUnauthenticated — the gateway said the caller isn't authenticated for +// the lookup (token expired, cross-storage auth, etc.). 401. +var ( + errPathNotFound = errors.New("path not found") + errInvalidRequest = errors.New("invalid request") + errUnauthenticated = errors.New("unauthenticated") +) + +// ResolveGraphPath returns middleware that detects colon-syntax path +// lookup URLs and rewrites them to the canonical +// /{version}/drives/{driveID}/items/{resolvedItemID}{suffix} form before +// chi performs route matching. The requested API version is preserved +// (for example, "v1.0" or "v1beta1"). +// +// Two URL shapes are recognized: +// +// /{version}/drives/{driveID}/root:/[:/][:] +// /{version}/drives/{driveID}/items/{itemID}:/[:/][:] +// +// Path resolution runs as the request user via CS3 Stat. NOT_FOUND and +// PERMISSION_DENIED collapse to 404 (no existence disclosure); operational +// failures (gateway selection, RPC transport, unexpected status) surface +// as 5xx so outages aren't masked. +// +// URLs without colon syntax fast-path through with a single substring check. +func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log.Logger) func(http.Handler) http.Handler { + l := logger.With().Str("middleware", "graphPathLookup").Logger() + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Fast-path: skip URLs that can't be colon-syntax. + if !strings.Contains(r.URL.Path, ":") { + next.ServeHTTP(w, r) + return + } + + original := r.URL.Path + rewritten, err := rewriteColonPath(r.Context(), gws, l, original) + switch { + case errors.Is(err, errPathNotFound): + l.Debug().Str("original", original).Msg("colon-path resolution: not found") + errorcode.ItemNotFound.Render(w, r, http.StatusNotFound, "item not found") + return + case errors.Is(err, errInvalidRequest): + l.Debug().Str("original", original).Msg("colon-path resolution: invalid request") + errorcode.InvalidRequest.Render(w, r, http.StatusBadRequest, "invalid request") + return + case errors.Is(err, errUnauthenticated): + l.Debug().Str("original", original).Msg("colon-path resolution: unauthenticated") + errorcode.Unauthenticated.Render(w, r, http.StatusUnauthorized, "unauthenticated") + return + case err != nil: + l.Error().Err(err).Str("original", original).Msg("colon-path resolution: internal error") + errorcode.GeneralException.Render( + w, r, http.StatusInternalServerError, "internal error resolving path", + ) + return + case rewritten == "": + // No colon-syntax match — pass through untouched. + next.ServeHTTP(w, r) + return + } + + l.Debug(). + Str("original", original). + Str("rewritten", rewritten). + Msg("colon-path resolution: rewrote") + + ctx := context.WithValue(r.Context(), OriginalPathContextKey, original) + r = r.WithContext(ctx) + r.URL.Path = rewritten + // Match the chi-escaping workaround in Graph.ServeHTTP: RawPath + // must be a valid encoded form of Path so chi's tree match (which + // reads RawPath when non-empty) routes the rewritten URL the same + // way it routes the original. The previous RawPath (set by + // Graph.ServeHTTP) was for the pre-rewrite Path and no longer + // matches; EscapedPath recomputes a canonical encoding for the + // new Path. + // + // Drive/item IDs in this codebase have the shape + // {storage}${space}!{opaque}. EscapedPath leaves `$` literal (not + // escaped in path segments per RFC 3986) but encodes `!` as `%21`. + // chi.URLParam therefore returns the param with `%21`; downstream + // handlers (e.g. parseIDParam, GetDriveAndItemIDParam) already + // call url.PathUnescape before parsing the ID, so the round-trip + // works. See go-chi/chi#641 for the underlying chi behavior. + r.URL.RawPath = r.URL.EscapedPath() + next.ServeHTTP(w, r) + }) + } +} + +// colonMatch is the normalized result of matching either the root- or +// item-anchored colon-syntax regex. The two regexes capture different +// groups; this struct erases that difference for downstream resolution. +type colonMatch struct { + prefix string // canonical prefix up to and including /drives/{driveID} + driveIDStr string // captured driveID for item-anchored form (empty for root-anchored, where anchorIDStr already IS the driveID) + anchorIDStr string // resource id to anchor path resolution (driveID for root, itemID for item-anchored) + relPath string // relative path with leading slash + suffix string // suffix with leading slash (e.g. "/children"); may be empty +} + +// rewriteColonPath returns: +// - "" + nil — no colon-syntax pattern matched (passthrough) +// - rewritten + nil — matched and resolved to a canonical URL +// - "" + errPathNotFound — path doesn't exist or user lacks permission (404) +// - "" + errInvalidRequest — malformed input (400) +// - "" + errUnauthenticated — gateway said caller isn't authenticated (401) +// - "" + other error — operational / internal failure (5xx) +func rewriteColonPath( + ctx context.Context, + gws pool.Selectable[gateway.GatewayAPIClient], + logger zerolog.Logger, + urlPath string, +) (string, error) { + var match colonMatch + switch { + case matchInto(rootColonRe, urlPath, &match, rootMatchExtract): + case matchInto(itemColonRe, urlPath, &match, itemMatchExtract): + default: + return "", nil + } + + // r.URL.Path is already URL-decoded by net/http; do NOT call url.PathUnescape + // here, that would double-decode and let crafted inputs like "%252F" become + // "/", changing path semantics. + anchor, err := storagespace.ParseID(match.anchorIDStr) + if err != nil { + // Unparseable input is malformed by the client, not "not found". + logger.Debug().Err(err).Str("anchor", match.anchorIDStr).Msg("invalid anchor id in colon path") + return "", errInvalidRequest + } + + // Item-anchored form ("/drives/{driveID}/items/{itemID}:/...") captures + // driveID separately. Validate the itemID belongs to the given driveID + // (storage + space prefix) — otherwise the request is malformed and we + // short-circuit instead of doing an unnecessary CS3 Stat. + if match.driveIDStr != "" { + drive, err := storagespace.ParseID(match.driveIDStr) + if err != nil { + logger.Debug().Err(err).Str("driveID", match.driveIDStr).Msg("invalid drive id in colon path") + return "", errInvalidRequest + } + if drive.GetStorageId() != anchor.GetStorageId() || drive.GetSpaceId() != anchor.GetSpaceId() { + logger.Debug(). + Str("driveID", match.driveIDStr). + Str("itemID", match.anchorIDStr). + Msg("drive id does not match item id storage/space") + return "", errInvalidRequest + } + } + + itemID, err := resolvePath(ctx, gws, logger, &anchor, match.relPath) + if err != nil { + return "", err + } + return buildCanonicalPath(match.prefix, itemID, match.suffix), nil +} + +// matchInto runs the regex; on a hit, populates *out via extract and returns true. +// A small indirection that lets the switch in rewriteColonPath stay flat. +func matchInto(re *regexp.Regexp, s string, out *colonMatch, extract func([]string) colonMatch) bool { + m := re.FindStringSubmatch(s) + if m == nil { + return false + } + *out = extract(m) + return true +} + +func rootMatchExtract(m []string) colonMatch { + // Root-anchored: anchorIDStr IS the driveID, so leave driveIDStr empty + // to skip the drive/item match check (it would compare driveID to itself). + return colonMatch{prefix: m[1], anchorIDStr: m[2], relPath: m[3], suffix: m[4]} +} + +func itemMatchExtract(m []string) colonMatch { + // Item-anchored: capture driveID (m[2]) so the resolver can validate it + // matches the itemID's storage/space. + return colonMatch{prefix: m[1], driveIDStr: m[2], anchorIDStr: m[3], relPath: m[4], suffix: m[5]} +} + +// resolvePath translates a relative filesystem path (anchored at the given +// CS3 resource id) into the resolved item's id, running with the request +// user's permissions via CS3 Stat. +func resolvePath( + ctx context.Context, + gws pool.Selectable[gateway.GatewayAPIClient], + logger zerolog.Logger, + anchor *storageprovider.ResourceId, + rawPath string, +) (string, error) { + gw, err := gws.Next() + if err != nil { + return "", fmt.Errorf("gateway selector: %w", err) + } + + // rawPath comes from r.URL.Path which is already decoded by net/http — + // no extra unescape (would double-decode crafted "%25xx" inputs). + statRes, err := gw.Stat(ctx, &storageprovider.StatRequest{ + Ref: &storageprovider.Reference{ + ResourceId: anchor, + Path: utils.MakeRelativePath(rawPath), + }, + }) + if err != nil { + return "", fmt.Errorf("CS3 Stat: %w", err) + } + + switch statRes.GetStatus().GetCode() { + case cs3rpc.Code_CODE_OK: + // fall through + case cs3rpc.Code_CODE_NOT_FOUND, cs3rpc.Code_CODE_PERMISSION_DENIED: + return "", errPathNotFound + case cs3rpc.Code_CODE_UNAUTHENTICATED: + return "", errUnauthenticated + default: + return "", fmt.Errorf( + "CS3 Stat returned %s: %s", + statRes.GetStatus().GetCode(), + statRes.GetStatus().GetMessage(), + ) + } + + id := statRes.GetInfo().GetId() + if id == nil { + return "", fmt.Errorf("CS3 Stat returned OK but missing Info.Id") + } + return storagespace.FormatResourceID(id), nil +} + +func buildCanonicalPath(prefix, itemID, suffix string) string { + // r.URL.Path is the decoded form per Go's net/http convention; chi tree + // matching reads it directly. Insert the raw itemID without escaping so + // chi's {itemID} param captures the same string the handler will see + // after parseIDParam unescapes (no-op for already-decoded chars). + return prefix + "/items/" + itemID + suffix +} diff --git a/services/graph/pkg/middleware/path_lookup_test.go b/services/graph/pkg/middleware/path_lookup_test.go new file mode 100644 index 0000000000..b4509550de --- /dev/null +++ b/services/graph/pkg/middleware/path_lookup_test.go @@ -0,0 +1,303 @@ +package middleware_test + +import ( + "net/http" + "net/http/httptest" + "net/url" + "testing" + + gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" + cs3rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" + storageprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" + "github.com/go-chi/chi/v5" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "google.golang.org/grpc" + + "github.com/opencloud-eu/opencloud/pkg/log" + "github.com/opencloud-eu/opencloud/services/graph/pkg/middleware" + "github.com/opencloud-eu/reva/v2/pkg/rgrpc/todo/pool" + cs3mocks "github.com/opencloud-eu/reva/v2/tests/cs3mocks/mocks" +) + +const ( + testDriveID = "storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd" + testItemID = "storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d" +) + +func newTestSelector(t *testing.T, gw *cs3mocks.GatewayAPIClient) pool.Selectable[gateway.GatewayAPIClient] { + t.Helper() + // Unique key per test so pool's selector cache doesn't hand back a stale gw. + // t.Cleanup removes the entry from pool's global selectors map after the + // subtest, so global state doesn't grow across the suite. + svcName := "TestGatewaySelector" + key := "test.gateway." + t.Name() + pool.RemoveSelector(svcName + key) + t.Cleanup(func() { pool.RemoveSelector(svcName + key) }) + return pool.GetSelector[gateway.GatewayAPIClient]( + svcName, + key, + func(cc grpc.ClientConnInterface) gateway.GatewayAPIClient { return gw }, + ) +} + +// statResponse builds a CS3 StatResponse with the given status code, optionally +// returning a resource info populated with testItemID for OK responses. +func statResponse(code cs3rpc.Code, withInfo bool) *storageprovider.StatResponse { + res := &storageprovider.StatResponse{Status: &cs3rpc.Status{Code: code}} + if withInfo { + res.Info = &storageprovider.ResourceInfo{ + Id: &storageprovider.ResourceId{ + StorageId: "storage-users-1", + SpaceId: "f503f6fe-2656-4b0f-8289-fb3184962dfd", + OpaqueId: "f0e20017-9cba-498a-87e5-3467b976604d", + }, + } + } + return res +} + +// recordingHandler captures the request URL path it receives so tests can assert +// the middleware rewrote (or passed through) correctly. +func recordingHandler() (http.Handler, *string) { + var seen string + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + seen = r.URL.Path + w.WriteHeader(http.StatusOK) + }), &seen +} + +func TestResolveGraphPath(t *testing.T) { + tests := []struct { + name string + urlPath string + statCode cs3rpc.Code + statErr error + expectStatCalled bool + expectStatus int + expectURLPath string // empty = handler not invoked + }{ + { + name: "non-colon URL passes through unchanged", + urlPath: "/graph/v1.0/me/drives", + expectStatCalled: false, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/me/drives", + }, + { + name: "URL with colon but not matching pattern passes through", + urlPath: "/graph/v1.0/some/other/route:/foo", + expectStatCalled: false, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/some/other/route:/foo", + }, + { + name: "v1.0 root-anchored with /children rewrites", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents:/children", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d/children", + }, + { + name: "v1beta1 root-anchored with /createLink rewrites", + urlPath: "/graph/v1beta1/drives/" + testDriveID + "/root:/Documents:/createLink", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1beta1/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d/createLink", + }, + { + name: "trailing colon (no suffix) rewrites to bare item URL", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents:", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d", + }, + { + name: "no trailing colon, no suffix rewrites to bare item URL", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d", + }, + { + name: "deep path rewrites correctly", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents/Reports:/children", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d/children", + }, + { + name: "item-anchored colon syntax rewrites", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/items/" + testItemID + ":/notes.txt:/children", + statCode: cs3rpc.Code_CODE_OK, + expectStatCalled: true, + expectStatus: http.StatusOK, + expectURLPath: "/graph/v1.0/drives/" + testDriveID + "/items/storage-users-1$f503f6fe-2656-4b0f-8289-fb3184962dfd!f0e20017-9cba-498a-87e5-3467b976604d/children", + }, + { + name: "Stat NOT_FOUND returns 404 without invoking handler", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Missing:", + statCode: cs3rpc.Code_CODE_NOT_FOUND, + expectStatCalled: true, + expectStatus: http.StatusNotFound, + expectURLPath: "", // handler must NOT be called + }, + { + // CRITICAL security test: PERMISSION_DENIED must not leak existence. + // We collapse it to 404, identical to NOT_FOUND, so an unauthorized + // caller can't distinguish "doesn't exist" from "exists but you can't see it". + name: "Stat PERMISSION_DENIED returns 404 (not 403) — don't disclose existence", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Restricted:", + statCode: cs3rpc.Code_CODE_PERMISSION_DENIED, + expectStatCalled: true, + expectStatus: http.StatusNotFound, + expectURLPath: "", + }, + { + // Operational/unexpected CS3 statuses must NOT be collapsed to 404 — + // that would mask outages. Surface as 500 like other graph handlers do. + name: "Stat unexpected status returns 500 (not 404 — don't mask outages)", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Anything:", + statCode: cs3rpc.Code_CODE_INTERNAL, + expectStatCalled: true, + expectStatus: http.StatusInternalServerError, + expectURLPath: "", + }, + { + // UNAUTHENTICATED is its own distinct outcome — must surface as 401, + // not 500, so clients can detect "your token is bad" vs "server error". + name: "Stat UNAUTHENTICATED returns 401", + urlPath: "/graph/v1.0/drives/" + testDriveID + "/root:/Documents:", + statCode: cs3rpc.Code_CODE_UNAUTHENTICATED, + expectStatCalled: true, + expectStatus: http.StatusUnauthorized, + expectURLPath: "", + }, + { + // Item-anchored form with a driveID that doesn't match the itemID's + // storage/space — the request is malformed; short-circuit to 400 + // instead of doing a Stat that would only fail downstream. + name: "drive id and item id storage/space mismatch returns 400", + urlPath: "/graph/v1.0/drives/storage-users-2$other-space-id/items/" + testItemID + ":/notes.txt:/children", + expectStatCalled: false, + expectStatus: http.StatusBadRequest, + expectURLPath: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gw := &cs3mocks.GatewayAPIClient{} + if tt.expectStatCalled { + gw.On("Stat", mock.Anything, mock.Anything). + Return(statResponse(tt.statCode, tt.statCode == cs3rpc.Code_CODE_OK), tt.statErr) + } + + selector := newTestSelector(t, gw) + handler, seen := recordingHandler() + mw := middleware.ResolveGraphPath(selector, log.NopLogger()) + + req := httptest.NewRequest(http.MethodGet, "http://localhost"+tt.urlPath, nil) + rr := httptest.NewRecorder() + + mw(handler).ServeHTTP(rr, req) + + assert.Equal(t, tt.expectStatus, rr.Code, "status code") + assert.Equal(t, tt.expectURLPath, *seen, "URL seen by next handler") + + if tt.expectStatCalled { + gw.AssertCalled(t, "Stat", mock.Anything, mock.Anything) + } else { + gw.AssertNotCalled(t, "Stat", mock.Anything, mock.Anything) + } + }) + } +} + +// TestResolveGraphPath_ChiParamRoundTripWithSubDelims pins down what chi +// actually returns from URLParam for rewritten URLs whose IDs contain `$` +// and `!`, and verifies the PathUnescape round-trip downstream handlers +// rely on still recovers the original ID. +// +// Specifically: r.URL.RawPath = r.URL.EscapedPath() leaves `$` literal but +// encodes `!` as `%21` (net/url's encodePath behavior). chi.URLParam returns +// the matched RawPath segment as-is, so the bound param contains `%21`. +// Existing handlers (parseIDParam, GetDriveAndItemIDParam) call +// url.PathUnescape on the param before parsing the ID, which recovers `!`. +// +// This test guards against any future change to the encoding strategy that +// would silently break that round-trip, for example, switching to +// url.PathEscape(itemID) and stuffing the result into r.URL.Path (which +// expects the decoded form) and thereby double-encoding `!` to `%2521`. +func TestResolveGraphPath_ChiParamRoundTripWithSubDelims(t *testing.T) { + gw := &cs3mocks.GatewayAPIClient{} + gw.On("Stat", mock.Anything, mock.Anything). + Return(statResponse(cs3rpc.Code_CODE_OK, true), nil) + + selector := newTestSelector(t, gw) + + var gotDriveID, gotItemID string + r := chi.NewRouter() + r.Use(middleware.ResolveGraphPath(selector, log.NopLogger())) + r.Route("/graph/v1.0/drives/{driveID}", func(r chi.Router) { + r.Get("/items/{itemID}/children", func(w http.ResponseWriter, r *http.Request) { + gotDriveID = chi.URLParam(r, "driveID") + gotItemID = chi.URLParam(r, "itemID") + w.WriteHeader(http.StatusOK) + }) + }) + + req := httptest.NewRequest( + http.MethodGet, + "http://localhost/graph/v1.0/drives/"+testDriveID+"/root:/Documents:/children", + nil, + ) + rr := httptest.NewRecorder() + r.ServeHTTP(rr, req) + + assert.Equal(t, http.StatusOK, rr.Code, "chi must route the rewritten URL to the handler") + + // driveID has only `$` (a sub-delim left literal by EscapedPath), so chi + // returns it without any percent-encoding. + assert.Equal(t, testDriveID, gotDriveID, + "chi.URLParam should return driveID unchanged: only `$` sub-delim, left literal by EscapedPath") + + // itemID has both `$` and `!`. EscapedPath encodes `!` as `%21`, so chi + // returns the param with `%21` literal; PathUnescape recovers the + // original ID. This is the round-trip downstream handlers rely on. + unescaped, err := url.PathUnescape(gotItemID) + assert.NoError(t, err, "URLParam value must be a valid percent-encoded string") + assert.Equal(t, testItemID, unescaped, + "PathUnescape(chi.URLParam(itemID)) must recover the original ID") +} + +// TestResolveGraphPath_OriginalPathContext verifies the rewrite preserves the +// original URL in request context for downstream tracing/logging. +func TestResolveGraphPath_OriginalPathContext(t *testing.T) { + gw := &cs3mocks.GatewayAPIClient{} + gw.On("Stat", mock.Anything, mock.Anything). + Return(statResponse(cs3rpc.Code_CODE_OK, true), nil) + + selector := newTestSelector(t, gw) + original := "/graph/v1.0/drives/" + testDriveID + "/root:/Documents:/children" + + var capturedOriginal interface{} + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + capturedOriginal = r.Context().Value(middleware.OriginalPathContextKey) + w.WriteHeader(http.StatusOK) + }) + + mw := middleware.ResolveGraphPath(selector, log.NopLogger()) + req := httptest.NewRequest(http.MethodGet, "http://localhost"+original, nil) + rr := httptest.NewRecorder() + + mw(handler).ServeHTTP(rr, req) + + assert.Equal(t, http.StatusOK, rr.Code) + assert.Equal(t, original, capturedOriginal, "original URL must be available via OriginalPathContextKey") +} diff --git a/services/graph/pkg/service/v0/service.go b/services/graph/pkg/service/v0/service.go index 6c9880ba28..e71e0dd7d9 100644 --- a/services/graph/pkg/service/v0/service.go +++ b/services/graph/pkg/service/v0/service.go @@ -131,6 +131,12 @@ func NewService(opts ...Option) (Graph, error) { //nolint:maintidx m := chi.NewMux() m.Use(options.Middleware...) + // Must be top-level mux.Use, not r.Use inside a Route block: chi + // sub-router middleware runs after the prefix has already been matched, + // so URL rewriting at that point can't redirect to a different leaf + // route. Top-level middleware runs before any matching, so the rewritten + // URL is what chi walks the radix tree against. + m.Use(graphm.ResolveGraphPath(options.GatewaySelector, options.Logger)) m.Use( otelchi.Middleware( "graph", diff --git a/tests/acceptance/bootstrap/GraphContext.php b/tests/acceptance/bootstrap/GraphContext.php index 49782fa137..3619169d50 100644 --- a/tests/acceptance/bootstrap/GraphContext.php +++ b/tests/acceptance/bootstrap/GraphContext.php @@ -3336,4 +3336,169 @@ public function userHasUnmarkedItemFromSpaceAsFavoriteUsingTheGraphApi( $response = $this->unmarkFavorite($user, $itemId); $this->featureContext->theHTTPStatusCodeShouldBe(204, '', $response); } + + /** + * Encode a colon-syntax path segment so slashes survive but the + * structural ":" delimiters of the Graph URL remain literal. + * + * @param string $path + * + * @return string + */ + private function encodeColonPathSegment(string $path): string { + $path = \ltrim($path, '/'); + $parts = \explode('/', $path); + $encoded = \array_map('rawurlencode', $parts); + return \implode('/', $encoded); + } + + /** + * Send a Graph API request and capture the response on FeatureContext. + * + * @param string $user + * @param string $method + * @param string $relativeUrl + * + * @return void + */ + private function sendGraphRequestAndCaptureResponse( + string $user, + string $method, + string $relativeUrl + ): void { + $response = $this->featureContext->sendingToWithDirectUrl($user, $method, $relativeUrl); + $this->featureContext->setResponse($response); + } + + /** + * @When /^user "([^"]*)" gets the drive item with colon path "([^"]*)" of space "([^"]*)" using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Hits /graph/{version}/drives/{driveID}/root:/{path}, exercising the + * colon-syntax path lookup middleware (root-anchored, no suffix). + * + * @param string $user + * @param string $path + * @param string $spaceName + * @param string $apiVersion + * + * @return void + */ + public function userGetsDriveItemWithColonPathOfSpace( + string $user, + string $path, + string $spaceName, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($user, $spaceName); + $encoded = $this->encodeColonPathSegment($path); + $url = "/graph/$apiVersion/drives/$driveId/root:/$encoded"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } + + /** + * @When /^user "([^"]*)" gets the drive item with colon path "([^"]*)" of space "([^"]*)" with trailing colon using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Hits /graph/{version}/drives/{driveID}/root:/{path}: with the optional + * trailing ":" — verifies the middleware accepts both forms. + * + * @param string $user + * @param string $path + * @param string $spaceName + * @param string $apiVersion + * + * @return void + */ + public function userGetsDriveItemWithColonPathOfSpaceWithTrailingColon( + string $user, + string $path, + string $spaceName, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($user, $spaceName); + $encoded = $this->encodeColonPathSegment($path); + $url = "/graph/$apiVersion/drives/$driveId/root:/$encoded:"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } + + /** + * @When /^user "([^"]*)" gets the drive item with colon path "([^"]*)" relative to folder "([^"]*)" of space "([^"]*)" using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Hits /graph/{version}/drives/{driveID}/items/{itemID}:/{relPath}, the + * item-anchored colon-syntax form. + * + * @param string $user + * @param string $relPath + * @param string $folderName + * @param string $spaceName + * @param string $apiVersion + * + * @return void + */ + public function userGetsDriveItemWithColonPathRelativeToFolderOfSpace( + string $user, + string $relPath, + string $folderName, + string $spaceName, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($user, $spaceName); + $folderId = $this->spacesContext->getResourceId($user, $spaceName, $folderName); + $encoded = $this->encodeColonPathSegment($relPath); + $url = "/graph/$apiVersion/drives/$driveId/items/$folderId:/$encoded"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } + + /** + * @When /^user "([^"]*)" lists permissions of the drive item with colon path "([^"]*)" of space "([^"]*)" using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Hits /graph/{version}/drives/{driveID}/root:/{path}:/permissions, the + * "colon path with suffix" form (root-anchored colon path + canonical + * sub-route). + * + * @param string $user + * @param string $path + * @param string $spaceName + * @param string $apiVersion + * + * @return void + */ + public function userListsPermissionsOfDriveItemWithColonPathOfSpace( + string $user, + string $path, + string $spaceName, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($user, $spaceName); + $encoded = $this->encodeColonPathSegment($path); + $url = "/graph/$apiVersion/drives/$driveId/root:/$encoded:/permissions"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } + + /** + * @When /^user "([^"]*)" gets the drive item with colon path "([^"]*)" of the personal space of "([^"]*)" using the Graph API version "(v1\.0|v1beta1)"$/ + * + * Same as userGetsDriveItemWithColonPathOfSpace, but the request is + * issued by :user against the personal space drive ID of :owner. Used + * for security tests where one user attempts to reach another user's + * resource via colon syntax — should be indistinguishable from a + * not-found result. + * + * @param string $user + * @param string $path + * @param string $owner + * @param string $apiVersion + * + * @return void + */ + public function userGetsDriveItemWithColonPathOfPersonalSpaceOf( + string $user, + string $path, + string $owner, + string $apiVersion + ): void { + $driveId = $this->spacesContext->getSpaceIdByName($owner, "Personal"); + $encoded = $this->encodeColonPathSegment($path); + $url = "/graph/$apiVersion/drives/$driveId/root:/$encoded"; + $this->sendGraphRequestAndCaptureResponse($user, "GET", $url); + } } diff --git a/tests/acceptance/features/apiGraph/colonSyntaxPathLookup.feature b/tests/acceptance/features/apiGraph/colonSyntaxPathLookup.feature new file mode 100644 index 0000000000..d8e445f769 --- /dev/null +++ b/tests/acceptance/features/apiGraph/colonSyntaxPathLookup.feature @@ -0,0 +1,114 @@ +Feature: colon-syntax path lookup on the Graph API + As a client + I want to address drive items by path using colon-syntax URLs on the Graph API + So that I do not have to walk the path with successive lookups before issuing a request + + The colon-syntax shapes recognised by the path-lookup middleware are: + + /graph/{version}/drives/{driveID}/root:/[:/][:] + /graph/{version}/drives/{driveID}/items/{itemID}:/[:/][:] + + Both /v1.0 and /v1beta1 are supported. NOT_FOUND and PERMISSION_DENIED + collapse to 404 so the middleware never discloses the existence of + resources the caller is not allowed to see. + + Background: + Given user "Alice" has been created with default attributes + And user "Alice" has created folder "folder1" + And user "Alice" has created folder "folder1/sub" + And user "Alice" has uploaded file with content "hello" to "folder1/file.txt" + And user "Alice" has uploaded file with content "deep" to "folder1/sub/deep.txt" + + + Scenario: get a drive item by root-anchored colon path + When user "Alice" gets the drive item with colon path "folder1/file.txt" of space "Personal" using the Graph API version "v1.0" + Then the HTTP status code should be "200" + And the JSON data of the response should match + """ + { + "type": "object", + "required": ["id", "name", "parentReference"], + "properties": { + "id": { + "type": "string", + "pattern": "^%file_id_pattern%$" + }, + "name": { + "const": "file.txt" + } + } + } + """ + + + Scenario: get a drive item by root-anchored colon path with a deep path + When user "Alice" gets the drive item with colon path "folder1/sub/deep.txt" of space "Personal" using the Graph API version "v1.0" + Then the HTTP status code should be "200" + And the JSON data of the response should match + """ + { + "type": "object", + "required": ["id", "name"], + "properties": { + "name": { + "const": "deep.txt" + } + } + } + """ + + + Scenario: get a drive item by root-anchored colon path with a trailing colon + When user "Alice" gets the drive item with colon path "folder1/file.txt" of space "Personal" with trailing colon using the Graph API version "v1.0" + Then the HTTP status code should be "200" + And the JSON data of the response should match + """ + { + "type": "object", + "required": ["id", "name"], + "properties": { + "name": { + "const": "file.txt" + } + } + } + """ + + + Scenario: get a drive item by item-anchored colon path + When user "Alice" gets the drive item with colon path "file.txt" relative to folder "folder1" of space "Personal" using the Graph API version "v1.0" + Then the HTTP status code should be "200" + And the JSON data of the response should match + """ + { + "type": "object", + "required": ["id", "name"], + "properties": { + "name": { + "const": "file.txt" + } + } + } + """ + + + Scenario: list permissions of a drive item via colon path with a sub-route suffix on v1beta1 + # Exercises the "/:/" rewrite shape and, at the same time, + # the v1beta1 mount of the middleware. The /permissions sub-route is only + # registered at /v1beta1/, and the canonical /v1beta1 GetDriveItem + # handler is share-jail-only, so this is the cleanest way to assert that + # the v1beta1 colon-syntax path actually reaches a working handler for + # regular drive items. + When user "Alice" lists permissions of the drive item with colon path "folder1" of space "Personal" using the Graph API version "v1beta1" + Then the HTTP status code should be "200" + + + Scenario: non-existent colon path returns 404 + When user "Alice" gets the drive item with colon path "folder1/does-not-exist.txt" of space "Personal" using the Graph API version "v1.0" + Then the HTTP status code should be "404" + + + Scenario: another user cannot disclose existence of a resource via colon path + Given user "Brian" has been created with default attributes + When user "Brian" gets the drive item with colon path "folder1/file.txt" of the personal space of "Alice" using the Graph API version "v1.0" + Then the HTTP status code should be "404"