-
Notifications
You must be signed in to change notification settings - Fork 200
feat(graph): add MS Graph colon-syntax path lookup middleware #2688
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
dschmidt
wants to merge
7
commits into
opencloud-eu:main
Choose a base branch
from
dschmidt:feat/colon-path-middleware
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
1eda842
feat(graph): add MS Graph colon-syntax path lookup middleware
dschmidt 2d5693f
fix(graph): address Copilot review feedback on path lookup middleware
dschmidt 1fc1bd4
fix(graph): preserve RawPath workaround + use NopLogger in tests
dschmidt d177df0
fix(graph): map UNAUTHENTICATED to 401, validate drive/item id pair
dschmidt b920282
refactor(graph): drop MS Graph framing from colon-path middleware logs
dschmidt adce092
test(graph): acceptance tests for MS Graph colon-syntax path lookup
dschmidt fa4f72d
test(graph): pin chi URLParam round-trip for IDs with `!` sub-delim
dschmidt File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,291 @@ | ||
| package middleware | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "net/http" | ||
| "regexp" | ||
| "strings" | ||
|
|
||
| gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1" | ||
| cs3rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1" | ||
| storageprovider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" | ||
|
|
||
| "github.com/rs/zerolog" | ||
|
|
||
| "github.com/opencloud-eu/opencloud/pkg/log" | ||
| "github.com/opencloud-eu/opencloud/services/graph/pkg/errorcode" | ||
| "github.com/opencloud-eu/reva/v2/pkg/rgrpc/todo/pool" | ||
| "github.com/opencloud-eu/reva/v2/pkg/storagespace" | ||
| "github.com/opencloud-eu/reva/v2/pkg/utils" | ||
| ) | ||
|
|
||
| // {HTTP.Root}/{version}/drives/{driveID}/root:/<path>[:/<suffix>][:] | ||
| // where {version} is "v1.0" or "v1beta1" — preserves the requested version | ||
| // so it lands on a registered route. | ||
| // group 1: prefix up to and including the driveID | ||
| // group 2: driveID | ||
| // group 3: path (with leading slash) | ||
| // group 4: suffix (with leading slash) — empty for direct item lookup | ||
| var rootColonRe = regexp.MustCompile(`^(.*?/v1(?:\.0|beta1)/drives/([^/]+))/root:(/.+?)(?::(/[^:]+))?:?$`) | ||
|
|
||
| // {HTTP.Root}/{version}/drives/{driveID}/items/{itemID}:/<path>[:/<suffix>][:] | ||
| // group 1: prefix up to and including the driveID | ||
| // group 2: driveID | ||
| // group 3: anchor itemID | ||
| // group 4: path (with leading slash) | ||
| // group 5: suffix (with leading slash) | ||
| var itemColonRe = regexp.MustCompile(`^(.*?/v1(?:\.0|beta1)/drives/([^/]+))/items/([^/]+):(/.+?)(?::(/[^:]+))?:?$`) | ||
|
|
||
| type contextKey string | ||
|
|
||
| // OriginalPathContextKey holds the pre-rewrite request path for downstream | ||
| // tracing/logging consumers. | ||
| const OriginalPathContextKey contextKey = "graph.original_path" | ||
|
|
||
| // Sentinels distinguishing the resolution outcomes that map to specific HTTP | ||
| // statuses. Anything else surfaces as 500. | ||
| // | ||
| // errPathNotFound — path doesn't exist or the user lacks permission to | ||
| // see it. Both collapse to 404 (no existence disclosure). | ||
| // errInvalidRequest — client sent a malformed input (unparseable drive/item | ||
| // id, drive/item mismatch in item-anchored form). 400. | ||
| // errUnauthenticated — the gateway said the caller isn't authenticated for | ||
| // the lookup (token expired, cross-storage auth, etc.). 401. | ||
| var ( | ||
| errPathNotFound = errors.New("path not found") | ||
| errInvalidRequest = errors.New("invalid request") | ||
| errUnauthenticated = errors.New("unauthenticated") | ||
| ) | ||
|
|
||
| // ResolveGraphPath returns middleware that detects colon-syntax path | ||
| // lookup URLs and rewrites them to the canonical | ||
| // /{version}/drives/{driveID}/items/{resolvedItemID}{suffix} form before | ||
| // chi performs route matching. The requested API version is preserved | ||
| // (for example, "v1.0" or "v1beta1"). | ||
| // | ||
| // Two URL shapes are recognized: | ||
| // | ||
| // /{version}/drives/{driveID}/root:/<path>[:/<suffix>][:] | ||
| // /{version}/drives/{driveID}/items/{itemID}:/<path>[:/<suffix>][:] | ||
| // | ||
| // Path resolution runs as the request user via CS3 Stat. NOT_FOUND and | ||
| // PERMISSION_DENIED collapse to 404 (no existence disclosure); operational | ||
| // failures (gateway selection, RPC transport, unexpected status) surface | ||
| // as 5xx so outages aren't masked. | ||
| // | ||
| // URLs without colon syntax fast-path through with a single substring check. | ||
| func ResolveGraphPath(gws pool.Selectable[gateway.GatewayAPIClient], logger log.Logger) func(http.Handler) http.Handler { | ||
| l := logger.With().Str("middleware", "graphPathLookup").Logger() | ||
| return func(next http.Handler) http.Handler { | ||
| return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||
| // Fast-path: skip URLs that can't be colon-syntax. | ||
| if !strings.Contains(r.URL.Path, ":") { | ||
| next.ServeHTTP(w, r) | ||
| return | ||
| } | ||
|
|
||
| original := r.URL.Path | ||
| rewritten, err := rewriteColonPath(r.Context(), gws, l, original) | ||
| switch { | ||
| case errors.Is(err, errPathNotFound): | ||
| l.Debug().Str("original", original).Msg("colon-path resolution: not found") | ||
| errorcode.ItemNotFound.Render(w, r, http.StatusNotFound, "item not found") | ||
| return | ||
| case errors.Is(err, errInvalidRequest): | ||
| l.Debug().Str("original", original).Msg("colon-path resolution: invalid request") | ||
| errorcode.InvalidRequest.Render(w, r, http.StatusBadRequest, "invalid request") | ||
| return | ||
| case errors.Is(err, errUnauthenticated): | ||
| l.Debug().Str("original", original).Msg("colon-path resolution: unauthenticated") | ||
| errorcode.Unauthenticated.Render(w, r, http.StatusUnauthorized, "unauthenticated") | ||
| return | ||
| case err != nil: | ||
| l.Error().Err(err).Str("original", original).Msg("colon-path resolution: internal error") | ||
| errorcode.GeneralException.Render( | ||
| w, r, http.StatusInternalServerError, "internal error resolving path", | ||
| ) | ||
| return | ||
| case rewritten == "": | ||
| // No colon-syntax match — pass through untouched. | ||
| next.ServeHTTP(w, r) | ||
| return | ||
| } | ||
|
|
||
| l.Debug(). | ||
| Str("original", original). | ||
| Str("rewritten", rewritten). | ||
| Msg("colon-path resolution: rewrote") | ||
|
|
||
| ctx := context.WithValue(r.Context(), OriginalPathContextKey, original) | ||
| r = r.WithContext(ctx) | ||
| r.URL.Path = rewritten | ||
| // Match the chi-escaping workaround in Graph.ServeHTTP: RawPath | ||
| // must be a valid encoded form of Path so chi's tree match (which | ||
| // reads RawPath when non-empty) routes the rewritten URL the same | ||
| // way it routes the original. The previous RawPath (set by | ||
| // Graph.ServeHTTP) was for the pre-rewrite Path and no longer | ||
| // matches; EscapedPath recomputes a canonical encoding for the | ||
| // new Path. | ||
| // | ||
| // Drive/item IDs in this codebase have the shape | ||
| // {storage}${space}!{opaque}. EscapedPath leaves `$` literal (not | ||
| // escaped in path segments per RFC 3986) but encodes `!` as `%21`. | ||
| // chi.URLParam therefore returns the param with `%21`; downstream | ||
| // handlers (e.g. parseIDParam, GetDriveAndItemIDParam) already | ||
| // call url.PathUnescape before parsing the ID, so the round-trip | ||
| // works. See go-chi/chi#641 for the underlying chi behavior. | ||
| r.URL.RawPath = r.URL.EscapedPath() | ||
| next.ServeHTTP(w, r) | ||
|
dschmidt marked this conversation as resolved.
|
||
| }) | ||
| } | ||
| } | ||
|
|
||
| // colonMatch is the normalized result of matching either the root- or | ||
| // item-anchored colon-syntax regex. The two regexes capture different | ||
| // groups; this struct erases that difference for downstream resolution. | ||
| type colonMatch struct { | ||
| prefix string // canonical prefix up to and including /drives/{driveID} | ||
| driveIDStr string // captured driveID for item-anchored form (empty for root-anchored, where anchorIDStr already IS the driveID) | ||
| anchorIDStr string // resource id to anchor path resolution (driveID for root, itemID for item-anchored) | ||
| relPath string // relative path with leading slash | ||
| suffix string // suffix with leading slash (e.g. "/children"); may be empty | ||
| } | ||
|
|
||
| // rewriteColonPath returns: | ||
| // - "" + nil — no colon-syntax pattern matched (passthrough) | ||
| // - rewritten + nil — matched and resolved to a canonical URL | ||
| // - "" + errPathNotFound — path doesn't exist or user lacks permission (404) | ||
| // - "" + errInvalidRequest — malformed input (400) | ||
| // - "" + errUnauthenticated — gateway said caller isn't authenticated (401) | ||
| // - "" + other error — operational / internal failure (5xx) | ||
| func rewriteColonPath( | ||
| ctx context.Context, | ||
| gws pool.Selectable[gateway.GatewayAPIClient], | ||
| logger zerolog.Logger, | ||
| urlPath string, | ||
| ) (string, error) { | ||
| var match colonMatch | ||
| switch { | ||
| case matchInto(rootColonRe, urlPath, &match, rootMatchExtract): | ||
| case matchInto(itemColonRe, urlPath, &match, itemMatchExtract): | ||
| default: | ||
| return "", nil | ||
| } | ||
|
|
||
| // r.URL.Path is already URL-decoded by net/http; do NOT call url.PathUnescape | ||
| // here, that would double-decode and let crafted inputs like "%252F" become | ||
| // "/", changing path semantics. | ||
| anchor, err := storagespace.ParseID(match.anchorIDStr) | ||
| if err != nil { | ||
| // Unparseable input is malformed by the client, not "not found". | ||
| logger.Debug().Err(err).Str("anchor", match.anchorIDStr).Msg("invalid anchor id in colon path") | ||
| return "", errInvalidRequest | ||
| } | ||
|
|
||
| // Item-anchored form ("/drives/{driveID}/items/{itemID}:/...") captures | ||
| // driveID separately. Validate the itemID belongs to the given driveID | ||
| // (storage + space prefix) — otherwise the request is malformed and we | ||
| // short-circuit instead of doing an unnecessary CS3 Stat. | ||
| if match.driveIDStr != "" { | ||
| drive, err := storagespace.ParseID(match.driveIDStr) | ||
| if err != nil { | ||
| logger.Debug().Err(err).Str("driveID", match.driveIDStr).Msg("invalid drive id in colon path") | ||
| return "", errInvalidRequest | ||
| } | ||
| if drive.GetStorageId() != anchor.GetStorageId() || drive.GetSpaceId() != anchor.GetSpaceId() { | ||
| logger.Debug(). | ||
| Str("driveID", match.driveIDStr). | ||
| Str("itemID", match.anchorIDStr). | ||
| Msg("drive id does not match item id storage/space") | ||
| return "", errInvalidRequest | ||
| } | ||
| } | ||
|
|
||
| itemID, err := resolvePath(ctx, gws, logger, &anchor, match.relPath) | ||
| if err != nil { | ||
| return "", err | ||
| } | ||
| return buildCanonicalPath(match.prefix, itemID, match.suffix), nil | ||
| } | ||
|
|
||
| // matchInto runs the regex; on a hit, populates *out via extract and returns true. | ||
| // A small indirection that lets the switch in rewriteColonPath stay flat. | ||
| func matchInto(re *regexp.Regexp, s string, out *colonMatch, extract func([]string) colonMatch) bool { | ||
| m := re.FindStringSubmatch(s) | ||
| if m == nil { | ||
| return false | ||
| } | ||
| *out = extract(m) | ||
| return true | ||
| } | ||
|
|
||
| func rootMatchExtract(m []string) colonMatch { | ||
| // Root-anchored: anchorIDStr IS the driveID, so leave driveIDStr empty | ||
| // to skip the drive/item match check (it would compare driveID to itself). | ||
| return colonMatch{prefix: m[1], anchorIDStr: m[2], relPath: m[3], suffix: m[4]} | ||
| } | ||
|
|
||
| func itemMatchExtract(m []string) colonMatch { | ||
| // Item-anchored: capture driveID (m[2]) so the resolver can validate it | ||
| // matches the itemID's storage/space. | ||
| return colonMatch{prefix: m[1], driveIDStr: m[2], anchorIDStr: m[3], relPath: m[4], suffix: m[5]} | ||
| } | ||
|
dschmidt marked this conversation as resolved.
|
||
|
|
||
| // resolvePath translates a relative filesystem path (anchored at the given | ||
| // CS3 resource id) into the resolved item's id, running with the request | ||
| // user's permissions via CS3 Stat. | ||
| func resolvePath( | ||
| ctx context.Context, | ||
| gws pool.Selectable[gateway.GatewayAPIClient], | ||
| logger zerolog.Logger, | ||
| anchor *storageprovider.ResourceId, | ||
| rawPath string, | ||
| ) (string, error) { | ||
| gw, err := gws.Next() | ||
| if err != nil { | ||
| return "", fmt.Errorf("gateway selector: %w", err) | ||
| } | ||
|
|
||
| // rawPath comes from r.URL.Path which is already decoded by net/http — | ||
| // no extra unescape (would double-decode crafted "%25xx" inputs). | ||
| statRes, err := gw.Stat(ctx, &storageprovider.StatRequest{ | ||
| Ref: &storageprovider.Reference{ | ||
| ResourceId: anchor, | ||
| Path: utils.MakeRelativePath(rawPath), | ||
| }, | ||
| }) | ||
| if err != nil { | ||
| return "", fmt.Errorf("CS3 Stat: %w", err) | ||
| } | ||
|
|
||
| switch statRes.GetStatus().GetCode() { | ||
| case cs3rpc.Code_CODE_OK: | ||
| // fall through | ||
| case cs3rpc.Code_CODE_NOT_FOUND, cs3rpc.Code_CODE_PERMISSION_DENIED: | ||
| return "", errPathNotFound | ||
| case cs3rpc.Code_CODE_UNAUTHENTICATED: | ||
| return "", errUnauthenticated | ||
| default: | ||
| return "", fmt.Errorf( | ||
| "CS3 Stat returned %s: %s", | ||
| statRes.GetStatus().GetCode(), | ||
| statRes.GetStatus().GetMessage(), | ||
| ) | ||
|
dschmidt marked this conversation as resolved.
|
||
| } | ||
|
|
||
| id := statRes.GetInfo().GetId() | ||
| if id == nil { | ||
| return "", fmt.Errorf("CS3 Stat returned OK but missing Info.Id") | ||
| } | ||
| return storagespace.FormatResourceID(id), nil | ||
| } | ||
|
|
||
| func buildCanonicalPath(prefix, itemID, suffix string) string { | ||
| // r.URL.Path is the decoded form per Go's net/http convention; chi tree | ||
| // matching reads it directly. Insert the raw itemID without escaping so | ||
| // chi's {itemID} param captures the same string the handler will see | ||
| // after parseIDParam unescapes (no-op for already-decoded chars). | ||
| return prefix + "/items/" + itemID + suffix | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.