From 1d42286f80a09a71a8118104f23820efa98b3239 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Thu, 30 Apr 2026 18:32:19 +0530 Subject: [PATCH 01/10] fix: atomic last-owner guard prevents TOCTOU race on role demotion Add DeleteWithMinRoleGuard to the policy repository that atomically checks the owner count within the DELETE statement itself. This eliminates the race where two concurrent demotion requests both pass the application-level owner count check then both proceed. The SQL condition ensures the DELETE only executes if the policy being removed is either not the guarded role, or at least one other policy with that role exists for the same resource. If the condition fails (last owner), zero rows are affected and ErrLastRoleGuard is returned. The existing validateMinOwnerConstraint remains as a fast-path optimization to avoid unnecessary DB round-trips. --- core/membership/mocks/policy_service.go | 50 +++++++++++++++++ core/membership/service.go | 35 +++++++++++- core/membership/service_test.go | 24 ++++---- core/policy/errors.go | 1 + core/policy/mocks/repository.go | 50 +++++++++++++++++ core/policy/policy.go | 1 + core/policy/service.go | 12 ++++ internal/store/postgres/policy_repository.go | 58 ++++++++++++++++++++ 8 files changed, 218 insertions(+), 13 deletions(-) diff --git a/core/membership/mocks/policy_service.go b/core/membership/mocks/policy_service.go index ab81d7b31..834eee3dc 100644 --- a/core/membership/mocks/policy_service.go +++ b/core/membership/mocks/policy_service.go @@ -127,6 +127,56 @@ func (_c *PolicyService_Delete_Call) RunAndReturn(run func(context.Context, stri return _c } +// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID, resourceID, resourceType +func (_m *PolicyService) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { + ret := _m.Called(ctx, id, guardRoleID, resourceID, resourceType) + + if len(ret) == 0 { + panic("no return value specified for DeleteWithMinRoleGuard") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, string, string, string, string) error); ok { + r0 = rf(ctx, id, guardRoleID, resourceID, resourceType) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// PolicyService_DeleteWithMinRoleGuard_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteWithMinRoleGuard' +type PolicyService_DeleteWithMinRoleGuard_Call struct { + *mock.Call +} + +// DeleteWithMinRoleGuard is a helper method to define mock.On call +// - ctx context.Context +// - id string +// - guardRoleID string +// - resourceID string +// - resourceType string +func (_e *PolicyService_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}, resourceID interface{}, resourceType interface{}) *PolicyService_DeleteWithMinRoleGuard_Call { + return &PolicyService_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID, resourceID, resourceType)} +} + +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string)) *PolicyService_DeleteWithMinRoleGuard_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(string), args[4].(string)) + }) + return _c +} + +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Return(_a0 error) *PolicyService_DeleteWithMinRoleGuard_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string, string, string) error) *PolicyService_DeleteWithMinRoleGuard_Call { + _c.Call.Return(run) + return _c +} + // List provides a mock function with given fields: ctx, flt func (_m *PolicyService) List(ctx context.Context, flt policy.Filter) ([]policy.Policy, error) { ret := _m.Called(ctx, flt) diff --git a/core/membership/service.go b/core/membership/service.go index bb81be5c3..99daa9cae 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -28,6 +28,7 @@ type PolicyService interface { Create(ctx context.Context, pol policy.Policy) (policy.Policy, error) List(ctx context.Context, flt policy.Filter) ([]policy.Policy, error) Delete(ctx context.Context, id string) error + DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error } type RelationService interface { @@ -222,7 +223,11 @@ func (s *Service) SetOrganizationMemberRole(ctx context.Context, orgID, principa return err } - if err := s.replacePolicy(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing); err != nil { + ownerRole, err := s.roleService.Get(ctx, schema.RoleOrganizationOwner) + if err != nil { + return fmt.Errorf("get owner role for guard: %w", err) + } + if err := s.replacePolicyWithOwnerGuard(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing, ownerRole.ID); err != nil { return err } @@ -449,6 +454,34 @@ func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRole return nil } +// replacePolicyWithOwnerGuard deletes existing policies using an atomic SQL guard +// that prevents removing the last owner, then creates the new policy. +func (s *Service) replacePolicyWithOwnerGuard(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy, ownerRoleID string) error { + for _, p := range existing { + err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID, resourceID, resourceType) + if err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole + } + return fmt.Errorf("delete policy %s: %w", p.ID, err) + } + } + + _, err := s.createPolicy(ctx, resourceID, resourceType, principalID, principalType, roleID) + if err != nil { + s.log.ErrorContext(ctx, "membership state inconsistent: old policies deleted but new policy creation failed, needs manual fix", + "resource_id", resourceID, + "resource_type", resourceType, + "principal_id", principalID, + "principal_type", principalType, + "role_id", roleID, + "error", err, + ) + return err + } + return nil +} + // replacePolicy deletes the given existing policies and creates a new one with the new role. func (s *Service) replacePolicy(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy) error { for _, p := range existing { diff --git a/core/membership/service_test.go b/core/membership/service_test.go index 186dd1203..ebdb42cc3 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -412,10 +412,10 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}, {ID: "p2", RoleID: ownerRoleID}}, nil) - // replace policy - policySvc.EXPECT().Delete(ctx, "p1").Return(nil) + // replace policy with owner guard + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: viewerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -437,9 +437,9 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { roleSvc.EXPECT().Get(ctx, ownerRoleID).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: viewerRoleID}}, nil) // promoting to owner — min-owner constraint doesn't apply - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) - // replace policy - policySvc.EXPECT().Delete(ctx, "p1").Return(nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + // replace policy with owner guard + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: ownerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -460,9 +460,9 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) - policySvc.EXPECT().Delete(ctx, "p1").Return(nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // relation delete fails with a real error — logged, no rollback relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(errors.New("spicedb connection error")) @@ -477,8 +477,8 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: managerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) - policySvc.EXPECT().Delete(ctx, "p1").Return(nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // both relation deletes return not-found — that's fine, should continue relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(relation.ErrNotExist) @@ -544,9 +544,9 @@ func TestService_SetOrganizationMemberRole_ServiceUser(t *testing.T) { mockSuSvc.EXPECT().Get(ctx, suID).Return(serviceuser.ServiceUser{ID: suID, OrgID: orgID, Title: "test-su", State: string(serviceuser.Enabled)}, nil) mockRoleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) mockPolicySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: suID, PrincipalType: schema.ServiceUserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - mockRoleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + mockRoleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) mockPolicySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) - mockPolicySvc.EXPECT().Delete(ctx, "p1").Return(nil) + mockPolicySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) mockPolicySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) mockRelSvc.EXPECT().Delete(ctx, mock.Anything).Return(relation.ErrNotExist).Times(2) mockRelSvc.EXPECT().Create(ctx, mock.Anything).Return(relation.Relation{}, nil) diff --git a/core/policy/errors.go b/core/policy/errors.go index e33649d08..236305b23 100644 --- a/core/policy/errors.go +++ b/core/policy/errors.go @@ -8,4 +8,5 @@ var ( ErrInvalidID = errors.New("policy id is invalid") ErrConflict = errors.New("policy already exist") ErrInvalidDetail = errors.New("invalid policy detail") + ErrLastRoleGuard = errors.New("cannot delete: this is the last policy with the guarded role for this resource") ) diff --git a/core/policy/mocks/repository.go b/core/policy/mocks/repository.go index fdd9172ba..eed4b5f65 100644 --- a/core/policy/mocks/repository.go +++ b/core/policy/mocks/repository.go @@ -126,6 +126,56 @@ func (_c *Repository_Delete_Call) RunAndReturn(run func(context.Context, string) return _c } +// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID, resourceID, resourceType +func (_m *Repository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { + ret := _m.Called(ctx, id, guardRoleID, resourceID, resourceType) + + if len(ret) == 0 { + panic("no return value specified for DeleteWithMinRoleGuard") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, string, string, string, string) error); ok { + r0 = rf(ctx, id, guardRoleID, resourceID, resourceType) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Repository_DeleteWithMinRoleGuard_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteWithMinRoleGuard' +type Repository_DeleteWithMinRoleGuard_Call struct { + *mock.Call +} + +// DeleteWithMinRoleGuard is a helper method to define mock.On call +// - ctx context.Context +// - id string +// - guardRoleID string +// - resourceID string +// - resourceType string +func (_e *Repository_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}, resourceID interface{}, resourceType interface{}) *Repository_DeleteWithMinRoleGuard_Call { + return &Repository_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID, resourceID, resourceType)} +} + +func (_c *Repository_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string)) *Repository_DeleteWithMinRoleGuard_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(string), args[4].(string)) + }) + return _c +} + +func (_c *Repository_DeleteWithMinRoleGuard_Call) Return(_a0 error) *Repository_DeleteWithMinRoleGuard_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *Repository_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string, string, string) error) *Repository_DeleteWithMinRoleGuard_Call { + _c.Call.Return(run) + return _c +} + // Get provides a mock function with given fields: ctx, id func (_m *Repository) Get(ctx context.Context, id string) (policy.Policy, error) { ret := _m.Called(ctx, id) diff --git a/core/policy/policy.go b/core/policy/policy.go index ce2aaecf6..9d13ee764 100644 --- a/core/policy/policy.go +++ b/core/policy/policy.go @@ -13,6 +13,7 @@ type Repository interface { Count(ctx context.Context, f Filter) (int64, error) Upsert(ctx context.Context, pol Policy) (Policy, error) Delete(ctx context.Context, id string) error + DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error GroupMemberCount(ctx context.Context, IDs []string) ([]MemberCount, error) ProjectMemberCount(ctx context.Context, IDs []string) ([]MemberCount, error) OrgMemberCount(ctx context.Context, ID string) (MemberCount, error) diff --git a/core/policy/service.go b/core/policy/service.go index 93075d4c6..62ed62942 100644 --- a/core/policy/service.go +++ b/core/policy/service.go @@ -89,6 +89,18 @@ func (s Service) Delete(ctx context.Context, id string) error { return s.repository.Delete(ctx, id) } +func (s Service) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { + if err := s.relationService.Delete(ctx, relation.Relation{ + Object: relation.Object{ + ID: id, + Namespace: schema.RoleBindingNamespace, + }, + }); err != nil { + return err + } + return s.repository.DeleteWithMinRoleGuard(ctx, id, guardRoleID, resourceID, resourceType) +} + // AssignRole Note: ideally this should be in a single transaction // read more about how user defined roles work in spicedb https://authzed.com/blog/user-defined-roles func (s Service) AssignRole(ctx context.Context, pol Policy) error { diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index cb834a479..26124390d 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -363,6 +363,64 @@ func (r PolicyRepository) Delete(ctx context.Context, id string) error { return nil } +// DeleteWithMinRoleGuard atomically deletes a policy only if at least one other +// policy with the same guarded role remains for the given resource. This prevents +// the TOCTOU race where concurrent requests both pass a count check then both delete. +func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { + existingPolicy, err := r.Get(ctx, id) + if err != nil { + return err + } + + if err := r.dbc.WithTxn(ctx, sql.TxOptions{}, func(tx *sqlx.Tx) error { + return r.dbc.WithTimeout(ctx, TABLE_POLICIES, "DeleteWithMinRoleGuard", func(ctx context.Context) error { + query := `DELETE FROM policies WHERE id = $1 AND ( + role_id != $2 + OR (SELECT COUNT(*) FROM policies + WHERE resource_id = $3 + AND resource_type = $4 + AND role_id = $2 + AND id != $1 + ) > 0 + )` + result, err := tx.ExecContext(ctx, query, id, guardRoleID, resourceID, resourceType) + if err != nil { + return err + } + rowsAffected, err := result.RowsAffected() + if err != nil { + return err + } + if rowsAffected == 0 { + return policy.ErrLastRoleGuard + } + + policyDB := Policy{ + ID: existingPolicy.ID, + RoleID: existingPolicy.RoleID, + ResourceID: existingPolicy.ResourceID, + ResourceType: existingPolicy.ResourceType, + PrincipalID: existingPolicy.PrincipalID, + PrincipalType: existingPolicy.PrincipalType, + } + auditRecord := r.buildPolicyAuditRecord(ctx, tx, auditrecord.PolicyDeletedEvent, policyDB, time.Now(), nil) + return InsertAuditRecordInTx(ctx, tx, auditRecord) + }) + }); err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return err + } + err = checkPostgresError(err) + switch { + case errors.Is(err, sql.ErrNoRows): + return policy.ErrNotExist + default: + return err + } + } + return nil +} + func (r PolicyRepository) GroupMemberCount(ctx context.Context, groupIDs []string) ([]policy.MemberCount, error) { if len(groupIDs) == 0 { return nil, policy.ErrInvalidID From c0d9d9b941388b0d69207ff89a98c54ceb58da49 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Mon, 4 May 2026 14:51:22 +0530 Subject: [PATCH 02/10] fix: address review feedback on TOCTOU last-owner guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Flip SpiceDB/DB ordering — DB delete first, then SpiceDB relation removal. Prevents inconsistent state if the guard rejects the delete. 2. Use SELECT FOR UPDATE in CTE to serialize concurrent deletions under READ COMMITTED isolation. Two concurrent DELETEs of different owner rows now block on the row lock instead of both proceeding. 3. Use TABLE_POLICIES constant instead of hardcoded "policies" string. 4. Derive resource_id/resource_type from existingPolicy inside the repository instead of trusting caller-supplied values. 5. Fetch owner role once — validateMinOwnerConstraint now returns the resolved ownerRoleID for reuse, eliminating the duplicate Get call. --- core/membership/mocks/policy_service.go | 22 ++++++------- core/membership/service.go | 30 +++++++++--------- core/membership/service_test.go | 20 ++++++------ core/policy/mocks/repository.go | 22 ++++++------- core/policy/policy.go | 2 +- core/policy/service.go | 12 +++---- internal/store/postgres/policy_repository.go | 33 +++++++++++++------- 7 files changed, 72 insertions(+), 69 deletions(-) diff --git a/core/membership/mocks/policy_service.go b/core/membership/mocks/policy_service.go index 834eee3dc..c2e1b4dac 100644 --- a/core/membership/mocks/policy_service.go +++ b/core/membership/mocks/policy_service.go @@ -127,17 +127,17 @@ func (_c *PolicyService_Delete_Call) RunAndReturn(run func(context.Context, stri return _c } -// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID, resourceID, resourceType -func (_m *PolicyService) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { - ret := _m.Called(ctx, id, guardRoleID, resourceID, resourceType) +// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID +func (_m *PolicyService) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error { + ret := _m.Called(ctx, id, guardRoleID) if len(ret) == 0 { panic("no return value specified for DeleteWithMinRoleGuard") } var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, string, string) error); ok { - r0 = rf(ctx, id, guardRoleID, resourceID, resourceType) + if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok { + r0 = rf(ctx, id, guardRoleID) } else { r0 = ret.Error(0) } @@ -154,15 +154,13 @@ type PolicyService_DeleteWithMinRoleGuard_Call struct { // - ctx context.Context // - id string // - guardRoleID string -// - resourceID string -// - resourceType string -func (_e *PolicyService_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}, resourceID interface{}, resourceType interface{}) *PolicyService_DeleteWithMinRoleGuard_Call { - return &PolicyService_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID, resourceID, resourceType)} +func (_e *PolicyService_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}) *PolicyService_DeleteWithMinRoleGuard_Call { + return &PolicyService_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID)} } -func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string)) *PolicyService_DeleteWithMinRoleGuard_Call { +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string)) *PolicyService_DeleteWithMinRoleGuard_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(string), args[4].(string)) + run(args[0].(context.Context), args[1].(string), args[2].(string)) }) return _c } @@ -172,7 +170,7 @@ func (_c *PolicyService_DeleteWithMinRoleGuard_Call) Return(_a0 error) *PolicySe return _c } -func (_c *PolicyService_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string, string, string) error) *PolicyService_DeleteWithMinRoleGuard_Call { +func (_c *PolicyService_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string) error) *PolicyService_DeleteWithMinRoleGuard_Call { _c.Call.Return(run) return _c } diff --git a/core/membership/service.go b/core/membership/service.go index 99daa9cae..fdb538626 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -28,7 +28,7 @@ type PolicyService interface { Create(ctx context.Context, pol policy.Policy) (policy.Policy, error) List(ctx context.Context, flt policy.Filter) ([]policy.Policy, error) Delete(ctx context.Context, id string) error - DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error + DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error } type RelationService interface { @@ -219,15 +219,12 @@ func (s *Service) SetOrganizationMemberRole(ctx context.Context, orgID, principa return nil } - if err := s.validateMinOwnerConstraint(ctx, orgID, resolvedRoleID, existing); err != nil { + ownerRoleID, err := s.validateMinOwnerConstraint(ctx, orgID, resolvedRoleID, existing) + if err != nil { return err } - ownerRole, err := s.roleService.Get(ctx, schema.RoleOrganizationOwner) - if err != nil { - return fmt.Errorf("get owner role for guard: %w", err) - } - if err := s.replacePolicyWithOwnerGuard(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing, ownerRole.ID); err != nil { + if err := s.replacePolicyWithOwnerGuard(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing, ownerRoleID); err != nil { return err } @@ -277,7 +274,7 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return ErrNotMember } - if err = s.validateMinOwnerConstraint(ctx, orgID, "", orgPolicies); err != nil { + if _, err = s.validateMinOwnerConstraint(ctx, orgID, "", orgPolicies); err != nil { return err } @@ -417,15 +414,16 @@ func (s *Service) removeRelations(ctx context.Context, resourceID, resourceType, } // validateMinOwnerConstraint ensures the org always has at least one owner after a role change. -func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRoleID string, existing []policy.Policy) error { +// Returns the resolved owner role ID for reuse by callers. +func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRoleID string, existing []policy.Policy) (string, error) { ownerRole, err := s.roleService.Get(ctx, schema.RoleOrganizationOwner) if err != nil { - return fmt.Errorf("get owner role: %w", err) + return "", fmt.Errorf("get owner role: %w", err) } // no constraint if promoting to owner if newRoleID == ownerRole.ID { - return nil + return ownerRole.ID, nil } // no constraint if user is not currently an owner @@ -437,7 +435,7 @@ func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRole } } if !isCurrentlyOwner { - return nil + return ownerRole.ID, nil } // user is owner, being demoted — make sure at least one other owner remains @@ -446,19 +444,19 @@ func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRole RoleID: ownerRole.ID, }) if err != nil { - return fmt.Errorf("list owner policies: %w", err) + return "", fmt.Errorf("list owner policies: %w", err) } if len(ownerPolicies) <= 1 { - return ErrLastOwnerRole + return "", ErrLastOwnerRole } - return nil + return ownerRole.ID, nil } // replacePolicyWithOwnerGuard deletes existing policies using an atomic SQL guard // that prevents removing the last owner, then creates the new policy. func (s *Service) replacePolicyWithOwnerGuard(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy, ownerRoleID string) error { for _, p := range existing { - err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID, resourceID, resourceType) + err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID) if err != nil { if errors.Is(err, policy.ErrLastRoleGuard) { return ErrLastOwnerRole diff --git a/core/membership/service_test.go b/core/membership/service_test.go index ebdb42cc3..7c8f6f209 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -412,10 +412,10 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}, {ID: "p2", RoleID: ownerRoleID}}, nil) // replace policy with owner guard - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: viewerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -437,9 +437,9 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { roleSvc.EXPECT().Get(ctx, ownerRoleID).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: viewerRoleID}}, nil) // promoting to owner — min-owner constraint doesn't apply - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) // replace policy with owner guard - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: ownerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -460,9 +460,9 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // relation delete fails with a real error — logged, no rollback relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(errors.New("spicedb connection error")) @@ -477,8 +477,8 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: managerRoleID}}, nil) - roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // both relation deletes return not-found — that's fine, should continue relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(relation.ErrNotExist) @@ -544,9 +544,9 @@ func TestService_SetOrganizationMemberRole_ServiceUser(t *testing.T) { mockSuSvc.EXPECT().Get(ctx, suID).Return(serviceuser.ServiceUser{ID: suID, OrgID: orgID, Title: "test-su", State: string(serviceuser.Enabled)}, nil) mockRoleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) mockPolicySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: suID, PrincipalType: schema.ServiceUserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) - mockRoleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil).Times(2) + mockRoleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) mockPolicySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) - mockPolicySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID, orgID, schema.OrganizationNamespace).Return(nil) + mockPolicySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) mockPolicySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) mockRelSvc.EXPECT().Delete(ctx, mock.Anything).Return(relation.ErrNotExist).Times(2) mockRelSvc.EXPECT().Create(ctx, mock.Anything).Return(relation.Relation{}, nil) diff --git a/core/policy/mocks/repository.go b/core/policy/mocks/repository.go index eed4b5f65..7486bbebe 100644 --- a/core/policy/mocks/repository.go +++ b/core/policy/mocks/repository.go @@ -126,17 +126,17 @@ func (_c *Repository_Delete_Call) RunAndReturn(run func(context.Context, string) return _c } -// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID, resourceID, resourceType -func (_m *Repository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { - ret := _m.Called(ctx, id, guardRoleID, resourceID, resourceType) +// DeleteWithMinRoleGuard provides a mock function with given fields: ctx, id, guardRoleID +func (_m *Repository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error { + ret := _m.Called(ctx, id, guardRoleID) if len(ret) == 0 { panic("no return value specified for DeleteWithMinRoleGuard") } var r0 error - if rf, ok := ret.Get(0).(func(context.Context, string, string, string, string) error); ok { - r0 = rf(ctx, id, guardRoleID, resourceID, resourceType) + if rf, ok := ret.Get(0).(func(context.Context, string, string) error); ok { + r0 = rf(ctx, id, guardRoleID) } else { r0 = ret.Error(0) } @@ -153,15 +153,13 @@ type Repository_DeleteWithMinRoleGuard_Call struct { // - ctx context.Context // - id string // - guardRoleID string -// - resourceID string -// - resourceType string -func (_e *Repository_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}, resourceID interface{}, resourceType interface{}) *Repository_DeleteWithMinRoleGuard_Call { - return &Repository_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID, resourceID, resourceType)} +func (_e *Repository_Expecter) DeleteWithMinRoleGuard(ctx interface{}, id interface{}, guardRoleID interface{}) *Repository_DeleteWithMinRoleGuard_Call { + return &Repository_DeleteWithMinRoleGuard_Call{Call: _e.mock.On("DeleteWithMinRoleGuard", ctx, id, guardRoleID)} } -func (_c *Repository_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string)) *Repository_DeleteWithMinRoleGuard_Call { +func (_c *Repository_DeleteWithMinRoleGuard_Call) Run(run func(ctx context.Context, id string, guardRoleID string)) *Repository_DeleteWithMinRoleGuard_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(string), args[4].(string)) + run(args[0].(context.Context), args[1].(string), args[2].(string)) }) return _c } @@ -171,7 +169,7 @@ func (_c *Repository_DeleteWithMinRoleGuard_Call) Return(_a0 error) *Repository_ return _c } -func (_c *Repository_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string, string, string) error) *Repository_DeleteWithMinRoleGuard_Call { +func (_c *Repository_DeleteWithMinRoleGuard_Call) RunAndReturn(run func(context.Context, string, string) error) *Repository_DeleteWithMinRoleGuard_Call { _c.Call.Return(run) return _c } diff --git a/core/policy/policy.go b/core/policy/policy.go index 9d13ee764..7287d10b6 100644 --- a/core/policy/policy.go +++ b/core/policy/policy.go @@ -13,7 +13,7 @@ type Repository interface { Count(ctx context.Context, f Filter) (int64, error) Upsert(ctx context.Context, pol Policy) (Policy, error) Delete(ctx context.Context, id string) error - DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error + DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error GroupMemberCount(ctx context.Context, IDs []string) ([]MemberCount, error) ProjectMemberCount(ctx context.Context, IDs []string) ([]MemberCount, error) OrgMemberCount(ctx context.Context, ID string) (MemberCount, error) diff --git a/core/policy/service.go b/core/policy/service.go index 62ed62942..e1a432d31 100644 --- a/core/policy/service.go +++ b/core/policy/service.go @@ -89,16 +89,16 @@ func (s Service) Delete(ctx context.Context, id string) error { return s.repository.Delete(ctx, id) } -func (s Service) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { - if err := s.relationService.Delete(ctx, relation.Relation{ +func (s Service) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error { + if err := s.repository.DeleteWithMinRoleGuard(ctx, id, guardRoleID); err != nil { + return err + } + return s.relationService.Delete(ctx, relation.Relation{ Object: relation.Object{ ID: id, Namespace: schema.RoleBindingNamespace, }, - }); err != nil { - return err - } - return s.repository.DeleteWithMinRoleGuard(ctx, id, guardRoleID, resourceID, resourceType) + }) } // AssignRole Note: ideally this should be in a single transaction diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index 26124390d..8596fba4c 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -364,9 +364,11 @@ func (r PolicyRepository) Delete(ctx context.Context, id string) error { } // DeleteWithMinRoleGuard atomically deletes a policy only if at least one other -// policy with the same guarded role remains for the given resource. This prevents -// the TOCTOU race where concurrent requests both pass a count check then both delete. -func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string, resourceID string, resourceType string) error { +// policy with the same guarded role remains for the resource. Uses SELECT FOR UPDATE +// to serialize concurrent deletions under READ COMMITTED isolation, preventing the +// TOCTOU race where two concurrent requests both pass a count check then both delete. +// Resource ID and type are derived from the existing policy, not from caller input. +func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, guardRoleID string) error { existingPolicy, err := r.Get(ctx, id) if err != nil { return err @@ -374,16 +376,23 @@ func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, if err := r.dbc.WithTxn(ctx, sql.TxOptions{}, func(tx *sqlx.Tx) error { return r.dbc.WithTimeout(ctx, TABLE_POLICIES, "DeleteWithMinRoleGuard", func(ctx context.Context) error { - query := `DELETE FROM policies WHERE id = $1 AND ( - role_id != $2 - OR (SELECT COUNT(*) FROM policies - WHERE resource_id = $3 - AND resource_type = $4 - AND role_id = $2 - AND id != $1 - ) > 0 + query := `WITH locked AS ( + SELECT id FROM ` + TABLE_POLICIES + ` + WHERE resource_id = $2 + AND resource_type = $3 + AND role_id = $4 + FOR UPDATE + ) + DELETE FROM ` + TABLE_POLICIES + ` WHERE id = $1 AND ( + (SELECT role_id FROM ` + TABLE_POLICIES + ` WHERE id = $1) != $4 + OR (SELECT COUNT(*) FROM locked WHERE id != $1) > 0 )` - result, err := tx.ExecContext(ctx, query, id, guardRoleID, resourceID, resourceType) + result, err := tx.ExecContext(ctx, query, + id, + existingPolicy.ResourceID, + existingPolicy.ResourceType, + guardRoleID, + ) if err != nil { return err } From 181f22ee8849b342c9e599dad8f6571c2b2c7d61 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Mon, 4 May 2026 21:15:43 +0530 Subject: [PATCH 03/10] fix: address round 2 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Only use guarded delete for owner policies, plain Delete for non-owner (avoids unnecessary locking/serialization) - Apply guarded delete in RemoveOrganizationMember flow too (same TOCTOU) - Add unit test for ErrLastRoleGuard → ErrLastOwnerRole mapping - Fix test expectations for non-owner policy deletions --- core/membership/service.go | 47 ++++++++++++++++++++++++--------- core/membership/service_test.go | 23 +++++++++++++--- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/core/membership/service.go b/core/membership/service.go index fdb538626..14b79d4b4 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -274,7 +274,8 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return ErrNotMember } - if _, err = s.validateMinOwnerConstraint(ctx, orgID, "", orgPolicies); err != nil { + ownerRoleID, err := s.validateMinOwnerConstraint(ctx, orgID, "", orgPolicies) + if err != nil { return err } @@ -308,13 +309,13 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal // delete sub-resource policies first (projects, groups), then relations, // then org policies last — so a retry after partial failure won't hit ErrNotMember - var orgPolicyIDs []string + var orgPoliciesToDelete []policy.Policy var errs error for _, pol := range allPolicies { switch pol.ResourceType { case schema.OrganizationNamespace: if pol.ResourceID == orgID { - orgPolicyIDs = append(orgPolicyIDs, pol.ID) + orgPoliciesToDelete = append(orgPoliciesToDelete, pol) } case schema.ProjectNamespace: if _, ok := orgProjectIDSet[pol.ResourceID]; ok { @@ -377,17 +378,31 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } } - // delete org-level policies last - for _, policyID := range orgPolicyIDs { - if err := s.policyService.Delete(ctx, policyID); err != nil { + // delete org-level policies last, using guarded delete for owner policies + for _, pol := range orgPoliciesToDelete { + if pol.RoleID == ownerRoleID { + if err := s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID); err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole + } + s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", + "org_id", orgID, + "policy_id", pol.ID, + "principal_id", principalID, + "principal_type", principalType, + "error", err, + ) + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) + } + } else if err := s.policyService.Delete(ctx, pol.ID); err != nil { s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", "org_id", orgID, - "policy_id", policyID, + "policy_id", pol.ID, "principal_id", principalID, "principal_type", principalType, "error", err, ) - return fmt.Errorf("delete org policy %s: %w", policyID, err) + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) } } @@ -456,12 +471,18 @@ func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRole // that prevents removing the last owner, then creates the new policy. func (s *Service) replacePolicyWithOwnerGuard(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy, ownerRoleID string) error { for _, p := range existing { - err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID) - if err != nil { - if errors.Is(err, policy.ErrLastRoleGuard) { - return ErrLastOwnerRole + if p.RoleID == ownerRoleID { + err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID) + if err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole + } + return fmt.Errorf("delete policy %s: %w", p.ID, err) + } + } else { + if err := s.policyService.Delete(ctx, p.ID); err != nil { + return fmt.Errorf("delete policy %s: %w", p.ID, err) } - return fmt.Errorf("delete policy %s: %w", p.ID, err) } } diff --git a/core/membership/service_test.go b/core/membership/service_test.go index 7c8f6f209..da3bffcbc 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -405,6 +405,22 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { roleID: managerRoleID, wantErr: membership.ErrLastOwnerRole, }, + { + name: "should return ErrLastOwnerRole when DB guard rejects concurrent demotion", + setup: func(policySvc *mocks.PolicyService, _ *mocks.RelationService, roleSvc *mocks.RoleService, orgSvc *mocks.OrgService, userSvc *mocks.UserService, _ *mocks.AuditRecordRepository) { + orgSvc.EXPECT().Get(ctx, orgID).Return(enabledOrg, nil) + userSvc.EXPECT().GetByID(ctx, userID).Return(enabledUser, nil) + roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) + policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: ownerRoleID}}, nil) + roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) + // app-level check passes (sees 2 owners) + policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, RoleID: ownerRoleID}).Return([]policy.Policy{{ID: "p1"}, {ID: "p2"}}, nil) + // DB-level guard rejects (concurrent request already deleted the other owner) + policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(policy.ErrLastRoleGuard) + }, + roleID: viewerRoleID, + wantErr: membership.ErrLastOwnerRole, + }, { name: "should succeed demoting owner to viewer with multiple owners", setup: func(policySvc *mocks.PolicyService, relSvc *mocks.RelationService, roleSvc *mocks.RoleService, orgSvc *mocks.OrgService, userSvc *mocks.UserService, auditRepo *mocks.AuditRecordRepository) { @@ -438,8 +454,8 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: viewerRoleID}}, nil) // promoting to owner — min-owner constraint doesn't apply roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) - // replace policy with owner guard - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) + // existing policy is viewer (non-owner), uses plain Delete + policySvc.EXPECT().Delete(ctx, "p1").Return(nil) policySvc.EXPECT().Create(ctx, policy.Policy{ RoleID: ownerRoleID, ResourceID: orgID, ResourceType: schema.OrganizationNamespace, PrincipalID: userID, PrincipalType: schema.UserPrincipal, @@ -478,7 +494,8 @@ func TestService_SetOrganizationMemberRole(t *testing.T) { roleSvc.EXPECT().Get(ctx, viewerRoleID).Return(role.Role{ID: viewerRoleID, Scopes: []string{schema.OrganizationNamespace}}, nil) policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "p1", RoleID: managerRoleID}}, nil) roleSvc.EXPECT().Get(ctx, schema.RoleOrganizationOwner).Return(role.Role{ID: ownerRoleID, Name: schema.RoleOrganizationOwner}, nil) - policySvc.EXPECT().DeleteWithMinRoleGuard(ctx, "p1", ownerRoleID).Return(nil) + // existing policy is manager (non-owner), uses plain Delete + policySvc.EXPECT().Delete(ctx, "p1").Return(nil) policySvc.EXPECT().Create(ctx, mock.Anything).Return(policy.Policy{}, nil) // both relation deletes return not-found — that's fine, should continue relSvc.EXPECT().Delete(ctx, orgRelation(schema.OwnerRelationName)).Return(relation.ErrNotExist) From 84e8170c42c0a5772990abb1f5d0e364a19b8c3f Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Tue, 5 May 2026 12:57:11 +0530 Subject: [PATCH 04/10] fix: move guarded org policy delete before relation cleanup in RemoveOrganizationMember If the guard rejects (last owner), we now return early before touching any SpiceDB relations, preventing partial state modification on rejection. --- core/membership/service.go | 61 ++++++++++++++++----------------- core/membership/service_test.go | 8 +++-- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/core/membership/service.go b/core/membership/service.go index 14b79d4b4..ab6cc9755 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -341,7 +341,36 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return errs } - // remove relations at group level + // delete org-level owner policies first via guarded delete — if the guard rejects + // (last owner), we return early before touching any relations or other policies + for _, pol := range orgPoliciesToDelete { + if pol.RoleID == ownerRoleID { + if err := s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID); err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole + } + s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", + "org_id", orgID, + "policy_id", pol.ID, + "principal_id", principalID, + "principal_type", principalType, + "error", err, + ) + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) + } + } else if err := s.policyService.Delete(ctx, pol.ID); err != nil { + s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", + "org_id", orgID, + "policy_id", pol.ID, + "principal_id", principalID, + "principal_type", principalType, + "error", err, + ) + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) + } + } + + // guarded deletes passed — safe to clean up relations for _, g := range orgGroups { if err := s.removeRelations(ctx, g.ID, schema.GroupNamespace, principalID, principalType); err != nil { s.log.Error("partial failure removing member: group relation cleanup failed, manual cleanup may be needed", @@ -355,7 +384,6 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } } - // remove relations at org level if err := s.removeRelations(ctx, orgID, schema.OrganizationNamespace, principalID, principalType); err != nil { s.log.Error("partial failure removing member: org relation cleanup failed, manual cleanup may be needed", "org_id", orgID, @@ -366,7 +394,6 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return fmt.Errorf("remove org relations: %w", err) } - // remove identity link for service users (serviceuser#org@organization) if principalType == schema.ServiceUserPrincipal { err := s.relationService.Delete(ctx, relation.Relation{ Object: relation.Object{ID: principalID, Namespace: schema.ServiceUserPrincipal}, @@ -378,34 +405,6 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } } - // delete org-level policies last, using guarded delete for owner policies - for _, pol := range orgPoliciesToDelete { - if pol.RoleID == ownerRoleID { - if err := s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID); err != nil { - if errors.Is(err, policy.ErrLastRoleGuard) { - return ErrLastOwnerRole - } - s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", - "org_id", orgID, - "policy_id", pol.ID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) - return fmt.Errorf("delete org policy %s: %w", pol.ID, err) - } - } else if err := s.policyService.Delete(ctx, pol.ID); err != nil { - s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", - "org_id", orgID, - "policy_id", pol.ID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) - return fmt.Errorf("delete org policy %s: %w", pol.ID, err) - } - } - s.auditOrgMemberRemoved(ctx, org, principalID, targetAuditType) audit.GetAuditor(ctx, org.ID).Log(audit.OrgMemberDeletedEvent, audit.Target{ ID: principalID, diff --git a/core/membership/service_test.go b/core/membership/service_test.go index da3bffcbc..a56561601 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -772,7 +772,7 @@ func TestService_RemoveOrganizationMember(t *testing.T) { wantErrContain: "delete project policy", }, { - name: "should return error if org relation removal fails without deleting org policies", + name: "should return error if org relation removal fails after org policies deleted", setup: func(d testDeps) { d.orgSvc.EXPECT().Get(ctx, orgID).Return(enabledOrg, nil) d.policySvc.EXPECT().List(ctx, policy.Filter{OrgID: orgID, PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{{ID: "org-p1", RoleID: viewerRoleID}}, nil) @@ -780,9 +780,11 @@ func TestService_RemoveOrganizationMember(t *testing.T) { d.projSvc.EXPECT().List(ctx, project.Filter{OrgID: orgID}).Return([]project.Project{}, nil) d.grpSvc.EXPECT().List(ctx, group.Filter{OrganizationID: orgID}).Return([]group.Group{}, nil) d.policySvc.EXPECT().List(ctx, policy.Filter{PrincipalID: userID, PrincipalType: schema.UserPrincipal}).Return([]policy.Policy{ - {ID: "org-p1", ResourceType: schema.OrganizationNamespace, ResourceID: orgID}, + {ID: "org-p1", ResourceType: schema.OrganizationNamespace, ResourceID: orgID, RoleID: viewerRoleID}, }, nil) - // org policy Delete should NOT be called — relations fail first, org policies are last + // org policy deleted first (viewer, plain Delete) + d.policySvc.EXPECT().Delete(ctx, "org-p1").Return(nil) + // then relation removal fails d.relSvc.EXPECT().Delete(ctx, relation.Relation{Object: orgObj, Subject: userSub, RelationName: schema.OwnerRelationName}).Return(errors.New("spicedb down")) }, wantErrContain: "remove org relations", From 65506cd2767437680a4e46f831f85ff05bdcecfc Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Tue, 5 May 2026 15:23:05 +0530 Subject: [PATCH 05/10] fix: run guarded owner delete before any sub-resource policy deletions Restructured RemoveOrganizationMember into 3 phases: 1. Classify policies by type (no deletions) 2. Guarded org owner delete (rejects early if last owner) 3. Remaining org policies, sub-resource policies, then relations This ensures guard rejection leaves no partial state. --- core/membership/service.go | 66 +++++++++++++++------------------ core/membership/service_test.go | 2 +- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/core/membership/service.go b/core/membership/service.go index ab6cc9755..c817aa7c7 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -307,10 +307,9 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return fmt.Errorf("list all principal policies: %w", err) } - // delete sub-resource policies first (projects, groups), then relations, - // then org policies last — so a retry after partial failure won't hit ErrNotMember + // Phase 1: classify policies by type (no deletions yet) var orgPoliciesToDelete []policy.Policy - var errs error + var subResourcePolicies []policy.Policy for _, pol := range allPolicies { switch pol.ResourceType { case schema.OrganizationNamespace: @@ -319,57 +318,52 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } case schema.ProjectNamespace: if _, ok := orgProjectIDSet[pol.ResourceID]; ok { - if err := s.policyService.Delete(ctx, pol.ID); err != nil { - errs = errors.Join(errs, fmt.Errorf("delete project policy %s: %w", pol.ID, err)) - } + subResourcePolicies = append(subResourcePolicies, pol) } case schema.GroupNamespace: if _, ok := orgGroupIDSet[pol.ResourceID]; ok { - if err := s.policyService.Delete(ctx, pol.ID); err != nil { - errs = errors.Join(errs, fmt.Errorf("delete group policy %s: %w", pol.ID, err)) - } + subResourcePolicies = append(subResourcePolicies, pol) } } } - if errs != nil { - s.log.Error("partial failure removing member: some policies could not be deleted, manual cleanup may be needed", - "org_id", orgID, - "principal_id", principalID, - "principal_type", principalType, - "error", errs, - ) - return errs - } - // delete org-level owner policies first via guarded delete — if the guard rejects - // (last owner), we return early before touching any relations or other policies + // Phase 2: guarded org owner policy delete runs first — if the guard rejects + // (last owner), we return early before touching any other policies or relations for _, pol := range orgPoliciesToDelete { if pol.RoleID == ownerRoleID { if err := s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID); err != nil { if errors.Is(err, policy.ErrLastRoleGuard) { return ErrLastOwnerRole } - s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", - "org_id", orgID, - "policy_id", pol.ID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) return fmt.Errorf("delete org policy %s: %w", pol.ID, err) } - } else if err := s.policyService.Delete(ctx, pol.ID); err != nil { - s.log.Error("partial failure removing member: org policy deletion failed, manual cleanup may be needed", - "org_id", orgID, - "policy_id", pol.ID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) - return fmt.Errorf("delete org policy %s: %w", pol.ID, err) } } + // Phase 3: guard passed — safe to delete remaining org policies, sub-resource policies + for _, pol := range orgPoliciesToDelete { + if pol.RoleID != ownerRoleID { + if err := s.policyService.Delete(ctx, pol.ID); err != nil { + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) + } + } + } + var errs error + for _, pol := range subResourcePolicies { + if err := s.policyService.Delete(ctx, pol.ID); err != nil { + errs = errors.Join(errs, fmt.Errorf("delete sub-resource policy %s: %w", pol.ID, err)) + } + } + if errs != nil { + s.log.Error("partial failure removing member: some policies could not be deleted, manual cleanup may be needed", + "org_id", orgID, + "principal_id", principalID, + "principal_type", principalType, + "error", errs, + ) + return errs + } + // guarded deletes passed — safe to clean up relations for _, g := range orgGroups { if err := s.removeRelations(ctx, g.ID, schema.GroupNamespace, principalID, principalType); err != nil { diff --git a/core/membership/service_test.go b/core/membership/service_test.go index a56561601..175773583 100644 --- a/core/membership/service_test.go +++ b/core/membership/service_test.go @@ -769,7 +769,7 @@ func TestService_RemoveOrganizationMember(t *testing.T) { }, nil) d.policySvc.EXPECT().Delete(ctx, "proj-p1").Return(errors.New("delete failed")) }, - wantErrContain: "delete project policy", + wantErrContain: "delete sub-resource policy", }, { name: "should return error if org relation removal fails after org policies deleted", From 4e6d9cbfe8c31e13d144c2dff1bc0dcadaa87a2a Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Wed, 6 May 2026 10:42:59 +0530 Subject: [PATCH 06/10] fix: differentiate guard rejection from concurrently deleted policy When rowsAffected==0, re-check existence inside the transaction. If the row is gone (concurrent delete), return sql.ErrNoRows which maps to ErrNotExist. Only return ErrLastRoleGuard when the row still exists but the guard condition prevented deletion. --- internal/store/postgres/policy_repository.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index 8596fba4c..2f61f514e 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -401,6 +401,12 @@ func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, return err } if rowsAffected == 0 { + var existingID string + if err := tx.QueryRowContext(ctx, + `SELECT id FROM `+TABLE_POLICIES+` WHERE id = $1`, id, + ).Scan(&existingID); errors.Is(err, sql.ErrNoRows) { + return sql.ErrNoRows + } return policy.ErrLastRoleGuard } From 890ef9ec9a5647eae5fd62cba762f52e43741f6b Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Wed, 6 May 2026 10:57:30 +0530 Subject: [PATCH 07/10] fix: propagate non-ErrNoRows errors from existence re-check --- internal/store/postgres/policy_repository.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index 2f61f514e..144786034 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -402,11 +402,15 @@ func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, } if rowsAffected == 0 { var existingID string - if err := tx.QueryRowContext(ctx, + err := tx.QueryRowContext(ctx, `SELECT id FROM `+TABLE_POLICIES+` WHERE id = $1`, id, - ).Scan(&existingID); errors.Is(err, sql.ErrNoRows) { + ).Scan(&existingID) + if errors.Is(err, sql.ErrNoRows) { return sql.ErrNoRows } + if err != nil { + return err + } return policy.ErrLastRoleGuard } From cfdf07e7165fe1adb86b919ce782d35a38a4f4cd Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Wed, 6 May 2026 13:11:21 +0530 Subject: [PATCH 08/10] fix: add ORDER BY to FOR UPDATE CTE to prevent deadlocks --- internal/store/postgres/policy_repository.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/store/postgres/policy_repository.go b/internal/store/postgres/policy_repository.go index 144786034..aba751860 100644 --- a/internal/store/postgres/policy_repository.go +++ b/internal/store/postgres/policy_repository.go @@ -381,6 +381,7 @@ func (r PolicyRepository) DeleteWithMinRoleGuard(ctx context.Context, id string, WHERE resource_id = $2 AND resource_type = $3 AND role_id = $4 + ORDER BY id FOR UPDATE ) DELETE FROM ` + TABLE_POLICIES + ` WHERE id = $1 AND ( From ce29b1566608810c3d322211e04173ea25262621 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Wed, 13 May 2026 14:33:38 +0530 Subject: [PATCH 09/10] refactor: extract cascadeRemovePrincipal and merge replacePolicy variants --- core/membership/service.go | 129 ++++++++++++++----------------------- 1 file changed, 47 insertions(+), 82 deletions(-) diff --git a/core/membership/service.go b/core/membership/service.go index c817aa7c7..1578726ca 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -224,7 +224,7 @@ func (s *Service) SetOrganizationMemberRole(ctx context.Context, orgID, principa return err } - if err := s.replacePolicyWithOwnerGuard(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing, ownerRoleID); err != nil { + if err := s.replacePolicy(ctx, orgID, schema.OrganizationNamespace, principalID, principalType, resolvedRoleID, existing, ownerRoleID); err != nil { return err } @@ -279,7 +279,26 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return err } - // pre-compute org project and group ID sets for O(1) lookups + if err := s.cascadeRemovePrincipal(ctx, org, principalID, principalType, ownerRoleID); err != nil { + return err + } + + s.auditOrgMemberRemoved(ctx, org, principalID, targetAuditType) + audit.GetAuditor(ctx, org.ID).Log(audit.OrgMemberDeletedEvent, audit.Target{ + ID: principalID, + Type: principalType, + }) + + return nil +} + +// cascadeRemovePrincipal deletes all policies and SpiceDB relations for a principal +// being removed from an organization, including cascaded project/group sub-resources. +// Owner-role org policies are deleted with the atomic guard first; if the guard rejects +// (last owner), the method returns ErrLastOwnerRole before any other mutation. +func (s *Service) cascadeRemovePrincipal(ctx context.Context, org organization.Organization, principalID, principalType, ownerRoleID string) error { + orgID := org.ID + orgProjects, err := s.projectService.List(ctx, project.Filter{OrgID: orgID}) if err != nil { return fmt.Errorf("list org projects: %w", err) @@ -298,7 +317,6 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal orgGroupIDSet[g.ID] = struct{}{} } - // list all policies for the principal across all resources allPolicies, err := s.policyService.List(ctx, policy.Filter{ PrincipalID: principalID, PrincipalType: principalType, @@ -307,14 +325,13 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return fmt.Errorf("list all principal policies: %w", err) } - // Phase 1: classify policies by type (no deletions yet) - var orgPoliciesToDelete []policy.Policy - var subResourcePolicies []policy.Policy + // classify policies by scope + var orgPolicies, subResourcePolicies []policy.Policy for _, pol := range allPolicies { switch pol.ResourceType { case schema.OrganizationNamespace: if pol.ResourceID == orgID { - orgPoliciesToDelete = append(orgPoliciesToDelete, pol) + orgPolicies = append(orgPolicies, pol) } case schema.ProjectNamespace: if _, ok := orgProjectIDSet[pol.ResourceID]; ok { @@ -327,27 +344,17 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } } - // Phase 2: guarded org owner policy delete runs first — if the guard rejects - // (last owner), we return early before touching any other policies or relations - for _, pol := range orgPoliciesToDelete { - if pol.RoleID == ownerRoleID { - if err := s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID); err != nil { - if errors.Is(err, policy.ErrLastRoleGuard) { - return ErrLastOwnerRole - } - return fmt.Errorf("delete org policy %s: %w", pol.ID, err) + // guarded owner delete first — returns early if this is the last owner + for _, pol := range orgPolicies { + if err := s.deletePolicy(ctx, pol, ownerRoleID); err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole } + return fmt.Errorf("delete org policy %s: %w", pol.ID, err) } } - // Phase 3: guard passed — safe to delete remaining org policies, sub-resource policies - for _, pol := range orgPoliciesToDelete { - if pol.RoleID != ownerRoleID { - if err := s.policyService.Delete(ctx, pol.ID); err != nil { - return fmt.Errorf("delete org policy %s: %w", pol.ID, err) - } - } - } + // guard passed — delete sub-resource policies var errs error for _, pol := range subResourcePolicies { if err := s.policyService.Delete(ctx, pol.ID); err != nil { @@ -364,30 +371,17 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal return errs } - // guarded deletes passed — safe to clean up relations + // clean up SpiceDB relations for _, g := range orgGroups { if err := s.removeRelations(ctx, g.ID, schema.GroupNamespace, principalID, principalType); err != nil { - s.log.Error("partial failure removing member: group relation cleanup failed, manual cleanup may be needed", - "org_id", orgID, - "group_id", g.ID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) return fmt.Errorf("remove group %s relations: %w", g.ID, err) } } - if err := s.removeRelations(ctx, orgID, schema.OrganizationNamespace, principalID, principalType); err != nil { - s.log.Error("partial failure removing member: org relation cleanup failed, manual cleanup may be needed", - "org_id", orgID, - "principal_id", principalID, - "principal_type", principalType, - "error", err, - ) return fmt.Errorf("remove org relations: %w", err) } + // remove identity link for service users if principalType == schema.ServiceUserPrincipal { err := s.relationService.Delete(ctx, relation.Relation{ Object: relation.Object{ID: principalID, Namespace: schema.ServiceUserPrincipal}, @@ -399,12 +393,6 @@ func (s *Service) RemoveOrganizationMember(ctx context.Context, orgID, principal } } - s.auditOrgMemberRemoved(ctx, org, principalID, targetAuditType) - audit.GetAuditor(ctx, org.ID).Log(audit.OrgMemberDeletedEvent, audit.Target{ - ID: principalID, - Type: principalType, - }) - return nil } @@ -460,22 +448,16 @@ func (s *Service) validateMinOwnerConstraint(ctx context.Context, orgID, newRole return ownerRole.ID, nil } -// replacePolicyWithOwnerGuard deletes existing policies using an atomic SQL guard -// that prevents removing the last owner, then creates the new policy. -func (s *Service) replacePolicyWithOwnerGuard(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy, ownerRoleID string) error { +// replacePolicy deletes the given existing policies and creates a new one with the new role. +// When ownerRoleID is non-empty, owner-role policies are deleted atomically via +// DeleteWithMinRoleGuard to prevent the last-owner TOCTOU race. +func (s *Service) replacePolicy(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy, ownerRoleID string) error { for _, p := range existing { - if p.RoleID == ownerRoleID { - err := s.policyService.DeleteWithMinRoleGuard(ctx, p.ID, ownerRoleID) - if err != nil { - if errors.Is(err, policy.ErrLastRoleGuard) { - return ErrLastOwnerRole - } - return fmt.Errorf("delete policy %s: %w", p.ID, err) - } - } else { - if err := s.policyService.Delete(ctx, p.ID); err != nil { - return fmt.Errorf("delete policy %s: %w", p.ID, err) + if err := s.deletePolicy(ctx, p, ownerRoleID); err != nil { + if errors.Is(err, policy.ErrLastRoleGuard) { + return ErrLastOwnerRole } + return fmt.Errorf("delete policy %s: %w", p.ID, err) } } @@ -494,28 +476,11 @@ func (s *Service) replacePolicyWithOwnerGuard(ctx context.Context, resourceID, r return nil } -// replacePolicy deletes the given existing policies and creates a new one with the new role. -func (s *Service) replacePolicy(ctx context.Context, resourceID, resourceType, principalID, principalType, roleID string, existing []policy.Policy) error { - for _, p := range existing { - err := s.policyService.Delete(ctx, p.ID) - if err != nil { - return fmt.Errorf("delete policy %s: %w", p.ID, err) - } +func (s *Service) deletePolicy(ctx context.Context, pol policy.Policy, ownerRoleID string) error { + if ownerRoleID != "" && pol.RoleID == ownerRoleID { + return s.policyService.DeleteWithMinRoleGuard(ctx, pol.ID, ownerRoleID) } - - _, err := s.createPolicy(ctx, resourceID, resourceType, principalID, principalType, roleID) - if err != nil { - s.log.ErrorContext(ctx, "membership state inconsistent: old policies deleted but new policy creation failed, needs manual fix", - "resource_id", resourceID, - "resource_type", resourceType, - "principal_id", principalID, - "principal_type", principalType, - "role_id", roleID, - "error", err, - ) - return err - } - return nil + return s.policyService.Delete(ctx, pol.ID) } // replaceRelation deletes the given old relations for the principal on the resource, @@ -788,7 +753,7 @@ func (s *Service) SetProjectMemberRole(ctx context.Context, projectID, principal return nil } - if err := s.replacePolicy(ctx, projectID, schema.ProjectNamespace, principalID, principalType, resolvedRoleID, existing); err != nil { + if err := s.replacePolicy(ctx, projectID, schema.ProjectNamespace, principalID, principalType, resolvedRoleID, existing, ""); err != nil { return err } @@ -1163,7 +1128,7 @@ func (s *Service) SetGroupMemberRole(ctx context.Context, groupID, principalID, return err } - if err := s.replacePolicy(ctx, groupID, schema.GroupNamespace, principalID, principalType, resolvedRoleID, existing); err != nil { + if err := s.replacePolicy(ctx, groupID, schema.GroupNamespace, principalID, principalType, resolvedRoleID, existing, ""); err != nil { return err } From d4c4c52f1cfaa89cb88b099596b6354736cbe952 Mon Sep 17 00:00:00 2001 From: Rohil Surana Date: Wed, 13 May 2026 16:38:02 +0530 Subject: [PATCH 10/10] fix: restore error logs for relation cleanup failures in cascadeRemovePrincipal --- core/membership/service.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/core/membership/service.go b/core/membership/service.go index 1578726ca..25bea4edc 100644 --- a/core/membership/service.go +++ b/core/membership/service.go @@ -374,10 +374,23 @@ func (s *Service) cascadeRemovePrincipal(ctx context.Context, org organization.O // clean up SpiceDB relations for _, g := range orgGroups { if err := s.removeRelations(ctx, g.ID, schema.GroupNamespace, principalID, principalType); err != nil { + s.log.Error("partial failure removing member: group relation cleanup failed, manual cleanup may be needed", + "org_id", orgID, + "group_id", g.ID, + "principal_id", principalID, + "principal_type", principalType, + "error", err, + ) return fmt.Errorf("remove group %s relations: %w", g.ID, err) } } if err := s.removeRelations(ctx, orgID, schema.OrganizationNamespace, principalID, principalType); err != nil { + s.log.Error("partial failure removing member: org relation cleanup failed, manual cleanup may be needed", + "org_id", orgID, + "principal_id", principalID, + "principal_type", principalType, + "error", err, + ) return fmt.Errorf("remove org relations: %w", err) }