diff --git a/.markdownlint.jsonc b/.markdownlint.jsonc new file mode 100644 index 0000000000..ff65f434f4 --- /dev/null +++ b/.markdownlint.jsonc @@ -0,0 +1,18 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Configuration for the markdownlint VS Code extension. +// See https://github.com/DavidAnson/markdownlint for rule documentation. +{ + // MD013 - Line length: enforce a maximum line length of 100 characters + "MD013": { + "line_length": 100, + "tables": false + }, + // MD024 - No duplicate headings: only flag duplicates among sibling headings + // (allows the same heading text under different parents) + "MD024": { + "siblings_only": true + } +} diff --git a/.vscode/mcp.json b/.vscode/mcp.json index a3f5378943..f58c1a234a 100644 --- a/.vscode/mcp.json +++ b/.vscode/mcp.json @@ -59,6 +59,16 @@ "type": "http", "url": "https://mcp.bluebird-ai.net/" }, + "bluebird-mcp-dsmaindev": { + "headers": { + "x-mcp-ec-branch": "master", + "x-mcp-ec-organization": "msdata", + "x-mcp-ec-project": "Database Systems", + "x-mcp-ec-repository": "DsMainDev" + }, + "type": "http", + "url": "https://mcp.bluebird-ai.net/" + }, "github": { "type": "http", "url": "https://api.githubcopilot.com/mcp/" @@ -72,4 +82,4 @@ "url": "https://learn.microsoft.com/api/mcp" } } -} \ No newline at end of file +} diff --git a/plans/database_context/00-overview.md b/plans/database_context/00-overview.md new file mode 100644 index 0000000000..ef94caaecc --- /dev/null +++ b/plans/database_context/00-overview.md @@ -0,0 +1,68 @@ +# Database Context Preservation Across Internal Reconnections + +## Issue + +[dotnet/SqlClient#4108](https://github.com/dotnet/SqlClient/issues/4108) — `SqlConnection` doesn't +restore database in the new session if connection is lost. + +When a user changes the active database via `USE [db]` through `SqlCommand.ExecuteNonQuery()`, and +the physical connection subsequently breaks and is transparently reconnected, the recovered session +may land on the **initial catalog** from the connection string instead of the database the user +switched to. + +## Status + +**Fix implemented and validated.** The root cause (Issue G) has been identified and fixed. See +[03-issues.md](03-issues.md) for the full issue list and [04-recommendations.md](04-recommendations.md) +for the fix details. Server-side analysis of the SQL Server engine's session recovery code confirms +the fix is correct — see [06-server-side-analysis.md](06-server-side-analysis.md). + +### Root Cause + +`CompleteLogin()` in `SqlConnectionInternal.cs` trusted the server's `ENV_CHANGE` response +unconditionally after session recovery. If the server did not properly restore the database context, +the client silently ended up on the wrong database. + +### Server-Side Confirmation + +Analysis of the SQL Server source code (`featureext.cpp`, `login.cpp`, `session.cpp`) confirms that +the server correctly implements session recovery for database context — the recovery database from +the ToBe chunk is treated as mandatory (`Source #0`) with no silent fallback. The root cause is +entirely **client-side**: `CompleteLogin()` did not verify the server's response matched the recovery +target. + +### Fix + +After session recovery completes in `CompleteLogin()`, the fix compares `CurrentDatabase` (set by +the server's `ENV_CHANGE`) against the database from `_recoverySessionData`. If they differ, a +`USE [database]` command is sent to the server to force alignment. This guarantees both client and +server are on the correct database after recovery, regardless of server behavior. + +The fix is gated behind the `Switch.Microsoft.Data.SqlClient.VerifyRecoveredDatabaseContext` +AppContext switch (default: `true`). Manual tests set it to `false` to confirm the server-only path +fails without the fix. + +### Key Properties of the Fix + +- **Correct**: Both client and server are guaranteed to be on the same database after recovery +- **Safe**: Only executes during reconnection (`_recoverySessionData != null`), never on first login +- **Efficient**: No overhead when the server properly restores the database (the common case) +- **Defensive**: Handles both wrong-database and missing-ENV_CHANGE server behaviors + +## Scope + +This analysis covers every code path where an internal reconnection can occur and evaluates whether +the current database context (`CurrentDatabase`) is correctly maintained. The assumption is: + +> Any internal reconnection within `SqlConnection` must maintain the current database context. + +## Documents + +| File | Contents | +| ---- | -------- | +| [01-architecture.md](01-architecture.md) | How database context is tracked and how session recovery works | +| [02-flows.md](02-flows.md) | Every reconnection flow, annotated with database context behaviour | +| [03-issues.md](03-issues.md) | Identified bugs and gaps, ranked by severity | +| [04-recommendations.md](04-recommendations.md) | Proposed fixes | +| [05-reconnection-and-retry-mechanisms.md](05-reconnection-and-retry-mechanisms.md) | All retry/reconnection mechanisms with public doc cross-references | +| [06-server-side-analysis.md](06-server-side-analysis.md) | SQL Server engine session recovery internals, including `ParseFeatureData`, `ParseSessionDataChunk`, `FDetermineSessionDb`, test coverage gaps | diff --git a/plans/database_context/01-architecture.md b/plans/database_context/01-architecture.md new file mode 100644 index 0000000000..dbb79c9f6a --- /dev/null +++ b/plans/database_context/01-architecture.md @@ -0,0 +1,140 @@ +# Architecture: Database Context Tracking and Session Recovery + +## Key Data Structures + +### `SqlConnectionInternal` fields + +| Field | Type | Set during | Purpose | +| ----- | ---- | ---------- | ------- | +| `CurrentDatabase` | `string` | Login, ENV_CHANGE | The database the server considers active right now | +| `_originalDatabase` | `string` | Login, constructor | Reset target for pool recycling (`ResetConnection()` restores to this value) | +| `_currentSessionData` | `SessionData` | Constructor | Live session state, snapshotted before reconnection | +| `_recoverySessionData` | `SessionData` | Constructor (param) | Saved session from the broken connection, used to build the recovery login packet | +| `_fConnectionOpen` | `bool` | `CompleteLogin()` | Guards whether ENV_CHANGE updates `_originalDatabase` | +| `_sessionRecoveryAcknowledged` | `bool` | `OnFeatureExtAck()` | Whether the server supports session recovery | + +### `SessionData` fields + +| Field | Mutated by | Cleared by `Reset()` | Purpose | +| ----- | ---------- | -------------------- | ------- | +| `_initialDatabase` | `CompleteLogin()` (first login only) | No | Immutable baseline from the login the server confirmed | +| `_database` | `CurrentSessionData` getter | Yes (set to `null`) | Current database, written just-in-time before snapshot | +| `_initialLanguage` | `CompleteLogin()` | No | Immutable baseline language | +| `_language` | `CurrentSessionData` getter | Yes | Current language | +| `_initialCollation` | `CompleteLogin()` | No | Immutable baseline collation | +| `_collation` | `OnEnvChange()` | Yes | Current collation | +| `_delta[]` | `OnFeatureExtAck()`, `SQLSESSIONSTATE` token handler | Yes | Per-stateID session variable changes | +| `_initialState[]` | `OnFeatureExtAck()` (first login only) | No | Per-stateID session variable baselines | +| `_unrecoverableStatesCount` | `SQLSESSIONSTATE` token handler | Yes | Count of non-recoverable session states | + +`Reset()` is called when `ENV_SPRESETCONNECTIONACK` arrives (server acknowledged +`sp_reset_connection`). It clears delta/current state but preserves the immutable baselines. + +## How `CurrentDatabase` is set + +### During login + +```text +Login() → CurrentDatabase = server.ResolvedDatabaseName + (= ConnectionOptions.InitialCatalog) +Server login response → ENV_CHANGE(ENV_DATABASE) → CurrentDatabase = newValue +CompleteLogin() → _currentSessionData._initialDatabase = CurrentDatabase + (only when _recoverySessionData == null, i.e. first login) +``` + +`SqlConnectionInternal.cs` line 2976—sets `CurrentDatabase` to `InitialCatalog` immediately. The +server then confirms (or overrides) via ENV_CHANGE before `CompleteLogin()` captures it. + +### During normal operation + +```text +USE [MyDb] via SqlCommand → server response → ENV_CHANGE(ENV_DATABASE) +OnEnvChange() → CurrentDatabase = "MyDb" + _originalDatabase NOT updated (guarded by _fConnectionOpen) +``` + +`SqlConnectionInternal.cs` lines 1155–1164. After the connection is open, `_originalDatabase` is frozen. + +### During pool reset + +```text +Deactivate() → ResetConnection() + → _parser.PrepareResetConnection() (sets TDS header flag for sp_reset_connection) + → CurrentDatabase = _originalDatabase (resets to initial catalog immediately) +``` + +`SqlConnectionInternal.cs` lines 3895–3907. + +### `CurrentSessionData` getter (just-in-time snapshot) + +```csharp +internal SessionData CurrentSessionData +{ + get + { + if (_currentSessionData != null) + { + _currentSessionData._database = CurrentDatabase; + _currentSessionData._language = _currentLanguage; + } + return _currentSessionData; + } +} +``` + +`SqlConnectionInternal.cs` lines 530–537. This is called by `ValidateAndReconnect()` right before +saving recovery data for reconnection. + +## Session Recovery Protocol + +When `ConnectRetryCount > 0` (default: **1**), the driver negotiates `FEATUREEXT_SRECOVERY` with the +server during login. On reconnection, `WriteSessionRecoveryFeatureRequest()` encodes: + +1. **Initial state**: `_initialDatabase`, `_initialCollation`, `_initialLanguage`, `_initialState[]` +2. **Current deltas**: `_database` (if different from `_initialDatabase`), `_language`, + `_collation`, `_delta[]` + +The server uses the initial state + deltas to rebuild the session. If `_database != +_initialDatabase`, the server switches to `_database` after login. + +### `WriteSessionRecoveryFeatureRequest` — relevant excerpt + +```text +TdsParser.cs line 8963: + initialLength += ... _initialDatabase ... +TdsParser.cs line 8966: + currentLength += ... (_initialDatabase == _database ? 0 : _database) ... +TdsParser.cs line 9017: + WriteIdentifier(_database != _initialDatabase ? _database : null, ...) +``` + +When `_database` equals `_initialDatabase`, a zero-length identifier is written (meaning "no +change"). When they differ, the current database name is written and the server applies it. + +## Flow: How `ValidateAndReconnect` triggers recovery + +```text +SqlCommand.RunExecuteNonQueryTds() + → SqlConnection.ValidateAndReconnect() + check _connectRetryCount > 0 + check _sessionRecoveryAcknowledged + check !stateObj.ValidateSNIConnection() ← physical connection broken? + SessionData cData = tdsConn.CurrentSessionData ← snapshot (writes _database = CurrentDatabase) + _recoverySessionData = cData ← save for new connection + tdsConn.DoomThisConnection() + Task.Run → ReconnectAsync() + → ForceNewConnection = true + → OpenAsync() + → TryReplaceConnection() + → SqlConnectionFactory.CreateConnection() + → new SqlConnectionInternal(..., recoverySessionData) + constructor: _originalDatabase = recoverySessionData._initialDatabase + Login() + → CurrentDatabase = InitialCatalog + → login.database = CurrentDatabase + → TdsLogin(..., _recoverySessionData, ...) + → WriteSessionRecoveryFeatureRequest(recoverySessionData, ...) + Server processes login + recovery → ENV_CHANGE(ENV_DATABASE) → CurrentDatabase updated + CompleteLogin() + → _recoverySessionData = null +``` diff --git a/plans/database_context/02-flows.md b/plans/database_context/02-flows.md new file mode 100644 index 0000000000..40e54045f5 --- /dev/null +++ b/plans/database_context/02-flows.md @@ -0,0 +1,249 @@ +# Reconnection Flows and Database Context Behaviour + +Every code path that can create a new physical connection while the `SqlConnection` remains "open" +to the caller is listed below. For each flow, the analysis records whether database context is +preserved. + +--- + +## Flow 1: Command-triggered transparent reconnect (sync) + +**Entry**: `SqlCommand.RunExecuteNonQueryTds()` / `RunExecuteReaderTds()` + +```text +1. ValidateAndReconnect(null, timeout) +2. _sessionRecoveryAcknowledged? ── No → return null → command proceeds on broken conn → SqlException + │ Yes ↓ +3. ValidateSNIConnection()? ─── OK → return null → command proceeds normally + │ broken ↓ +4. cData = tdsConn.CurrentSessionData ← _database = CurrentDatabase (e.g. "MyDb") +5. _recoverySessionData = cData +6. DoomThisConnection() +7. Task.Run(ReconnectAsync) +8. ForceNewConnection = true +9. OpenAsync → TryReplaceConnection → CreateConnection(recoverySessionData) +10. new SqlConnectionInternal(recoverySessionData) +11. Login: CurrentDatabase = InitialCatalog ← temporarily wrong +12. TdsLogin sends _recoverySessionData._database = "MyDb" in recovery packet +13. Server restores session → ENV_CHANGE → CurrentDatabase = server's DB +14. CompleteLogin: +14a. recoveredDatabase = _recoverySessionData._database ("MyDb") +14b. _recoverySessionData = null +14c. if CurrentDatabase != recoveredDatabase: +14d. → execute USE [MyDb] on the wire → server switches to MyDb +14e. → ENV_CHANGE → CurrentDatabase = "MyDb" +15. ReconnectAsync returns +16. Command re-executes on new connection (CurrentDatabase = "MyDb") +``` + +**Database context preserved?** Yes — if session recovery is acknowledged by the server and +`_unrecoverableStatesCount == 0`. Even if the server does not properly restore the database +context, the fix in `CompleteLogin()` detects the mismatch and issues a `USE` command to force +alignment between client and server. See [Issue G in 03-issues.md](03-issues.md). + +**Files**: `SqlCommand.NonQuery.cs` line 756, `SqlCommand.Reader.cs` line 1265, `SqlConnection.cs` +lines 1662–1830, `SqlConnectionInternal.cs` constructor. + +--- + +## Flow 2: Command-triggered transparent reconnect (async) + +Same as Flow 1 except reconnection completes asynchronously and the command execution is chained as +a continuation via `RunExecuteNonQueryTdsSetupReconnnectContinuation` / +`RunExecuteReaderTdsSetupReconnectContinuation`. + +**Database context preserved?** Same as Flow 1. + +--- + +## Flow 3: `RepairInnerConnection` path (`ChangeDatabase`, `EnlistTransaction`) + +**Entry**: `SqlConnection.ChangeDatabase()`, `SqlConnection.EnlistTransaction()` + +```text +1. RepairInnerConnection() +2. WaitForPendingReconnection() +3. tdsConn.GetSessionAndReconnectIfNeeded(this) +4. _parserLock.Wait() +5. ValidateAndReconnect(releaseLock, timeout) ← same as Flow 1 +6. AsyncHelper.WaitForCompletion(reconnectTask) +7. InnerConnection.ChangeDatabase(database) / EnlistTransaction(tx) +``` + +**Database context preserved?** Yes — the reconnect restores the session, then the caller's database +change runs on the recovered connection. The `ChangeDatabase` call overwrites the context +immediately after, so even if recovery landed on the wrong database, the explicit change corrects +it. + +**Files**: `SqlConnection.cs` lines 1330–1347, 1540–1590, 1847–1863. + +--- + +## Flow 4: Pool recycle (`Deactivate` → `ResetConnection`) + +**Entry**: `SqlConnection.Close()` → pool returns connection + +```text +1. Deactivate() +2. _parser.Deactivate() +3. ResetConnection() +4. PrepareResetConnection(preserveTransaction) ← sets TDS flag +5. CurrentDatabase = _originalDatabase ← back to InitialCatalog +``` + +Next user to acquire this connection: + +```text +6. First TDS packet piggybacks sp_reset_connection header flag +7. Server executes sp_reset_connection → resets to login database +8. Server sends ENV_SPRESETCONNECTIONACK → SessionData.Reset() (clears _database, _delta) +9. Server sends ENV_CHANGE(ENV_DATABASE) → CurrentDatabase = login database +``` + +**Database context preserved?** No — by design. Pool reset intentionally reverts to the initial +catalog. This is correct behaviour; a pooled connection must not leak database context to the next +consumer. + +**Files**: `SqlConnectionInternal.cs` lines 2063–2122, 3883–3907. + +--- + +## Flow 5: Session recovery not acknowledged (fallback) + +**Condition**: `_sessionRecoveryAcknowledged == false` (server does not support session recovery, or +`ConnectRetryCount == 0`). + +```text +1. ValidateAndReconnect() +2. _connectRetryCount > 0? ── No → return null + │ Yes ↓ +3. _sessionRecoveryAcknowledged? ── No → return null +``` + +The broken connection is **not** detected by `ValidateAndReconnect`. The next TDS write/read fails +with a `SqlException`. The user must handle the error themselves. If they retry by re-opening or +re-executing: + +- Pooled: new connection from pool → initial catalog +- Non-pooled: new connection → initial catalog + +**Database context preserved?** No. There is no mechanism to recover the database context when +session recovery is unsupported. The error surfaces to the caller, so this is not a silent data +corruption — the user explicitly sees a failure. + +**Files**: `SqlConnection.cs` lines 1750–1755. + +--- + +## Flow 6: Unrecoverable session state + +**Condition**: `_sessionRecoveryAcknowledged == true` but `cData._unrecoverableStatesCount > 0`. + +```text +1. ValidateAndReconnect() +2. cData = tdsConn.CurrentSessionData +3. cData._unrecoverableStatesCount > 0 +4. → OnError(SQL.CR_UnrecoverableServer) ← throws SqlException +``` + +**Database context preserved?** No — but the error is explicit. The caller knows the connection is +broken. + +**Files**: `SqlConnection.cs` lines 1823–1828. + +--- + +## Flow 7: MARS with active sessions + +**Condition**: Multiple Active Result Sets enabled, more than one active session. + +```text +1. ValidateAndReconnect() +2. _sessionPool.ActiveSessionsCount > 0 +3. → OnError(SQL.CR_UnrecoverableClient) ← throws SqlException +``` + +**Database context preserved?** No — recovery is not possible with multiple active sessions. Error +is explicit. + +**Files**: `SqlConnection.cs` lines 1761–1770. + +--- + +## Flow 8: `ChannelDbConnectionPool` (V2 pool) reconnection + +**Condition**: `UseConnectionPoolV2` AppContext switch is enabled. + +```text +1. ValidateAndReconnect → ReconnectAsync → OpenAsync +2. ForceNewConnection = true +3. TryReplaceConnection → TryGetConnection +4. connectionPool.ReplaceConnection(...) +5. throw new NotImplementedException() ← CRASH +``` + +**Database context preserved?** No — `ReplaceConnection` is not implemented. The reconnection fails +with `NotImplementedException`, which propagates as an unhandled error. + +**Files**: `ChannelDbConnectionPool.cs` lines 173–177. + +--- + +## Flow 9: Non-pooled connection reconnection + +**Condition**: `Pooling=false` in connection string. + +```text +1. ValidateAndReconnect → ReconnectAsync → OpenAsync +2. ForceNewConnection = true +3. TryReplaceConnection = TryOpenConnectionInternal +4. connectionFactory.TryGetConnection +5. pool == null → CreateNonPooledConnection +6. SqlConnectionFactory.CreateConnection(recoverySessionData = null) ← BUG? +``` + +Wait — let me verify this. `CreateNonPooledConnection` calls `CreateConnection` but for non-pooled +it goes through a different path. Let me check whether `recoverySessionData` is passed. + +Looking at `SqlConnectionFactory.TryGetConnection` (`SqlConnectionFactory.cs` line 380–470): + +```text +pool == null (non-pooled): + → CreateNonPooledConnection(owningConnection, poolGroup, userOptions) +``` + +And `CreateNonPooledConnection` (`SqlConnectionFactory.cs` line 485): + +```text +return CreateConnection(options, poolKey, null, null, owningConnection, userOptions) +``` + +In `CreateConnection` (`SqlConnectionFactory.cs` line 580): + +```text +recoverySessionData = sqlOwningConnection._recoverySessionData ← READ from SqlConnection +``` + +So `recoverySessionData` IS read from `sqlOwningConnection._recoverySessionData`, which was set by +`ValidateAndReconnect`. Non-pooled connections DO get recovery data. + +**Database context preserved?** Yes — same mechanism as pooled connections. Recovery data is passed +through `sqlOwningConnection._recoverySessionData`. + +**Files**: `SqlConnectionFactory.cs` lines 485, 580–608. + +--- + +## Summary Table + +| Flow | Trigger | Context Preserved? | Silent? | Notes | +| ---- | ------- | ------------------ | ------- | ----- | +| 1 | Command execution (sync) | Yes | Yes (transparent) | Requires session recovery support | +| 2 | Command execution (async) | Yes | Yes (transparent) | Same as 1 | +| 3 | `ChangeDatabase`/`EnlistTransaction` | Yes | Yes (transparent) | `RepairInnerConnection` runs first | +| 4 | Pool recycle | No (by design) | N/A | Pool reset is intentional | +| 5 | No session recovery support | No | No (SqlException) | Error surfaces to caller | +| 6 | Unrecoverable session state | No | No (SqlException) | Error surfaces to caller | +| 7 | MARS active sessions | No | No (SqlException) | Error surfaces to caller | +| 8 | V2 pool (`ChannelDbConnectionPool`) | No | No (NotImplementedException) | **Bug**: `ReplaceConnection` unimplemented | +| 9 | Non-pooled connection | Yes | Yes (transparent) | Recovery data passed correctly | diff --git a/plans/database_context/03-issues.md b/plans/database_context/03-issues.md new file mode 100644 index 0000000000..862b6cd5d6 --- /dev/null +++ b/plans/database_context/03-issues.md @@ -0,0 +1,267 @@ +# Identified Issues + +Seven issues were found during the analysis. They are ordered from highest to lowest severity +relative to the invariant: *internal reconnections must maintain the current database context*. + +--- + +## Issue G — `CompleteLogin` does not verify database context after session recovery + +**Severity**: High (silent data corruption — client and server on different databases) +**Conditions**: Session recovery succeeds but the server does not restore the database context +**Default impact**: After reconnection, `CurrentDatabase` reflects whatever the server sent in +`ENV_CHANGE` (the initial catalog), not the database the session was actually using before the +connection dropped. Subsequent queries silently execute against the wrong database. + +### Description + +During a reconnection with session recovery, the client sends the correct database in the recovery +feature request (`_recoverySessionData._database`). The server is supposed to restore the session to +that database and confirm via `ENV_CHANGE(ENV_DATABASE)`. However, `CompleteLogin()` never checks +whether the server actually honoured the recovery request. It unconditionally nulls +`_recoverySessionData` and trusts whatever `CurrentDatabase` was set to by the server's +`ENV_CHANGE`. + +If the server fails to restore the database (sends the initial catalog in `ENV_CHANGE`, or omits the +database `ENV_CHANGE` entirely), the client silently ends up on the wrong database. + +### Root cause + +`CompleteLogin()` in `SqlConnectionInternal.cs` (line ~2315) nulls `_recoverySessionData` without +comparing the recovered database against `CurrentDatabase`. There is no corrective action when they +differ. + +### Location + +`SqlConnectionInternal.cs`, `CompleteLogin()` method — the block after encryption validation where +`_recoverySessionData = null` is set. + +### Effect + +After a transparent reconnection: +- `SqlConnection.Database` returns the initial catalog instead of the `USE`-switched database +- All subsequent queries execute against the wrong database +- The mismatch is completely silent — no exception, no warning + +This is the root cause of [dotnet/SqlClient#4108](https://github.com/dotnet/SqlClient/issues/4108). + +### Fix applied + +See [04-recommendations.md](04-recommendations.md) Fix 1. In `CompleteLogin()`, after session +recovery is acknowledged and encryption is verified, the fix compares `CurrentDatabase` against the +recovered database from `_recoverySessionData`. If they differ, it issues a `USE [database]` command +to force the server to the correct database, ensuring both client and server agree. + +--- + +## Issue A — `ChannelDbConnectionPool.ReplaceConnection` throws `NotImplementedException` + +**Severity**: High (crash on reconnect) +**Conditions**: `UseConnectionPoolV2` AppContext switch enabled +**Default impact**: None (switch defaults to `false`) + +### Description + +`ChannelDbConnectionPool.ReplaceConnection()` is a stub that throws `NotImplementedException`. The +transparent reconnection flow calls `connectionPool.ReplaceConnection()` via `TryGetConnection` when +`ForceNewConnection == true`. With the V2 pool enabled, any reconnection attempt crashes instead of +recovering. + +### Location + +`ChannelDbConnectionPool.cs` lines 173–177: + +```csharp +public DbConnectionInternal ReplaceConnection( + DbConnection owningObject, + DbConnectionOptions userOptions, + DbConnectionInternal oldConnection) +{ + throw new NotImplementedException(); +} +``` + +### Call chain + +```text +ReconnectAsync + → OpenAsync + → TryOpenInner (ForceNewConnection == true) + → TryReplaceConnection + → TryOpenConnectionInternal + → SqlConnectionFactory.TryGetConnection + → connectionPool.ReplaceConnection(...) ← throws +``` + +### Effect + +Database context is lost. The `NotImplementedException` propagates out of `ReconnectAsync`, which +catches `SqlException` but not `NotImplementedException`, so it escapes as an unhandled failure. The +connection becomes unusable. + +--- + +## Issue B — `CurrentSessionData` getter is not thread-safe + +**Severity**: Medium (potential silent data corruption) +**Conditions**: Concurrent access to the connection from multiple threads (e.g. timer-based health +checks, or async continuations racing with foreground work) +**Default impact**: Low probability, but when hit, recovery may target the wrong database + +### Description + +The `CurrentSessionData` property writes `_database` and `_language` to the `SessionData` object on +every read: + +```csharp +internal SessionData CurrentSessionData +{ + get + { + if (_currentSessionData != null) + { + _currentSessionData._database = CurrentDatabase; // ← non-atomic write + _currentSessionData._language = _currentLanguage; // ← non-atomic write + } + return _currentSessionData; + } +} +``` + +`SqlConnectionInternal.cs` lines 530–537. + +If `ValidateAndReconnect` calls `CurrentSessionData` at the same moment another thread modifies +`CurrentDatabase` (e.g. from a completed ENV_CHANGE callback), the snapshot may capture a +partially-updated state. For example, `_database` could be written as the old value while +`_language` gets the new value, or vice-versa. + +### Effect + +The session recovery feature request encodes a snapshot with inconsistent database/language. The +server may restore the session to the wrong database or wrong language. + +--- + +## Issue C — `Login()` overwrites `CurrentDatabase` before session recovery completes + +**Severity**: Low (self-correcting) +**Conditions**: Every reconnection +**Default impact**: Transient — corrected by server ENV_CHANGE response + +### Description + +In `Login()` at `SqlConnectionInternal.cs` line 2976: + +```csharp +CurrentDatabase = server.ResolvedDatabaseName; +``` + +`ServerInfo.ResolvedDatabaseName` is always `ConnectionOptions.InitialCatalog`. During reconnection, +this overwrites `CurrentDatabase` from (for example) `"MyDb"` to `"master"` before the TDS login +packet is even sent. + +The server processes the login, applies session recovery (including the database change back to +`"MyDb"`), and sends ENV_CHANGE tokens that correct `CurrentDatabase`. So the final state is +correct. + +### Effect + +Between the `Login()` call and the ENV_CHANGE response, `CurrentDatabase` is temporarily wrong. Any +code that reads `CurrentDatabase` in this window (diagnostic tracing, health monitoring, etc.) sees +the initial catalog instead of the recovered database. No functional impact on the TDS protocol +because the recovery feature request carries the correct `_database` value from the snapshot taken +*before* `Login()` was called. + +--- + +## Issue D — `_originalDatabase` set to `_initialDatabase` on reconnect (not current) + +**Severity**: Low (correct for pooling, surprising for non-pooled) +**Conditions**: Every reconnection with recovery data +**Default impact**: `ResetConnection()` after reconnect targets the original initial catalog, not +the USE'd database + +### Description + +In the `SqlConnectionInternal` constructor during reconnection: + +```csharp +_originalDatabase = _recoverySessionData._initialDatabase; +``` + +`SqlConnectionInternal.cs` line 384. + +`_originalDatabase` is the target for `ResetConnection()`. After reconnection, it is set to the +**first login's** initial database, not the database the session was recovered to. + +For pooled connections this is correct: pool reset should return the connection to the initial +catalog regardless of any `USE` commands. For non-pooled connections, `ResetConnection` is not +called, so this is moot. + +### Effect + +None in practice — `_originalDatabase` is only consumed by `ResetConnection()`, which is only called +during pool deactivation. + +--- + +## Issue E — `SessionData` copy constructor does not copy `_database` + +**Severity**: Low (compensated by `CurrentSessionData` getter) +**Conditions**: Every reconnection + +### Description + +The `SessionData` copy constructor copies baselines but not transient state: + +```csharp +public SessionData(SessionData recoveryData) +{ + _initialDatabase = recoveryData._initialDatabase; + _initialCollation = recoveryData._initialCollation; + _initialLanguage = recoveryData._initialLanguage; + _resolvedAliases = recoveryData._resolvedAliases; + // _database, _language, _collation are NOT copied +} +``` + +`SessionData.cs` lines 46–60. + +This is not a bug because `_database` and `_language` are populated just-in-time by the +`CurrentSessionData` getter before the recovery data is read by +`WriteSessionRecoveryFeatureRequest`. The `CurrentSessionData` getter writes `_database = +CurrentDatabase` before returning. The copy constructor is only used to initialise the **new** +connection's `_currentSessionData`, not to read recovery values. + +### Effect + +None — by design. Noted here for completeness because the missing copy looks suspicious on first +read. + +--- + +## Issue F — No database context recovery when session recovery is unsupported + +**Severity**: Informational +**Conditions**: Server does not support `FEATUREEXT_SRECOVERY`, or `ConnectRetryCount == 0` +**Default impact**: Connection breaks surface as `SqlException`; no silent context loss + +### Description + +When `_sessionRecoveryAcknowledged == false`, `ValidateAndReconnect()` does not check +`ValidateSNIConnection()` and returns `null`. The broken connection is only detected when the next +TDS write/read fails. The error propagates to the caller as a `SqlException`. + +If the caller has retry logic (whether application-level or via `SqlRetryLogicProvider`), the retry +acquires a new connection from the pool. That connection has been reset via `sp_reset_connection` +and is in the initial catalog, not the database the user `USE`d to. + +This is not a bug in the driver — session recovery is the mechanism for preserving context, and +without it the driver cannot help. However, the behaviour is surprising for users who expect `USE +[db]` to be durable. + +### Effect + +Users who rely on `USE [db]` instead of `SqlConnection.ChangeDatabase()` may see their database +context lost after a transient failure + retry. The recommended mitigation is to use +`ChangeDatabase()` or to re-issue `USE [db]` after any error. diff --git a/plans/database_context/04-recommendations.md b/plans/database_context/04-recommendations.md new file mode 100644 index 0000000000..6f107fe98f --- /dev/null +++ b/plans/database_context/04-recommendations.md @@ -0,0 +1,211 @@ +# Recommendations + +Fixes are prioritised by the severity rankings in [03-issues.md](03-issues.md). + +--- + +## Fix 1 — Verify and correct database context after session recovery (Issue G) ✅ IMPLEMENTED + +### Goal + +After a successful session recovery, ensure both the client and server are on the database the +session was using before the connection dropped. + +### Root cause + +`CompleteLogin()` unconditionally trusts the server's `ENV_CHANGE(ENV_DATABASE)` response after +session recovery. If the server fails to restore the database (sends the initial catalog, or omits +the database `ENV_CHANGE` entirely), `CurrentDatabase` silently ends up wrong. + +### Approach + +In `CompleteLogin()`, after the server has acknowledged session recovery and encryption is verified: + +1. Read the expected database from `_recoverySessionData._database` (the database at disconnect + time), falling back to `_recoverySessionData._initialDatabase` if `_database` is null (meaning + the database was never changed from the initial login). +2. Null `_recoverySessionData` (as before). +3. Compare the expected database against `CurrentDatabase` (which was set by the server's + `ENV_CHANGE` during login). +4. If they differ, issue a `USE [database]` command over the wire to force the server to the + correct database. This ensures both client and server agree. +5. Set `CurrentDatabase` to the recovered database as a final safety net. + +When the databases already match (the normal case with a well-behaved server), no `USE` is sent — +zero overhead. + +### Files changed + +| File | Change | +| ---- | ------ | +| `SqlConnectionInternal.cs` | Added database context verification and `USE` correction in `CompleteLogin()` after session recovery | + +### Tests added + +| Test | Scenario | +| ---- | -------- | +| `UseDatabase_ProperRecovery_DatabaseContextPreservedAfterReconnect` | Server properly restores DB via session recovery — baseline | +| `ChangeDatabase_ProperRecovery_DatabaseContextPreservedAfterReconnect` | Same, via `ChangeDatabase()` | +| `UseDatabase_ProperRecovery_Pooled_DatabaseContextPreservedAfterReconnect` | Same, with pooling enabled | +| `UseDatabase_BuggyRecovery_DatabaseContextPreservedAfterReconnect` | Server sends wrong DB in `ENV_CHANGE` — fix issues `USE` to correct | +| `ChangeDatabase_BuggyRecovery_DatabaseContextPreservedAfterReconnect` | Same, via `ChangeDatabase()` | +| `UseDatabase_OmittedEnvChange_DatabaseContextPreservedAfterReconnect` | Server omits DB `ENV_CHANGE` entirely — fix issues `USE` to correct | +| `ChangeDatabase_OmittedEnvChange_DatabaseContextPreservedAfterReconnect` | Same, via `ChangeDatabase()` | +| `UseDatabase_ConnectionDropped_NoRetry_ThrowsOnNextCommand` | `ConnectRetryCount=0` — no recovery, error surfaces | + +### Verification + +All 10 tests pass. Full unit test suite (631 tests) shows no regressions. + +--- + +## Fix 2 — Implement `ChannelDbConnectionPool.ReplaceConnection` (Issue A) + +### Goal + +Make transparent reconnection work when the V2 connection pool is enabled. + +### Approach + +Implement `ReplaceConnection` in `ChannelDbConnectionPool` to mirror the behaviour of +`WaitHandleDbConnectionPool.ReplaceConnection`: + +1. Create a new physical connection via `ConnectionFactory.CreatePooledConnection` (which passes + `recoverySessionData` through `SqlConnectionFactory.CreateConnection`). +2. Remove the old connection from the slot tracking structure. +3. Add the new connection. +4. Call `PrepareConnection` on the new connection. +5. Call `PrepareForReplaceConnection`, `DeactivateConnection`, and `Dispose` on the old connection. + +### Files to change + +| File | Change | +| ---- | ------ | +| `ChannelDbConnectionPool.cs` | Replace `throw new NotImplementedException()` with working implementation | + +### Tests + +- Unit test: mock a broken connection, enable V2 pool, verify `ReplaceConnection` returns a live + connection and disposes the old one. +- Functional test: open connection, `USE [db]`, break the physical connection (e.g. `kill + session_id` on the server), execute another command, verify `CurrentDatabase` matches the `USE`d + database. + +--- + +## Fix 3 — Make `CurrentSessionData` snapshot atomic (Issue B) + +### Goal + +Prevent race conditions when snapshotting session state for recovery. + +### Approach + +Instead of writing `_database` and `_language` into the live `SessionData` object on every property +access, take an explicit snapshot under the parser lock. The parser lock is already held by +`GetSessionAndReconnectIfNeeded` and by `ValidateAndReconnect` (which holds `_reconnectLock`). + +Option A — Snapshot method: + +```csharp +internal SessionData SnapshotSessionData() +{ + if (_currentSessionData == null) + return null; + + // Write transient fields under caller's lock + _currentSessionData._database = CurrentDatabase; + _currentSessionData._language = _currentLanguage; + return _currentSessionData; +} +``` + +Replace the `CurrentSessionData` property getter with a plain auto-property (or read-only field) and +call `SnapshotSessionData()` from `ValidateAndReconnect` where the lock is already held. + +Option B — Copy-on-read: + +Clone the `SessionData` at snapshot time so that any later mutations (e.g. ENV_CHANGE arriving on a +different thread) don't affect the recovery data. This is safer but allocates. + +### Files to change + +| File | Change | +| ---- | ------ | +| `SqlConnectionInternal.cs` | Replace `CurrentSessionData` getter; add `SnapshotSessionData()` | +| `SqlConnection.cs` | Call `SnapshotSessionData()` in `ValidateAndReconnect` instead of reading property | + +### Tests + +- Hard to unit-test race conditions directly. Add a stress test that repeatedly executes `USE [db]` + and forces reconnections concurrently, then asserts `CurrentDatabase` is correct after each + reconnection. + +--- + +## Fix 4 — Document `USE [db]` vs `ChangeDatabase` resilience difference (Issue F) + +### Goal + +Help users understand that `USE [db]` via `SqlCommand` has the same resilience as `ChangeDatabase()` +when session recovery is supported, but **neither** survives a full connection failure that doesn't +support session recovery. The recommended pattern is to check `SqlConnection.Database` after any +retry. + +### Approach + +- Add a remark in the XML doc for `SqlConnection.ChangeDatabase` noting that both `ChangeDatabase()` + and `USE [db]` are tracked by the driver's session recovery mechanism. +- Add a remark noting that if the application catches a `SqlException` and retries, it should + re-establish the desired database context because the retry may land on a fresh connection. +- Add a doc sample showing the resilient pattern. + +### Files to change + +| File | Change | +| ---- | ------ | +| `doc/snippets/Microsoft.Data.SqlClient/SqlConnection.xml` | Add resilience remarks to `ChangeDatabase` entry | +| `doc/samples/` | Add a sample demonstrating database context verification after retry | + +--- + +## Fix 5 (Optional) — Warn when `USE [db]` is detected without session recovery (Issue F) + +### Goal + +Surface a diagnostic event when the driver detects that `CurrentDatabase` changed via ENV_CHANGE but +session recovery is not enabled. This helps users understand they are at risk of context loss. + +### Approach + +In `OnEnvChange`, when `ENV_DATABASE` is received and `_fConnectionOpen == true` (i.e. a runtime +database change, not login), check `_sessionRecoveryAcknowledged`. If `false`, emit an `EventSource` +trace at Warning level. + +### Files to change + +| File | Change | +| ---- | ------ | +| `SqlConnectionInternal.cs` | Add warning trace in `OnEnvChange` case `ENV_DATABASE` | + +### Risk + +Low — trace-only, no behaviour change. + +--- + +## Non-recommendations + +### Do not update `_originalDatabase` after `USE [db]` + +It is tempting to update `_originalDatabase` when `ENV_DATABASE` arrives on an open connection, so +that a subsequent pool reset keeps the `USE`d database. However, this would break pool isolation: a +connection returned to the pool would not reset to the initial catalog, leaking database context to +the next consumer. The current behaviour (freeze `_originalDatabase` at login time) is correct. + +### Do not attempt recovery without server support + +If the server does not acknowledge `FEATUREEXT_SRECOVERY`, the driver cannot reconstruct the +session. Attempting a client-side `USE [db]` after reconnection would be fragile (the database might +not exist on a failover replica, the user might not have access, etc.). The correct approach is to +surface the error and let the application decide. diff --git a/plans/database_context/05-reconnection-and-retry-mechanisms.md b/plans/database_context/05-reconnection-and-retry-mechanisms.md new file mode 100644 index 0000000000..f94d4aa5fd --- /dev/null +++ b/plans/database_context/05-reconnection-and-retry-mechanisms.md @@ -0,0 +1,351 @@ +# Reconnection and Retry Mechanisms in Microsoft.Data.SqlClient + +## Purpose + +This document catalogues every mechanism through which `SqlConnection` or `SqlCommand` may +re-execute or re-establish work after a failure. Each mechanism is described with its trigger, +scope, configuration, database-context implications, and links to the authoritative public +documentation. + +The mechanisms fall into two fundamentally different categories: + +| Category | Scope | Who drives it | User visibility | +| -------- | ----- | ------------- | --------------- | +| **Connection-level** | Physical TCP/TDS session | Driver internals | Transparent — the application does not see the retry loop | +| **Command-level** | Logical operation (`Open`, `ExecuteReader`, …) | `SqlRetryLogicBaseProvider` | Semi-transparent — the application opts in and can observe via `Retrying` event | + +> **Key distinction:** Connection-level retries attempt to restore the *physical link* (and, where +> possible, the session state). Command-level retries re-execute the *entire operation* from +> scratch, which means a new connection may be obtained from the pool and all session state set +> outside the connection string (e.g. `USE [db]`, `SET` options) is lost unless the application +> restores it. + +--- + +## 1 Idle Connection Resiliency (Connection Retry) + +### What it is + +A driver-internal mechanism that transparently reconnects a broken physical connection *before* (or +instead of) surfacing an error to the application. It is sometimes called **connection resiliency** +or **idle connection recovery**. + +### When it triggers + +1. A command is about to execute (`ExecuteReader`, `ExecuteNonQuery`, etc.). +2. The driver calls `SqlConnection.ValidateAndReconnect()` which validates the SNI link via + `TdsParserStateObject.ValidateSNIConnection()`. +3. If the link is dead **and** session recovery was negotiated with the server + (`_sessionRecoveryAcknowledged == true`), the driver enters the reconnect loop. + +### Configuration + +| Connection string keyword | Default | Azure SQL | Synapse On-Demand | Range | +| ------------------------- | ------- | --------- | ----------------- | ----- | +| `ConnectRetryCount` | 1 | 2 (auto) | 5 (auto) | 0 – 255 | +| `ConnectRetryInterval` | 10 s | 10 s | 10 s | 1 – 60 s | + +Setting `ConnectRetryCount=0` disables both idle connection resiliency and session recovery +negotiation entirely. The elevated defaults for Azure endpoints are applied automatically in +`SqlConnection.CacheConnectionStringProperties()` ([SqlConnection.cs, line +~483](../../../src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/SqlConnection.cs)). + +### Code path + +```text +SqlCommand.RunExecuteNonQueryTds / RunExecuteReaderTds + └─ SqlConnection.ValidateAndReconnect(beforeDisconnect, timeout) + ├─ ValidateSNIConnection() → link alive? return null (no-op) + ├─ Check _sessionRecoveryAcknowledged + ├─ Snapshot CurrentSessionData → captures _database, _initialDatabase, _delta[] + ├─ DoomThisConnection() + └─ Task.Factory.StartNew → ReconnectAsync(timeout) + └─ for (attempt = 0; attempt < _connectRetryCount; attempt++) + ├─ ForceNewConnection = true + ├─ OpenAsync() → full login, sends session-recovery feature request + ├─ On success → return (session state restored by server) + ├─ On SqlException: + │ ├─ last attempt? → throw CR_AllAttemptsFailed + │ └─ timeout soon? → throw CR_NextAttemptWillExceedQueryTimeout + └─ Task.Delay(ConnectRetryInterval * 1000) +``` + +### Session state and database context + +During the reconnection login, the driver sends a **session recovery feature request** +(`TdsParser.WriteSessionRecoveryFeatureRequest`) containing the `SessionData` snapshot. This tells +the server to replay the session's accumulated state — including the current database — so that the +reconnected session is in the same logical state as the old one. + +If session recovery succeeds, the **current database is preserved** transparently. + +### Failure modes for database context + +| Condition | Outcome | +| --------- | ------- | +| Server does not support session recovery | `_sessionRecoveryAcknowledged` is `false`; `ValidateAndReconnect` returns `null`; the broken link surfaces as a `SqlException` on the next command — **no transparent retry** | +| Unrecoverable session states (e.g. open `MARS` sessions, temp tables, certain `SET` options) | `CR_UnrecoverableClient` / `CR_UnrecoverableServer` error thrown — **no transparent retry** | +| `ConnectRetryCount = 0` | Session recovery feature is not negotiated; no transparent reconnect | + +### Public documentation + +- [SqlConnection.ConnectionString — + `ConnectRetryCount`](https://learn.microsoft.com/dotnet/api/microsoft.data.sqlclient.sqlconnection.connectionstring): + *"Controls the number of reconnection attempts after the client identifies an idle connection + failure. Valid values are 0 to 255. The default is 1, which disables reconnecting on idle + connection failure; a value of 0 is not allowed." [...] "When set to a positive value, connection + resiliency is turned on. When the client detects a broken idle connection, it creates a new + connection and restores the server-side session state."* +- [Connection string + syntax](https://learn.microsoft.com/sql/connect/ado-net/connection-string-syntax): Lists + `ConnectRetryCount` and `ConnectRetryInterval` as connection string keywords. +- [SQL Server connection + pooling](https://learn.microsoft.com/sql/connect/ado-net/sql-server-connection-pooling): Describes + how severed connections are detected and how pool recycling interacts with connection lifetime. + +--- + +## 2 Connection Open Retry (Connection Retry) + +### What it is + +Retries of `SqlConnection.Open()` / `OpenAsync()` driven by the **configurable retry logic** +provider attached to the connection. This is *not* the same as idle connection resiliency — it wraps +the *initial* `Open` call (or any subsequent explicit `Open` after a `Close`). + +### When it triggers + +`SqlConnection.Open()` checks `IsProviderRetriable`. If true, the call is funnelled through +`TryOpenWithRetry()` → `RetryLogicProvider.Execute(this, () => TryOpen(…))`. The retry provider +catches exceptions, evaluates `TransientPredicate` and `RetryCondition`, and re-invokes the delegate +after an interval. + +### Configuration + +```csharp +// Assign a custom provider before opening +connection.RetryLogicProvider = SqlConfigurableRetryFactory.CreateFixedRetryProvider( + new SqlRetryLogicOption + { + NumberOfTries = 5, + DeltaTime = TimeSpan.FromSeconds(1), + TransientErrors = new[] { 4060, 40197, 40501, 40613, 49918, 49919, 49920, 11001 } + }); +connection.Open(); // retried up to 5 times on transient errors +``` + +Alternatively, retry logic can be configured globally via `appsettings.json` / +`SqlConfigurableRetryLogicManager`. + +### Database context implications + +Each retry calls `Open()` from scratch. The connection obtained is brand-new (from the pool or +freshly created). Database context is determined solely by the `Initial Catalog` in the connection +string — **no prior session state carries over**. + +This is expected behaviour: the connection was never open, so there is no prior state to restore. + +### Public documentation + +- [Configurable retry logic in + SqlClient](https://learn.microsoft.com/sql/connect/ado-net/configurable-retry-logic): Overview, + configuration, and API reference for `SqlRetryLogicBaseProvider` and + `SqlConfigurableRetryFactory`. +- [SqlConnection + class](https://learn.microsoft.com/dotnet/api/microsoft.data.sqlclient.sqlconnection): Documents + the `RetryLogicProvider` property. + +--- + +## 3 Command Execution Retry (Command Retry) + +### What it is + +Retries of individual command executions (`ExecuteNonQuery`, `ExecuteReader`, `ExecuteScalar`, +`ExecuteXmlReader` and their `Async` variants) driven by the `SqlRetryLogicBaseProvider` attached to +the *command*. + +### When it triggers + +Each public `Execute*` method checks `IsProviderRetriable`. If true, execution is wrapped in a retry +delegate. For example: + +```text +SqlCommand.ExecuteNonQuery() + └─ InternalExecuteNonQueryWithRetry(…) + └─ RetryLogicProvider.Execute(sender: this, function: () => InternalExecuteNonQuery(…)) +``` + +The retry provider catches `SqlException`, evaluates `TransientPredicate(e)` and +`RetryCondition(sender)`, and re-invokes the delegate on match. + +### Configuration + +```csharp +command.RetryLogicProvider = SqlConfigurableRetryFactory.CreateIncrementalRetryProvider( + new SqlRetryLogicOption + { + NumberOfTries = 3, + DeltaTime = TimeSpan.FromMilliseconds(500), + TransientErrors = new[] { 1205 } // deadlock victim + }); +int rows = command.ExecuteNonQuery(); // retried up to 3 times on deadlock +``` + +### How it differs from connection retries + +| Aspect | Idle Connection Resiliency (§1) | Command Retry (§3) | +| ------ | ------------------------------- | ------------------ | +| **Scope** | Reconnects the physical TCP/TDS session | Re-executes the SQL statement from scratch | +| **Trigger** | Dead SNI link detected before command runs | `SqlException` thrown *during* command execution | +| **Driven by** | `ConnectRetryCount` / `ConnectRetryInterval` in connection string | `SqlRetryLogicBaseProvider` on `SqlCommand.RetryLogicProvider` | +| **Session state** | Preserved via TDS session recovery feature request | **Lost** — the re-executed command runs on whatever session state the connection currently has | +| **Transaction awareness** | N/A (reconnected session cannot be in a user transaction) | `RetryCondition` rejects retry if the command has an active `SqlTransaction` or ambient `Transaction.Current` | +| **Idempotency** | Transparent — the command has not started yet | Caller's responsibility — the same SQL runs again; non-idempotent operations (INSERT, UPDATE) may double-execute | +| **Default state** | Enabled (`ConnectRetryCount ≥ 1`) | Disabled (no-op provider unless explicitly configured) | +| **Available since** | Always (Microsoft.Data.SqlClient 1.0) | Microsoft.Data.SqlClient 3.0 | + +### Database context implications + +Command retry re-invokes the command delegate on the **same `SqlConnection`**. If the connection's +physical link is still healthy, the session state (including current database) is unchanged. +However, if the transient error caused the physical connection to die and be replaced by the pool, +the new connection starts at `Initial Catalog`. + +> **Important:** Command retry does **not** call `ValidateAndReconnect`. It simply re-calls the +> inner execution method. Any physical reconnection that happens is a side-effect of the pool +> returning a different connection after the old one was doomed. + +### Retry condition guards + +`SqlRetryLogic.RetryCondition(sender)` prevents retries when: + +1. There is an ambient `System.Transactions.Transaction.Current`. +2. The `SqlCommand` has a non-null `Transaction` property (explicit `SqlTransaction`). +3. A user-supplied `PreCondition` predicate returns false. + +These guards exist because retrying inside a transaction is almost always incorrect — the +transaction is doomed after the first failure. + +### Public documentation + +- [Configurable retry logic in + SqlClient](https://learn.microsoft.com/sql/connect/ado-net/configurable-retry-logic): Full + walkthrough of `SqlRetryLogicOption`, `SqlConfigurableRetryFactory`, and per-command + configuration. +- [Step 4: Connect resiliently to SQL with + ADO.NET](https://learn.microsoft.com/sql/connect/ado-net/step-4-connect-resiliently-sql-ado-net): + Application-level retry pattern with transient error list (4060, 40197, 40501, 40613, 49918, + 49919, 49920, 11001). + +--- + +## 4 Pool Reset (`sp_reset_connection`) + +### What it is + +Not a retry mechanism per se, but a state-transition that resets session state on a pooled +connection before reuse. Understanding it is essential because it determines what database context a +recycled connection starts with. + +### When it triggers + +When a pooled connection is reused, `TdsParser.PrepareResetConnection()` piggybacks an +`sp_reset_connection` (or `sp_reset_connection_keep_transaction`) request onto the **first TDS +request** sent on that session. The server responds with `ENV_SPRESETCONNECTIONACK`, which triggers +`SessionData.Reset()`. + +### Effect on database context + +`sp_reset_connection` resets the session to its **login defaults**, which means: + +- Current database → reverts to the database from the login (typically `Initial Catalog`, or + `master` if none specified). +- All `SET` options → reverted to server defaults. +- Temporary tables → dropped. +- Session-level state → cleared. + +Any database context set via `USE [db]` during the previous lease of the connection is **lost**. + +### Public documentation + +- [SQL Server connection + pooling](https://learn.microsoft.com/sql/connect/ado-net/sql-server-connection-pooling): *"The + pool is automatically cleared when a fatal error occurs, such as a failover."* +- [Pool fragmentation due to many + databases](https://learn.microsoft.com/sql/connect/ado-net/sql-server-connection-pooling#pool-fragmentation-due-to-many-databases): + The official docs explicitly show the `USE` pattern for switching databases on a pooled connection + but note the pool-fragmentation implications. + +--- + +## 5 Application-Level Custom Retry (Not driver-managed) + +### What it is + +A retry loop implemented entirely in application code, outside the driver. Microsoft's documentation +provides a reference implementation for this pattern. + +### Example (from Microsoft Learn) + +```csharp +for (int tries = 1; tries <= maxRetries; tries++) +{ + try + { + using var connection = new SqlConnection(connectionString); + connection.Open(); + // execute command … + break; + } + catch (SqlException e) when (IsTransient(e.Number)) + { + if (tries == maxRetries) throw; + Thread.Sleep(TimeSpan.FromSeconds(Math.Pow(2, tries))); + } +} +``` + +### Database context implications + +Because each retry creates (or re-opens) a connection, the database context always starts at +`Initial Catalog`. Any session state from a previous attempt is irrelevant — it was on a different +physical connection. + +### Public documentation + +- [Step 4: Connect resiliently to SQL with + ADO.NET](https://learn.microsoft.com/sql/connect/ado-net/step-4-connect-resiliently-sql-ado-net): + Full C# sample with transient error classification. + +--- + +## Summary: Retry Mechanisms at a Glance + +| # | Mechanism | Level | Trigger | Preserves DB Context? | Enabled by Default? | +| - | --------- | ----- | ------- | --------------------- | ------------------- | +| 1 | Idle Connection Resiliency | Connection | Dead SNI link detected before command | **Yes** (via TDS session recovery) | Yes (`ConnectRetryCount=1`) | +| 2 | Connection Open Retry | Connection | `SqlException` during `Open()` | N/A (no prior session) | No (requires `RetryLogicProvider`) | +| 3 | Command Execution Retry | Command | `SqlException` during `Execute*()` | **No** — same connection, but if physical link was replaced, state is lost | No (requires `RetryLogicProvider`) | +| 4 | Pool Reset | Connection | Pooled connection reused | **No** — resets to login defaults | Yes (always, for pooled connections) | +| 5 | Application Custom Retry | Both | Application-defined | **No** — new connection each attempt | N/A (application code) | + +--- + +## Relationship to Issue #4108 + +The bug described in issue #4108 — *"SqlConnection doesn't restore database in the new session if +connection is lost"* — sits at the intersection of mechanisms 1 and 4: + +1. **If idle connection resiliency fires (§1)**, the session recovery feature request *should* + restore the `USE`-switched database. If it does not, there is a driver bug in the `SessionData` + snapshot or the recovery protocol. +2. **If the connection is instead replaced from the pool (§4)**, `sp_reset_connection` clears the + `USE` context and the connection reverts to `Initial Catalog`. This is **by design** — pool reset + always returns to login defaults. +3. **Command-level retry (§3)**, if configured, would re-execute the query on whatever connection + state exists *after* the physical link was replaced — meaning the database context may have been + lost. + +The detailed analysis of each flow and its database-context behaviour is in +[02-flows.md](02-flows.md) and [03-issues.md](03-issues.md). diff --git a/plans/database_context/06-server-side-analysis.md b/plans/database_context/06-server-side-analysis.md new file mode 100644 index 0000000000..0a9c790c3f --- /dev/null +++ b/plans/database_context/06-server-side-analysis.md @@ -0,0 +1,242 @@ +# SQL Server Source Analysis: Session Recovery and Database Context + +## Purpose + +This document examines the SQL Server engine's session recovery implementation (from the internal +`dotnet-sqlclient` repository indexed via Bluebird MCP) to identify how the server handles database +context during reconnection and where issue #4108 could manifest. + +--- + +## Server-Side Session Recovery Flow + +### 1. Parsing the Client's Recovery Request + +**File**: `/sql/ntdbms/tds/src/featureext.cpp` + +`CSessionRecoveryFeatureExtension::ParseFeatureData` (lines 1279–1432) handles the +`FEATUREEXT_SRECOVERY` data from the client's Login7 packet: + +1. Creates two `CTdsSessionState` objects: + - `m_pInitTdsSessionStateData` — baseline state from the initial login + - `m_pTdsSessionStateDataToBe` — target state that the client wants restored + +2. Parses the **initial chunk** into `m_pInitTdsSessionStateData`. + +3. **Clones** the initial data into `m_pTdsSessionStateDataToBe`, then parses the **delta chunk** + on top. The client only sends changed values; unchanged values inherit from the clone. + +4. Marks the physical connection as "Recovered" via `m_pPhyConn->SetRecovered()`. + +### 2. Parsing Each Data Chunk + +**File**: `/sql/ntdbms/tds/src/featureext.cpp` + +`CSessionRecoveryFeatureExtension::ParseSessionDataChunk` (lines 857–1279) deserializes each +`SessionRecoveryData` block. The TDS format is: + +``` +SessionRecoveryData = Length + RecoveryDatabase (BYTE len + WCHAR data) + RecoveryCollation (BYTE len + 0 or 5 bytes) + RecoveryLanguage (BYTE len + WCHAR data) + SessionStateDataSet (sequence of stateId + length + value) +``` + +Key handling for the database field: + +```cpp +// Parse RecoveryDatabase, BYTE len + WCHAR data +if (0 != cchByteLen) +{ + pTdsSS->SetRecoverDb( + reinterpret_cast(pbCurr + sizeof(BYTE)), cchByteLen); +} +``` + +**Critical detail**: If the client sends a zero-length database in the delta (ToBe) chunk — meaning +"no change from initial" — the ToBe object retains the **cloned initial** value. The server will +then restore the session to the initial database, not the current one. + +### 3. Database Determination During Recovery Login + +**File**: `/sql/ntdbms/frontend/ods/login.cpp` + +`LoginUseDbHelper::FDetermineSessionDb` (lines 6442–7066) determines the session's database using a +**3-source priority algorithm**: + +| Priority | Source | Description | +|----------|--------|-------------| +| **Source #0** (highest) | Recovery feature extension | `pTdsSS->GetRecoverDb()` from the ToBe chunk | +| Source #1 | Login record | `pss->PchLoginDbName` (initial catalog from connection string) | +| Source #2 | User's `syslogins` row | Default database for the login | + +For recovered connections (`pSess->FIsRecovered()`): + +```cpp +if (pSess->FIsRecovered() && !pSess->FSDS()) +{ + CTdsSessionState* pTdsSS = pSess->PTdsSessionState(fIsRedoLogin); + BYTE cchLen = 0; + pwchLoginDb = const_cast(pTdsSS->GetRecoverDb(cchLen)); + cbLoginDb = cchLen * sizeof(WCHAR); + + DBG_ASSERT(cbLoginDb > 0); // recovery db can never be empty +``` + +Source #0 is **mandatory** for recovered sessions. If the database USE fails (database dropped, +offline, etc.), the connection fails entirely (outcome #7) rather than silently falling back to a +different database. + +The server then compares the initial and ToBe databases, and if they differ, sets a +`fDbCollPendingUpdate` flag so the session's stored database/collation pair gets updated on first +use. + +### 4. Other Session State Recovery + +**File**: `/sql/ntdbms/frontend/execenv/session.cpp` + +`CSession::FRecoverSessionStateFromTDS` (lines 1682–1737) recovers session state **excluding +database, collation, and language** — those are handled separately by `FDetermineSessionDb` and +`FDetermineSessionLang` during the login flow. This function restores: + +- Identity insert settings +- User options (lock timeout, text size, date format, etc.) +- Context info buffer + +### 5. Server Sends Back Session State + +**File**: `/sql/ntdbms/tds/src/tdsimpl.cpp` + +`CTds74::SendSessionStateDataImpl` (lines 7236–7452) serializes the session state snapshot back to +the client as a `FEATUREEXT_SESSIONRECOVERY` acknowledgment. The `WriteFeatureAck` function: + +```cpp +bool CSessionRecoveryFeatureExtension::WriteFeatureAck(CNetConnection * pNetConn) +{ + // ... + pCOut->SendSessionStateData(m_pTdsSessionStateDataToBe, true /*fFeatureAck*/); + return true; +} +``` + +--- + +## How Issue #4108 Can Manifest + +### Path 1: Client sends zero-length database delta (reference equality bug concern) + +In the client's `WriteSessionRecoveryFeatureRequest` (TdsParser.cs, line 8963): + +```csharp +currentLength += 1 + 2 * (reconnectData._initialDatabase == reconnectData._database + ? 0 + : TdsParserStaticMethods.NullAwareStringLength(reconnectData._database)); +``` + +If `_initialDatabase == _database` evaluates to `true`, the client sends a zero-length database in +the ToBe chunk. The server clones the Init chunk's database — which is correct only if the user +never switched databases. In C#, `string ==` performs value comparison (not reference), so this +comparison is correct in normal operation. + +**However**, if `_database` is `null` (which happens after `SessionData.Reset()`), the comparison +is `_initialDatabase == null`. If `_initialDatabase` is non-null, the result is `false` and the +null database gets serialized — this would be a protocol error. The `CurrentSessionData` getter is +supposed to set `_database = CurrentDatabase` before the snapshot, which prevents this case. But +if the getter's write races with another operation (Issue B — thread safety), null could leak +through. + +### Path 2: Server correctly restores DB but client ignores it + +This is the **original root cause** of #4108. Even when the server properly restores the database +and sends `ENV_CHANGE(ENV_DATABASE)` with the correct database, the old `CompleteLogin()` code +simply set `_recoverySessionData = null` without checking whether `CurrentDatabase` matched the +recovery target. If the ENV_CHANGE was somehow missed, overwritten, or carried the wrong database, +the client silently diverged. + +**The fix** (implemented in PR #4130) adds a post-recovery verification: if `CurrentDatabase` +differs from the recovery target, a `USE [db]` is issued to force alignment. + +### Path 3: Double reconnection — stale `_initialDatabase` + +After the first recovery: +1. `CompleteLogin()` sets `_currentSessionData._initialDatabase = CurrentDatabase`. +2. If the connection breaks again immediately, the new `_recoverySessionData._initialDatabase` + is the **recovered** database (e.g. `"my_database"`), not the original `InitialCatalog`. +3. The client sends Init = `"my_database"` and ToBe = `"my_database"` (same) → zero-length delta. +4. The server clones Init and gets `"my_database"` in ToBe → correctly restores it. + +This path **works correctly** because `_initialDatabase` is updated to `CurrentDatabase` after +each successful login. The delta is effectively "no change" and the server restores the correct +database. + +### Path 4: Pool reset between USE and reconnection + +If the connection is returned to the pool after `USE [db]`: +1. `ResetConnection()` sets `CurrentDatabase = _originalDatabase` (initial catalog). +2. Server executes `sp_reset_connection` → database context reverts. +3. `SessionData.Reset()` clears `_database` to `null`. +4. Next acquisition uses the connection from the pool, already on initial catalog. + +If the connection breaks **after** pool reset but **before** the next user acquires it, the +recovery data has `_database = null` and `_initialDatabase = InitialCatalog`. The recovery +correctly restores to `InitialCatalog`. This is correct behavior — pool reset intentionally +discards the `USE` state. + +### Path 5: MARS connections + +With MARS enabled, `SqlConnection.ServerProcessId` returns 0, so `KILL {spid}` via that property +doesn't work. The manual tests account for this by using `SELECT @@SPID` instead. However, MARS +has an additional constraint: if multiple active result sets exist when the connection breaks, +`ValidateAndReconnect` throws `CR_UnrecoverableClient` — no silent recovery occurs. + +If there is exactly one active result set (or none) and the connection breaks, MARS reconnection +follows the same path as non-MARS. The session data snapshot and recovery request include the +correct `_database` value. + +--- + +## Test Coverage Analysis + +| Scenario | Unit Test | Manual Test | Adequacy | +|----------|-----------|-------------|----------| +| Basic USE → reconnect → correct recovery | Yes | Yes | Good | +| ChangeDatabase → reconnect | Yes | Yes | Good | +| Server returns wrong DB (ENV_CHANGE) | Yes (3 behaviors) | N/A | Good (can't control real server) | +| Server omits ENV_CHANGE entirely | Yes | N/A | Good | +| Pooled connection | Yes | Yes | Good | +| MARS | No | Yes | Medium — no simulated server test | +| Multiple database switches | No | Yes | Medium — no simulated server test | +| Double kill/reconnection | No | Yes | Medium — complex timing | +| Stress loop (100 iterations) | No | Yes | Good for catching intermittent failures | +| CREATE TABLE after reconnect (proof of server context) | No | Yes | Best evidence test | +| Pooled + double kill | No | No | **Gap** | +| MARS + buggy server recovery | No | No | **Gap** | +| Async double kill | No | No | **Gap** | + +### Gaps to Consider + +1. **Pooled + double kill**: No test combines connection pooling with two consecutive connection + kills. The pool's `ReplaceConnection` path may have different timing for `_initialDatabase` + update. + +2. **MARS + buggy server**: The unit test infrastructure supports MARS but no test combines MARS + with `RecoveryDatabaseBehavior.SendInitialCatalog`. However, the fix in `CompleteLogin` is + invoked identically for MARS and non-MARS connections, so the risk is low. + +3. **Async double kill**: The async reconnection paths (`RunExecuteReaderTdsSetupReconnectContinuation`) + differ from sync in continuation scheduling. A double kill during async reconnection exercises + different code paths. + +--- + +## Conclusion + +The SQL Server engine correctly implements session recovery for database context when the client +sends the right data. The server uses a **mandatory priority** for Source #0 (recovery database) +that never silently falls back to a different database. + +The root cause of #4108 is on the **client side**: `CompleteLogin()` did not verify whether the +server actually applied the recovered database context. The fix (post-recovery `USE` command) is +correct and handles all identified failure paths — buggy server response, missing ENV_CHANGE, and +edge cases — without adding overhead in the common case where the server behaves correctly. diff --git a/policy/coding-style.md b/policy/coding-style.md index 223b09039e..5e74c6d63a 100644 --- a/policy/coding-style.md +++ b/policy/coding-style.md @@ -15,8 +15,10 @@ The general rule we follow is "_use Visual Studio defaults_". 1. Text files should be UTF-8 encoded, no BOM, and use Unix line endings (LF). The exception is Windows script files, which may use CRLF. -1. Lines should be a maximum of 100 characters. Exceptions are made for long content such as URLs, - and when breaking text would be less readable. +1. Lines should be a maximum of 100 characters. Prefer filling lines to their full width rather + than breaking unnecessarily short; short-wrapped lines waste vertical space and hurt + readability. Exceptions are made for long content such as URLs, and when breaking text would + be less readable. 1. We use [Allman style](http://en.wikipedia.org/wiki/Indent_style#Allman_style) braces, where each brace begins on a new line. A single line statement block can go without braces but the block must be properly indented on its own line and must not be nested in other statement blocks that @@ -57,6 +59,9 @@ The general rule we follow is "_use Visual Studio defaults_". instead of literal characters. Literal non-ASCII characters occasionally get garbled by a tool or editor. 1. When using labels (for goto), indent the label one less than the current indentation. +1. We encourage the use of `#region` / `#endregion` to group related members within a class (e.g. + nested types, helpers, constants, test groups). Regions improve navigability in large files and + make the logical structure visible at a glance. 1. When using a single-statement if, we follow these conventions: - Never use single-line form (for example: `if (source == null) throw new ArgumentNullException("source");`) diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs index cf0a296b30..ec154c87a9 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs @@ -2314,7 +2314,47 @@ private void CompleteLogin(bool enlistOK) // @TODO: Rename as per guidelines _currentSessionData._encrypted = isEncrypted; } + // After successful session recovery, ensure that both the client and + // server agree on the current database. The recovery data carries the + // database the session was using before the connection dropped. If the + // server's login response (ENV_CHANGE) set a different database — or + // omitted the token entirely — we must issue a USE command to realign + // the server and then update CurrentDatabase to match. + string recoveredDatabase = null; + if (_recoverySessionData != null + && LocalAppContextSwitches.VerifyRecoveredDatabaseContext) + { + recoveredDatabase = _recoverySessionData._database + ?? _recoverySessionData._initialDatabase; + } + _recoverySessionData = null; + + if (recoveredDatabase != null + && !string.Equals(CurrentDatabase, recoveredDatabase, StringComparison.OrdinalIgnoreCase)) + { + // The server is not on the expected database. Force it there. + string safeName = SqlConnection.FixupDatabaseTransactionName(recoveredDatabase); + _parser._physicalStateObj.SniContext = SniContext.Snix_Login; + Task executeTask = _parser.TdsExecuteSQLBatch( + $"USE {safeName}", + ConnectionOptions.ConnectTimeout, + notificationRequest: null, + _parser._physicalStateObj, + sync: true); + Debug.Assert(executeTask == null, "Shouldn't get a task when doing sync writes"); + _parser.Run( + RunBehavior.UntilDone, + cmdHandler: null, + dataStream: null, + bulkCopyHandler: null, + _parser._physicalStateObj); + _parser._physicalStateObj.SniContext = SniContext.Snix_Login; + + // The USE command triggers an ENV_CHANGE that updates CurrentDatabase, + // but set it explicitly in case the response is unexpected. + CurrentDatabase = recoveredDatabase; + } } Debug.Assert(SniContext.Snix_Login == Parser._physicalStateObj.SniContext, diff --git a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/LocalAppContextSwitches.cs b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/LocalAppContextSwitches.cs index 7694092907..3909e7e150 100644 --- a/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/LocalAppContextSwitches.cs +++ b/src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/LocalAppContextSwitches.cs @@ -141,7 +141,14 @@ internal static class LocalAppContextSwitches /// private const string UseMinimumLoginTimeoutString = "Switch.Microsoft.Data.SqlClient.UseOneSecFloorInTimeoutCalculationDuringLogin"; - + + /// + /// The name of the app context switch that controls whether to verify + /// and correct the database context after session recovery. + /// + private const string VerifyRecoveredDatabaseContextString = + "Switch.Microsoft.Data.SqlClient.VerifyRecoveredDatabaseContext"; + #endregion #region Switch Values @@ -164,7 +171,7 @@ private enum SwitchValue : byte // GOTCHA: These fields are accessed via reflection by the // LocalAppContextSwitchesHelper test helper class. If you rename them, be // sure to update the test helper as well. - + #if NETFRAMEWORK /// /// The cached value of the DisableTnirByDefault switch. @@ -246,6 +253,11 @@ private enum SwitchValue : byte /// private static SwitchValue s_useMinimumLoginTimeout = SwitchValue.None; + /// + /// The cached value of the VerifyRecoveredDatabaseContext switch. + /// + private static SwitchValue s_verifyRecoveredDatabaseContext = SwitchValue.None; + #endregion #region Static Initialization @@ -628,6 +640,21 @@ public static bool UseManagedNetworking defaultValue: true, ref s_useMinimumLoginTimeout); + /// + /// When set to true, the driver verifies after a successful session + /// recovery that the server is on the same database the session was + /// using before the connection dropped. If there is a mismatch the + /// driver issues a USE command to correct the server. This prevents + /// silent database context loss across transparent reconnections. + /// + /// The default value of this switch is false. + /// + public static bool VerifyRecoveredDatabaseContext => + AcquireAndReturn( + VerifyRecoveredDatabaseContextString, + defaultValue: false, + ref s_verifyRecoveredDatabaseContext); + #endregion #region Helpers @@ -636,10 +663,10 @@ public static bool UseManagedNetworking /// Acquires the value of the specified app context switch and stores it /// in the given reference. Applies the default value if the switch isn't /// set. - /// + /// /// If the cached value is already set, it is returned immediately without /// attempting to re-acquire it. - /// + /// /// No attempt is made to prevent multiple threads from acquiring the same /// switch value simultaneously. The worst that can happen is that the /// switch is acquired more than once, and the last acquired value wins. @@ -672,6 +699,6 @@ private static bool AcquireAndReturn( switchValue = acquiredValue ? SwitchValue.True : SwitchValue.False; return acquiredValue; } - + #endregion } diff --git a/src/Microsoft.Data.SqlClient/tests/Common/LocalAppContextSwitchesHelper.cs b/src/Microsoft.Data.SqlClient/tests/Common/LocalAppContextSwitchesHelper.cs index 4a0cdf67de..a84b38fefc 100644 --- a/src/Microsoft.Data.SqlClient/tests/Common/LocalAppContextSwitchesHelper.cs +++ b/src/Microsoft.Data.SqlClient/tests/Common/LocalAppContextSwitchesHelper.cs @@ -58,8 +58,9 @@ public sealed class LocalAppContextSwitchesHelper : IDisposable private readonly bool? _useConnectionPoolV2Original; #if NET && _WINDOWS private readonly bool? _useManagedNetworkingOriginal; - #endif + #endif private readonly bool? _useMinimumLoginTimeoutOriginal; + private readonly bool? _verifyRecoveredDatabaseContextOriginal; #endregion @@ -123,6 +124,8 @@ public LocalAppContextSwitchesHelper() #endif _useMinimumLoginTimeoutOriginal = GetSwitchValue("s_useMinimumLoginTimeout"); + _verifyRecoveredDatabaseContextOriginal = + GetSwitchValue("s_verifyRecoveredDatabaseContext"); } catch { @@ -161,7 +164,7 @@ public void Dispose() "s_ignoreServerProvidedFailoverPartner", _ignoreServerProvidedFailoverPartnerOriginal); SetSwitchValue( - "s_legacyRowVersionNullBehavior", + "s_legacyRowVersionNullBehavior", _legacyRowVersionNullBehaviorOriginal); SetSwitchValue( "s_legacyVarTimeZeroScaleBehaviour", @@ -192,6 +195,9 @@ public void Dispose() SetSwitchValue( "s_useMinimumLoginTimeout", _useMinimumLoginTimeoutOriginal); + SetSwitchValue( + "s_verifyRecoveredDatabaseContext", + _verifyRecoveredDatabaseContextOriginal); } finally { @@ -349,6 +355,15 @@ public bool? UseMinimumLoginTimeout set => SetSwitchValue("s_useMinimumLoginTimeout", value); } + /// + /// Get or set the VerifyRecoveredDatabaseContext switch value. + /// + public bool? VerifyRecoveredDatabaseContext + { + get => GetSwitchValue("s_verifyRecoveredDatabaseContext"); + set => SetSwitchValue("s_verifyRecoveredDatabaseContext", value); + } + #endregion #region Helpers @@ -364,7 +379,7 @@ public bool? UseMinimumLoginTimeout throw new InvalidOperationException( "Could not get assembly for Microsoft.Data.SqlClient"); } - + var type = assembly.GetType("Microsoft.Data.SqlClient.LocalAppContextSwitches"); if (type is null) { @@ -411,7 +426,7 @@ private static void SetSwitchValue(string fieldName, bool? value) throw new InvalidOperationException( "Could not get assembly for Microsoft.Data.SqlClient"); } - + var type = assembly.GetType("Microsoft.Data.SqlClient.LocalAppContextSwitches"); if (type is null) { diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTests.csproj b/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTests.csproj index 25a1d53d0d..7bffbf20c7 100644 --- a/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTests.csproj +++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/Microsoft.Data.SqlClient.ManualTests.csproj @@ -98,6 +98,7 @@ + @@ -421,5 +422,5 @@ - + diff --git a/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/ConnectivityTests/DatabaseContextReconnectionTest.cs b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/ConnectivityTests/DatabaseContextReconnectionTest.cs new file mode 100644 index 0000000000..74ce653f30 --- /dev/null +++ b/src/Microsoft.Data.SqlClient/tests/ManualTests/SQL/ConnectivityTests/DatabaseContextReconnectionTest.cs @@ -0,0 +1,774 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Data; +using System.Threading; +using System.Threading.Tasks; +using Xunit; + +namespace Microsoft.Data.SqlClient.ManualTesting.Tests +{ + /// + /// Tests that database context is preserved after transparent reconnection + /// (session recovery) triggered by KILL on a real SQL Server. + /// + /// The VerifyRecoveredDatabaseContext AppContext switch is explicitly + /// set to false so the client-side defensive fix is disabled. Any + /// failure therefore proves that either the server or the driver (without the + /// fix) does not correctly restore the database context during session recovery. + /// + /// + /// Reproduces the scenario from dotnet/SqlClient#4108. + /// + /// + public sealed class DatabaseContextReconnectionTest : IDisposable + { + private const string SwitchName = + "Switch.Microsoft.Data.SqlClient.VerifyRecoveredDatabaseContext"; + + /// + /// Temporary database created for the test run. + /// We switch into this database and verify it survives reconnection. + /// + private readonly string _tempDbName; + + /// + /// Base connection string (from environment) without extra options. + /// + private readonly string _baseConnectionString; + + /// + /// Tracks tables created during a test so they can be cleaned up from + /// the initial catalog if a test failure causes them to land in the + /// wrong database. + /// + private readonly List _createdTableNames = new(); + + public DatabaseContextReconnectionTest() + { + _baseConnectionString = DataTestUtility.TCPConnectionString; + _tempDbName = "sqlclient_dbctx_" + Guid.NewGuid().ToString("N").Substring(0, 12); + + using SqlConnection conn = new(_baseConnectionString); + conn.Open(); + using SqlCommand cmd = conn.CreateCommand(); + cmd.CommandText = $"CREATE DATABASE [{_tempDbName}]"; + cmd.ExecuteNonQuery(); + } + + public void Dispose() + { + using SqlConnection conn = new(_baseConnectionString); + conn.Open(); + + // Clean up any tables that may have been created in the initial + // catalog if the database context bug caused DDL to execute in the + // wrong database. + if (_createdTableNames.Count > 0) + { + string initialCatalog = new SqlConnectionStringBuilder( + _baseConnectionString).InitialCatalog; + + if (!string.IsNullOrEmpty(initialCatalog) + && !string.Equals(initialCatalog, _tempDbName, + StringComparison.OrdinalIgnoreCase)) + { + foreach (string tableName in _createdTableNames) + { + try + { + using SqlCommand cmd = conn.CreateCommand(); + cmd.CommandText = + $"IF OBJECT_ID(N'[{initialCatalog}].dbo.[{tableName}]') " + + $"IS NOT NULL DROP TABLE [{initialCatalog}].dbo.[{tableName}]"; + cmd.ExecuteNonQuery(); + } + catch + { + // Best-effort cleanup + } + } + } + } + + DataTestUtility.DropDatabase(conn, _tempDbName); + } + + #region Helpers + + private SqlConnectionStringBuilder BuildConnectionString( + bool pooling = false, bool mars = false) + { + return new SqlConnectionStringBuilder(_baseConnectionString) + { + ConnectRetryCount = 2, + ConnectRetryInterval = 1, + ConnectTimeout = 10, + Pooling = pooling, + MultipleActiveResultSets = mars, + }; + } + + /// + /// Kill the target connection's SPID from a separate connection and + /// wait long enough for the CheckConnectionWindow to expire. + /// + private void KillSpid(int spid) + { + using SqlConnection killer = new(_baseConnectionString); + killer.Open(); + using SqlCommand cmd = new($"KILL {spid}", killer); + cmd.ExecuteNonQuery(); + // Let the CheckConnectionWindow (5 ms) expire so that the next + // ValidateSNIConnection call detects the dead link. + Thread.Sleep(100); + } + + /// + /// Returns the server-side SPID for the connection. Uses @@SPID + /// query which works correctly with MARS connections (where + /// may return 0). + /// + private static int GetServerSpid(SqlConnection conn) + { + using SqlCommand cmd = new("SELECT @@SPID", conn); + return (short)cmd.ExecuteScalar(); + } + + /// + /// Queries the server for its actual current database via + /// SELECT DB_NAME(). + /// + private static string GetServerDatabase(SqlConnection conn) + { + using SqlCommand cmd = new("SELECT DB_NAME()", conn); + return (string)cmd.ExecuteScalar(); + } + + /// + /// Assert that both client and server agree on the expected database. + /// + private static void AssertDatabaseContext( + SqlConnection conn, string expectedDb, string context) + { + string clientDb = conn.Database; + string serverDb = GetServerDatabase(conn); + + Assert.True( + string.Equals(expectedDb, clientDb, StringComparison.OrdinalIgnoreCase), + $"[{context}] Client database mismatch. " + + $"Expected: '{expectedDb}', connection.Database: '{clientDb}'"); + + Assert.True( + string.Equals(expectedDb, serverDb, StringComparison.OrdinalIgnoreCase), + $"[{context}] Server database mismatch. " + + $"Expected: '{expectedDb}', DB_NAME(): '{serverDb}'"); + } + + /// + /// Returns the connection_id GUID from sys.dm_exec_connections + /// for the current session. This value is unique per physical connection + /// and is guaranteed to change after reconnection, even when SQL Server + /// reuses the same SPID. + /// + private static Guid GetConnectionId(SqlConnection conn) + { + using SqlCommand cmd = new( + "SELECT connection_id FROM sys.dm_exec_connections WHERE session_id = @@SPID", + conn); + return (Guid)cmd.ExecuteScalar(); + } + + #endregion + + #region Single-shot tests + + /// + /// USE [tempDb] → KILL → command → verify both client and server are + /// on tempDb. No pooling, no MARS. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void UseDatabase_KillReconnect_PreservesContext() + { + AppContext.SetSwitch(SwitchName, false); + + var builder = BuildConnectionString(pooling: false); + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + // Switch to temp database + using (SqlCommand useCmd = new($"USE [{_tempDbName}]", conn)) + { + useCmd.ExecuteNonQuery(); + } + AssertDatabaseContext(conn, _tempDbName, "pre-kill"); + + Guid connIdBefore = GetConnectionId(conn); + KillSpid(conn.ServerProcessId); + + // This command triggers transparent reconnection + AssertDatabaseContext(conn, _tempDbName, "post-reconnect"); + Guid connIdAfter = GetConnectionId(conn); + Assert.NotEqual(connIdBefore, connIdAfter); + } + + /// + /// ChangeDatabase(tempDb) → KILL → command → verify context preserved. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void ChangeDatabase_KillReconnect_PreservesContext() + { + AppContext.SetSwitch(SwitchName, false); + + var builder = BuildConnectionString(pooling: false); + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + conn.ChangeDatabase(_tempDbName); + AssertDatabaseContext(conn, _tempDbName, "pre-kill"); + + Guid connIdBefore = GetConnectionId(conn); + KillSpid(conn.ServerProcessId); + + AssertDatabaseContext(conn, _tempDbName, "post-reconnect"); + Guid connIdAfter = GetConnectionId(conn); + Assert.NotEqual(connIdBefore, connIdAfter); + } + + /// + /// Same as but + /// with connection pooling enabled. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void UseDatabase_KillReconnect_Pooled_PreservesContext() + { + AppContext.SetSwitch(SwitchName, false); + + var builder = BuildConnectionString(pooling: true); + // Unique pool key so we don't interfere with other tests. + builder.ApplicationName = "DbCtxPoolTest_" + Guid.NewGuid().ToString("N").Substring(0, 8); + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + using (SqlCommand useCmd = new($"USE [{_tempDbName}]", conn)) + { + useCmd.ExecuteNonQuery(); + } + AssertDatabaseContext(conn, _tempDbName, "pre-kill"); + + Guid connIdBefore = GetConnectionId(conn); + KillSpid(conn.ServerProcessId); + + AssertDatabaseContext(conn, _tempDbName, "post-reconnect"); + Guid connIdAfter = GetConnectionId(conn); + Assert.NotEqual(connIdBefore, connIdAfter); + } + + /// + /// Same as but + /// with MARS enabled. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void UseDatabase_KillReconnect_MARS_PreservesContext() + { + AppContext.SetSwitch(SwitchName, false); + + var builder = BuildConnectionString(pooling: false, mars: true); + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + using (SqlCommand useCmd = new($"USE [{_tempDbName}]", conn)) + { + useCmd.ExecuteNonQuery(); + } + AssertDatabaseContext(conn, _tempDbName, "pre-kill"); + + Guid connIdBefore = GetConnectionId(conn); + KillSpid(GetServerSpid(conn)); + + AssertDatabaseContext(conn, _tempDbName, "post-reconnect"); + Guid connIdAfter = GetConnectionId(conn); + Assert.NotEqual(connIdBefore, connIdAfter); + } + + #endregion + + #region Stress tests + + /// + /// Runs USE → KILL → verify in a tight loop to surface intermittent + /// failures in session recovery. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void UseDatabase_KillReconnect_StressLoop_PreservesContext() + { + AppContext.SetSwitch(SwitchName, false); + + const int iterations = 100; + var builder = BuildConnectionString(pooling: false); + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + for (int i = 0; i < iterations; i++) + { + string context = $"USE stress iteration {i}"; + + // Switch to temp database (may already be there after + // reconnection, but USE is idempotent). + using (SqlCommand useCmd = new($"USE [{_tempDbName}]", conn)) + { + useCmd.ExecuteNonQuery(); + } + AssertDatabaseContext(conn, _tempDbName, context + " pre-kill"); + + Guid connIdBefore = GetConnectionId(conn); + KillSpid(conn.ServerProcessId); + + // The next command drives reconnection. + AssertDatabaseContext(conn, _tempDbName, context + " post-reconnect"); + Guid connIdAfter = GetConnectionId(conn); + Assert.NotEqual(connIdBefore, connIdAfter); + } + } + + /// + /// Same stress loop but via . + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void ChangeDatabase_KillReconnect_StressLoop_PreservesContext() + { + AppContext.SetSwitch(SwitchName, false); + + const int iterations = 100; + var builder = BuildConnectionString(pooling: false); + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + for (int i = 0; i < iterations; i++) + { + string context = $"ChangeDatabase stress iteration {i}"; + + conn.ChangeDatabase(_tempDbName); + AssertDatabaseContext(conn, _tempDbName, context + " pre-kill"); + + Guid connIdBefore = GetConnectionId(conn); + KillSpid(conn.ServerProcessId); + + AssertDatabaseContext(conn, _tempDbName, context + " post-reconnect"); + Guid connIdAfter = GetConnectionId(conn); + Assert.NotEqual(connIdBefore, connIdAfter); + } + } + + #endregion + + #region Object-creation tests + + /// + /// After USE → KILL → reconnect, creates a table via DDL and verifies + /// via a separate connection that it landed in the expected database, + /// not the initial catalog. This is the strongest proof that the + /// server's session context is actually on the right database. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void UseDatabase_KillReconnect_CreateTable_LandsInCorrectDb() + { + AppContext.SetSwitch(SwitchName, false); + + var builder = BuildConnectionString(pooling: false); + string tableName = "tbl_ctx_" + Guid.NewGuid().ToString("N").Substring(0, 8); + _createdTableNames.Add(tableName); + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + using (SqlCommand useCmd = new($"USE [{_tempDbName}]", conn)) + { + useCmd.ExecuteNonQuery(); + } + + KillSpid(conn.ServerProcessId); + + // Create a table — this DDL should execute in _tempDbName + using (SqlCommand createCmd = new( + $"CREATE TABLE [{tableName}] (Id INT PRIMARY KEY, Val NVARCHAR(50))", conn)) + { + createCmd.ExecuteNonQuery(); + } + + // Insert a row to confirm the table is usable + using (SqlCommand insertCmd = new( + $"INSERT INTO [{tableName}] (Id, Val) VALUES (1, 'reconnect_test')", conn)) + { + insertCmd.ExecuteNonQuery(); + } + + // Verify from a separate connection that the table exists in the + // temp database and NOT in the initial catalog. + using SqlConnection verifier = new(_baseConnectionString); + verifier.Open(); + + // Should exist in temp database + using (SqlCommand checkCmd = new( + $"SELECT COUNT(*) FROM [{_tempDbName}].INFORMATION_SCHEMA.TABLES " + + $"WHERE TABLE_NAME = @name", verifier)) + { + checkCmd.Parameters.AddWithValue("@name", tableName); + int count = (int)checkCmd.ExecuteScalar(); + Assert.True(count == 1, + $"Table '{tableName}' was NOT found in '{_tempDbName}'. " + + "DDL may have executed in the wrong database."); + } + + // Should NOT exist in the initial catalog + string initialCatalog = new SqlConnectionStringBuilder( + _baseConnectionString).InitialCatalog; + if (!string.IsNullOrEmpty(initialCatalog) + && !string.Equals(initialCatalog, _tempDbName, StringComparison.OrdinalIgnoreCase)) + { + using SqlCommand checkInitCmd = new( + $"SELECT COUNT(*) FROM [{initialCatalog}].INFORMATION_SCHEMA.TABLES " + + $"WHERE TABLE_NAME = @name", verifier); + checkInitCmd.Parameters.AddWithValue("@name", tableName); + int count = (int)checkInitCmd.ExecuteScalar(); + Assert.True(count == 0, + $"Table '{tableName}' was found in initial catalog '{initialCatalog}'. " + + "DDL executed in the WRONG database after reconnection!"); + } + } + + /// + /// Stress loop: each iteration switches database, kills the connection, + /// creates a uniquely-named table after reconnection, then verifies from + /// a separate connection that every table landed in the correct database. + /// Also runs a variable number of queries before and after the USE to + /// vary the session state and packet buffer contents. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void UseDatabase_KillReconnect_StressCreateTables_LandInCorrectDb() + { + AppContext.SetSwitch(SwitchName, false); + + const int iterations = 50; + var builder = BuildConnectionString(pooling: false); + var rng = new Random(42); // deterministic seed for reproducibility + string[] tableNames = new string[iterations]; + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + for (int i = 0; i < iterations; i++) + { + string context = $"stress-create iteration {i}"; + + // Variable workload BEFORE USE — pollute session state + int preQueries = rng.Next(0, 6); + for (int q = 0; q < preQueries; q++) + { + using SqlCommand workCmd = new( + $"SET NOCOUNT ON; SELECT TOP {rng.Next(1, 100)} * FROM sys.objects", conn); + using var reader = workCmd.ExecuteReader(); + while (reader.Read()) { } + } + + // Add session state: SET options increase recovery payload + if (i % 3 == 0) + { + using SqlCommand setCmd = new( + "SET TEXTSIZE 65536; SET LOCK_TIMEOUT 5000", conn); + setCmd.ExecuteNonQuery(); + } + + // Switch to temp database + using (SqlCommand useCmd = new($"USE [{_tempDbName}]", conn)) + { + useCmd.ExecuteNonQuery(); + } + + // Variable workload AFTER USE, BEFORE kill + int postQueries = rng.Next(0, 4); + for (int q = 0; q < postQueries; q++) + { + using SqlCommand workCmd = new("SELECT GETDATE()", conn); + workCmd.ExecuteScalar(); + } + + AssertDatabaseContext(conn, _tempDbName, context + " pre-kill"); + + Guid connIdBefore = GetConnectionId(conn); + KillSpid(conn.ServerProcessId); + + // Reconnection happens here — create a table + string tableName = $"tbl_s{i}_{Guid.NewGuid().ToString("N").Substring(0, 6)}"; + tableNames[i] = tableName; + _createdTableNames.Add(tableName); + + using (SqlCommand createCmd = new( + $"CREATE TABLE [{tableName}] (Id INT)", conn)) + { + createCmd.ExecuteNonQuery(); + } + + AssertDatabaseContext(conn, _tempDbName, context + " post-create"); + Guid connIdAfter = GetConnectionId(conn); + Assert.NotEqual(connIdBefore, connIdAfter); + } + + // Bulk verification: every table must exist in _tempDbName + using SqlConnection verifier = new(_baseConnectionString); + verifier.Open(); + + string initialCatalog = new SqlConnectionStringBuilder( + _baseConnectionString).InitialCatalog; + + for (int i = 0; i < iterations; i++) + { + using SqlCommand checkCmd = new( + $"SELECT COUNT(*) FROM [{_tempDbName}].INFORMATION_SCHEMA.TABLES " + + $"WHERE TABLE_NAME = @name", verifier); + checkCmd.Parameters.AddWithValue("@name", tableNames[i]); + int found = (int)checkCmd.ExecuteScalar(); + Assert.True(found == 1, + $"Iteration {i}: Table '{tableNames[i]}' NOT found in '{_tempDbName}'. " + + "Object creation landed in the wrong database."); + + // Negative check against initial catalog + if (!string.IsNullOrEmpty(initialCatalog) + && !string.Equals(initialCatalog, _tempDbName, + StringComparison.OrdinalIgnoreCase)) + { + using SqlCommand negCmd = new( + $"SELECT COUNT(*) FROM [{initialCatalog}].INFORMATION_SCHEMA.TABLES " + + $"WHERE TABLE_NAME = @name", verifier); + negCmd.Parameters.AddWithValue("@name", tableNames[i]); + int wrongDb = (int)negCmd.ExecuteScalar(); + Assert.True(wrongDb == 0, + $"Iteration {i}: Table '{tableNames[i]}' found in initial catalog " + + $"'{initialCatalog}' — DDL executed in WRONG database!"); + } + } + } + + /// + /// Two database switches before kill: USE initialCatalog → USE tempDb → KILL. + /// After reconnection, creates a table and verifies the *last* switch won. + /// Catches bugs where only the first database change is recorded in recovery data. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void MultipleDatabaseSwitches_KillReconnect_LastSwitchWins() + { + AppContext.SetSwitch(SwitchName, false); + + var builder = BuildConnectionString(pooling: false); + string initialCatalog = new SqlConnectionStringBuilder( + _baseConnectionString).InitialCatalog; + string tableName = "tbl_multi_" + Guid.NewGuid().ToString("N").Substring(0, 8); + _createdTableNames.Add(tableName); + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + // Switch away and back multiple times + using (SqlCommand cmd = new($"USE [{_tempDbName}]", conn)) + { + cmd.ExecuteNonQuery(); + } + using (SqlCommand cmd = new($"USE [{initialCatalog}]", conn)) + { + cmd.ExecuteNonQuery(); + } + using (SqlCommand cmd = new($"USE [{_tempDbName}]", conn)) + { + cmd.ExecuteNonQuery(); + } + + AssertDatabaseContext(conn, _tempDbName, "after triple switch"); + + KillSpid(conn.ServerProcessId); + + // After reconnection, create a table — must land in _tempDbName + using (SqlCommand createCmd = new( + $"CREATE TABLE [{tableName}] (Id INT)", conn)) + { + createCmd.ExecuteNonQuery(); + } + + AssertDatabaseContext(conn, _tempDbName, "post-reconnect"); + + // Verify object location + using SqlConnection verifier = new(_baseConnectionString); + verifier.Open(); + + using (SqlCommand checkCmd = new( + $"SELECT COUNT(*) FROM [{_tempDbName}].INFORMATION_SCHEMA.TABLES " + + $"WHERE TABLE_NAME = @name", verifier)) + { + checkCmd.Parameters.AddWithValue("@name", tableName); + Assert.Equal(1, (int)checkCmd.ExecuteScalar()); + } + + if (!string.Equals(initialCatalog, _tempDbName, + StringComparison.OrdinalIgnoreCase)) + { + using SqlCommand negCmd = new( + $"SELECT COUNT(*) FROM [{initialCatalog}].INFORMATION_SCHEMA.TABLES " + + $"WHERE TABLE_NAME = @name", verifier); + negCmd.Parameters.AddWithValue("@name", tableName); + Assert.Equal(0, (int)negCmd.ExecuteScalar()); + } + } + + /// + /// Kills the connection twice in rapid succession: once after USE, and + /// again immediately after the first reconnection completes (before any + /// user queries). Then creates a table and verifies it lands in the + /// correct database. Tests that recovery data is re-snapshotted + /// correctly on the second reconnection. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public void UseDatabase_DoubleKill_CreateTable_LandsInCorrectDb() + { + AppContext.SetSwitch(SwitchName, false); + + var builder = BuildConnectionString(pooling: false); + builder.ConnectRetryCount = 3; // Need extra retries for double kill + string tableName = "tbl_dblkill_" + Guid.NewGuid().ToString("N").Substring(0, 8); + _createdTableNames.Add(tableName); + + using SqlConnection conn = new(builder.ConnectionString); + conn.Open(); + + using (SqlCommand useCmd = new($"USE [{_tempDbName}]", conn)) + { + useCmd.ExecuteNonQuery(); + } + + // First kill + KillSpid(conn.ServerProcessId); + + // Force reconnection by querying — this triggers session recovery + string dbAfterFirst = GetServerDatabase(conn); + Assert.True( + string.Equals(_tempDbName, dbAfterFirst, StringComparison.OrdinalIgnoreCase), + $"After first kill: expected '{_tempDbName}', got '{dbAfterFirst}'"); + + // Second kill — immediately after first reconnection + KillSpid(conn.ServerProcessId); + + // Create table after second reconnection + using (SqlCommand createCmd = new( + $"CREATE TABLE [{tableName}] (Id INT)", conn)) + { + createCmd.ExecuteNonQuery(); + } + + AssertDatabaseContext(conn, _tempDbName, "after double kill"); + + // Verify object location + using SqlConnection verifier = new(_baseConnectionString); + verifier.Open(); + using SqlCommand checkCmd = new( + $"SELECT COUNT(*) FROM [{_tempDbName}].INFORMATION_SCHEMA.TABLES " + + $"WHERE TABLE_NAME = @name", verifier); + checkCmd.Parameters.AddWithValue("@name", tableName); + Assert.Equal(1, (int)checkCmd.ExecuteScalar()); + } + + /// + /// Uses async code paths (ExecuteNonQueryAsync, + /// ExecuteScalarAsync) for the USE, post-reconnect DDL, and + /// verification queries. Async internals differ from sync and may + /// race differently during session recovery. + /// + [ConditionalFact(typeof(DataTestUtility), + nameof(DataTestUtility.AreConnStringsSetup), + nameof(DataTestUtility.IsNotAzureServer))] + public async Task UseDatabase_KillReconnect_Async_CreateTable_LandsInCorrectDb() + { + AppContext.SetSwitch(SwitchName, false); + + var builder = BuildConnectionString(pooling: false); + string tableName = "tbl_async_" + Guid.NewGuid().ToString("N").Substring(0, 8); + _createdTableNames.Add(tableName); + + using SqlConnection conn = new(builder.ConnectionString); + await conn.OpenAsync(); + + using (SqlCommand useCmd = new($"USE [{_tempDbName}]", conn)) + { + await useCmd.ExecuteNonQueryAsync(); + } + + KillSpid(conn.ServerProcessId); + + // Async DDL after reconnection + using (SqlCommand createCmd = new( + $"CREATE TABLE [{tableName}] (Id INT PRIMARY KEY, Val NVARCHAR(50))", conn)) + { + await createCmd.ExecuteNonQueryAsync(); + } + + using (SqlCommand insertCmd = new( + $"INSERT INTO [{tableName}] (Id, Val) VALUES (1, 'async_test')", conn)) + { + await insertCmd.ExecuteNonQueryAsync(); + } + + // Async read-back + using (SqlCommand readCmd = new($"SELECT Val FROM [{tableName}] WHERE Id = 1", conn)) + { + string val = (string)await readCmd.ExecuteScalarAsync(); + Assert.Equal("async_test", val); + } + + // Async DB_NAME check + using (SqlCommand dbCmd = new("SELECT DB_NAME()", conn)) + { + string serverDb = (string)await dbCmd.ExecuteScalarAsync(); + Assert.True( + string.Equals(_tempDbName, serverDb, StringComparison.OrdinalIgnoreCase), + $"Async: expected '{_tempDbName}', DB_NAME(): '{serverDb}'"); + } + + // Verify from separate connection + using SqlConnection verifier = new(_baseConnectionString); + verifier.Open(); + using SqlCommand checkCmd = new( + $"SELECT COUNT(*) FROM [{_tempDbName}].INFORMATION_SCHEMA.TABLES " + + $"WHERE TABLE_NAME = @name", verifier); + checkCmd.Parameters.AddWithValue("@name", tableName); + Assert.Equal(1, (int)checkCmd.ExecuteScalar()); + } + + #endregion + } +} diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/Microsoft/Data/SqlClient/LocalAppContextSwitchesTest.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/Microsoft/Data/SqlClient/LocalAppContextSwitchesTest.cs index 59445a26b6..67d8a251ef 100644 --- a/src/Microsoft.Data.SqlClient/tests/UnitTests/Microsoft/Data/SqlClient/LocalAppContextSwitchesTest.cs +++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/Microsoft/Data/SqlClient/LocalAppContextSwitchesTest.cs @@ -30,6 +30,7 @@ public void TestDefaultAppContextSwitchValues() Assert.False(LocalAppContextSwitches.IgnoreServerProvidedFailoverPartner); Assert.False(LocalAppContextSwitches.EnableUserAgent); Assert.False(LocalAppContextSwitches.EnableMultiSubnetFailoverByDefault); + Assert.False(LocalAppContextSwitches.VerifyRecoveredDatabaseContext); #if NET Assert.False(LocalAppContextSwitches.GlobalizationInvariantMode); #endif diff --git a/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/DatabaseContextReconnectionTests.cs b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/DatabaseContextReconnectionTests.cs new file mode 100644 index 0000000000..b6723a4f62 --- /dev/null +++ b/src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/DatabaseContextReconnectionTests.cs @@ -0,0 +1,610 @@ +// Licensed to the .NET Foundation under one or more agreements. The .NET Foundation licenses this +// file to you under the MIT license. See the LICENSE file in the project root for more +// information. + +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.Data.SqlClient.Tests.Common; +using Microsoft.SqlServer.TDS; +using Microsoft.SqlServer.TDS.Done; +using Microsoft.SqlServer.TDS.EndPoint; +using Microsoft.SqlServer.TDS.EnvChange; +using Microsoft.SqlServer.TDS.Info; +using Microsoft.SqlServer.TDS.Servers; +using Microsoft.SqlServer.TDS.SQLBatch; +using Xunit; + +namespace Microsoft.Data.SqlClient.UnitTests.SimulatedServerTests +{ + /// + /// Tests for database context preservation across reconnections. + /// Reproduces the scenario from dotnet/SqlClient#4108: after executing USE [db] and then + /// losing the connection, the reconnected session should retain the switched database. + /// + [Collection("SimulatedServerTests")] + public class DatabaseContextReconnectionTests + { + private const string InitialDatabase = "initialdb"; + private const string SwitchedDatabase = "switcheddb"; + + /// + /// Minimum delay (ms) after disconnecting clients to ensure the + /// CheckConnectionWindow (5 ms) in ValidateSNIConnection has expired, + /// so that the broken connection is detected before the next command. + /// + private const int PostDisconnectDelayMs = 50; + + #region Test Infrastructure + + /// + /// Controls how the test server handles the database ENV_CHANGE during a session + /// recovery login. + /// + private enum RecoveryDatabaseBehavior + { + /// + /// The server correctly sends ENV_CHANGE with the recovered database name taken + /// from the session recovery feature request. This is proper server behavior. + /// + SendRecoveredDatabase, + + /// + /// The server ignores the recovered database and sends ENV_CHANGE with the + /// login packet's initial catalog instead. This simulates a server bug where + /// session recovery does not restore the database context. + /// + SendInitialCatalog, + + /// + /// The server omits the database ENV_CHANGE token entirely from the login + /// response. This simulates a server bug where the database change notification + /// is completely missing after session recovery. + /// + OmitDatabaseEnvChange, + } + + /// + /// A query engine that recognises USE [database] commands and updates the session's + /// current database accordingly, returning the correct EnvChange tokens. + /// + private sealed class DatabaseContextQueryEngine : QueryEngine + { + private static readonly Regex s_useDbRegex = new( + @"^\s*use\s+\[?(?[^\]\s;]+)\]?\s*;?\s*$", + RegexOptions.IgnoreCase | RegexOptions.Compiled); + + public DatabaseContextQueryEngine(TdsServerArguments arguments) + : base(arguments) + { + } + + protected override TDSMessageCollection CreateQueryResponse( + ITDSServerSession session, TDSSQLBatchToken batchRequest) + { + string text = batchRequest.Text; + Match match = s_useDbRegex.Match(text); + + if (match.Success) + { + return HandleUseDatabase(session, match.Groups["db"].Value); + } + + return base.CreateQueryResponse(session, batchRequest); + } + + private static TDSMessageCollection HandleUseDatabase(ITDSServerSession session, + string newDatabase) + { + string oldDatabase = session.Database; + session.Database = newDatabase; + + var envChange = new TDSEnvChangeToken( + TDSEnvChangeTokenType.Database, newDatabase, oldDatabase); + + var infoToken = new TDSInfoToken(5701, 2, 0, + $"Changed database context to '{newDatabase}'.", "TestServer"); + + var doneToken = new TDSDoneToken(TDSDoneTokenStatusType.Final); + + var response = new TDSMessage( + TDSMessageType.Response, envChange, infoToken, doneToken); + + return new TDSMessageCollection(response); + } + } + + /// + /// A TDS server subclass that supports testing database context recovery during + /// reconnection. Exposes + /// and allows controlling whether the login response carries the recovered database + /// or the initial catalog via . + /// + private sealed class DisconnectableTdsServer : GenericTdsServer + { + public int Port => EndPoint.Port; + + /// + /// Controls how the server responds to session recovery with a changed database. + /// Default is (correct + /// server behavior). + /// + public RecoveryDatabaseBehavior RecoveryBehavior { get; set; } + = RecoveryDatabaseBehavior.SendRecoveredDatabase; + + /// + /// The database name the server used in the most recent login ENV_CHANGE response. + /// Useful for test assertions. + /// + public string? LastLoginResponseDatabase { get; private set; } + + public DisconnectableTdsServer( + RecoveryDatabaseBehavior behavior = RecoveryDatabaseBehavior.SendRecoveredDatabase) + : this(new TdsServerArguments(), behavior) + { + } + + private DisconnectableTdsServer(TdsServerArguments args, + RecoveryDatabaseBehavior behavior) + : base(args, new DatabaseContextQueryEngine(args)) + { + RecoveryBehavior = behavior; + Start(); + } + + public new void DisconnectAllClients() + => base.DisconnectAllClients(); + + /// + /// Overrides the login response to control whether the database ENV_CHANGE + /// carries the recovered database or the initial catalog. + /// + protected override TDSMessageCollection OnAuthenticationCompleted( + ITDSServerSession session) + { + if (RecoveryBehavior == RecoveryDatabaseBehavior.SendInitialCatalog + && session.IsSessionRecoveryEnabled + && Login7Count > 1) + { + // Simulate a server bug: after session recovery inflated the session + // (which set session.Database to the recovered DB), forcibly revert + // session.Database to the initial catalog before the base class builds + // the ENV_CHANGE token. + session.Database = InitialDatabase; + } + + TDSMessageCollection result = base.OnAuthenticationCompleted(session); + + TDSMessage msg = result[0]; + + if (RecoveryBehavior == RecoveryDatabaseBehavior.OmitDatabaseEnvChange + && session.IsSessionRecoveryEnabled + && Login7Count > 1) + { + // Strip the database ENV_CHANGE and its accompanying INFO(5701) + // token from the response to simulate a server that never sends + // the database change notification. + for (int i = msg.Count - 1; i >= 0; i--) + { + if (msg[i] is TDSEnvChangeToken ec + && ec.Type == TDSEnvChangeTokenType.Database) + { + msg.RemoveAt(i); + } + else if (msg[i] is TDSInfoToken info && info.Number == 5701) + { + msg.RemoveAt(i); + } + } + + LastLoginResponseDatabase = null; + } + else + { + // Capture the database from the ENV_CHANGE for test assertions. + foreach (var token in msg) + { + if (token is TDSEnvChangeToken envChange + && envChange.Type == TDSEnvChangeTokenType.Database) + { + LastLoginResponseDatabase = (string)envChange.NewValue; + } + } + } + + return result; + } + } + + #endregion + + #region Helpers + + private static SqlConnectionStringBuilder CreateConnectionStringBuilder(int port, + string initialCatalog = InitialDatabase) + { + return new SqlConnectionStringBuilder + { + DataSource = $"localhost,{port}", + InitialCatalog = initialCatalog, + Encrypt = SqlConnectionEncryptOption.Optional, + ConnectRetryCount = 2, + ConnectRetryInterval = 1, + ConnectTimeout = 10, + Pooling = false, + }; + } + + /// + /// Disconnect all server clients and wait for the + /// CheckConnectionWindow to expire so that the next + /// ValidateSNIConnection call properly detects the dead link. + /// + private static void DisconnectAndWait(DisconnectableTdsServer server) + { + server.DisconnectAllClients(); + System.Threading.Thread.Sleep(PostDisconnectDelayMs); + } + + #endregion + + #region Baseline Tests + + /// + /// Verifies that after executing USE [database], the + /// property reflects the switched database context. This is the baseline behaviour that + /// must work even without reconnection. + /// + [Fact] + public void UseDatabaseCommand_UpdatesConnectionDatabaseProperty() + { + using DisconnectableTdsServer server = new(); + SqlConnectionStringBuilder builder = CreateConnectionStringBuilder(server.Port); + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + Assert.Equal(InitialDatabase, connection.Database); + + using SqlCommand cmd = new($"USE [{SwitchedDatabase}]", connection); + cmd.ExecuteNonQuery(); + + Assert.Equal(SwitchedDatabase, connection.Database); + } + + /// + /// Verifies that sends the correct protocol + /// messages and updates the Database property. + /// + [Fact] + public void ChangeDatabase_UpdatesConnectionDatabaseProperty() + { + using DisconnectableTdsServer server = new(); + SqlConnectionStringBuilder builder = CreateConnectionStringBuilder(server.Port); + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + Assert.Equal(InitialDatabase, connection.Database); + + connection.ChangeDatabase(SwitchedDatabase); + + Assert.Equal(SwitchedDatabase, connection.Database); + } + + #endregion + + #region Reconnection Tests — Proper Server Recovery + + /// + /// After switching the database via USE [db] and reconnecting, the server properly + /// restores the database context via session recovery and sends the correct ENV_CHANGE. + /// The client's must reflect the recovered database. + /// + [Fact] + public void UseDatabase_ProperRecovery_DatabaseContextPreservedAfterReconnect() + { + using DisconnectableTdsServer server = new(RecoveryDatabaseBehavior.SendRecoveredDatabase); + SqlConnectionStringBuilder builder = CreateConnectionStringBuilder(server.Port); + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + using (SqlCommand cmd = new($"USE [{SwitchedDatabase}]", connection)) + { + cmd.ExecuteNonQuery(); + } + + Assert.Equal(SwitchedDatabase, connection.Database); + + DisconnectAndWait(server); + + using (SqlCommand cmd = new("SELECT 1", connection)) + { + cmd.ExecuteNonQuery(); + } + + // The server sent ENV_CHANGE with the recovered database. + Assert.Equal(SwitchedDatabase, server.LastLoginResponseDatabase); + Assert.Equal(SwitchedDatabase, connection.Database); + } + + /// + /// After switching via and reconnecting, + /// the server properly restores the database context. + /// + [Fact] + public void ChangeDatabase_ProperRecovery_DatabaseContextPreservedAfterReconnect() + { + using DisconnectableTdsServer server = new(RecoveryDatabaseBehavior.SendRecoveredDatabase); + SqlConnectionStringBuilder builder = CreateConnectionStringBuilder(server.Port); + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + connection.ChangeDatabase(SwitchedDatabase); + Assert.Equal(SwitchedDatabase, connection.Database); + + DisconnectAndWait(server); + + using (SqlCommand cmd = new("SELECT 1", connection)) + { + cmd.ExecuteNonQuery(); + } + + Assert.Equal(SwitchedDatabase, server.LastLoginResponseDatabase); + Assert.Equal(SwitchedDatabase, connection.Database); + } + + /// + /// With pooling enabled, the reconnected connection should still preserve the database + /// context through session recovery when the server properly handles it. + /// + [Fact] + public void UseDatabase_ProperRecovery_Pooled_DatabaseContextPreservedAfterReconnect() + { + using DisconnectableTdsServer server = new(RecoveryDatabaseBehavior.SendRecoveredDatabase); + SqlConnectionStringBuilder builder = new() + { + DataSource = $"localhost,{server.Port}", + InitialCatalog = InitialDatabase, + Encrypt = SqlConnectionEncryptOption.Optional, + ConnectRetryCount = 2, + ConnectRetryInterval = 1, + ConnectTimeout = 10, + Pooling = true, + }; + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + using (SqlCommand cmd = new($"USE [{SwitchedDatabase}]", connection)) + { + cmd.ExecuteNonQuery(); + } + + Assert.Equal(SwitchedDatabase, connection.Database); + + DisconnectAndWait(server); + + using (SqlCommand cmd = new("SELECT 1", connection)) + { + cmd.ExecuteNonQuery(); + } + + Assert.Equal(SwitchedDatabase, server.LastLoginResponseDatabase); + Assert.Equal(SwitchedDatabase, connection.Database); + + SqlConnection.ClearPool(connection); + } + + #endregion + + #region Reconnection Tests — Buggy Server (wrong database in recovery) + + /// + /// Simulates a server bug where session recovery acknowledges the feature but does + /// NOT restore the database context — the ENV_CHANGE carries the initial catalog + /// instead of the recovered database. + /// + /// When VerifyRecoveredDatabaseContext is true, the client detects + /// the mismatch and issues a corrective USE command. When false + /// (the default), the client trusts the server's ENV_CHANGE and reports the + /// initial catalog. + /// + [Theory] + [InlineData(true)] + [InlineData(false)] + public void UseDatabase_BuggyRecovery_DatabaseContextDependsOnSwitch( + bool verifyRecoveredDb) + { + using LocalAppContextSwitchesHelper switchesHelper = new(); + switchesHelper.VerifyRecoveredDatabaseContext = verifyRecoveredDb; + + using DisconnectableTdsServer server = new(RecoveryDatabaseBehavior.SendInitialCatalog); + SqlConnectionStringBuilder builder = CreateConnectionStringBuilder(server.Port); + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + using (SqlCommand cmd = new($"USE [{SwitchedDatabase}]", connection)) + { + cmd.ExecuteNonQuery(); + } + + Assert.Equal(SwitchedDatabase, connection.Database); + + DisconnectAndWait(server); + + using (SqlCommand cmd = new("SELECT 1", connection)) + { + cmd.ExecuteNonQuery(); + } + + // The buggy server sent InitialCatalog in ENV_CHANGE. + Assert.Equal(InitialDatabase, server.LastLoginResponseDatabase); + + string expectedDatabase = verifyRecoveredDb ? SwitchedDatabase : InitialDatabase; + Assert.Equal(expectedDatabase, connection.Database); + } + + /// + /// Same as + /// but using . + /// + [Theory] + [InlineData(true)] + [InlineData(false)] + public void ChangeDatabase_BuggyRecovery_DatabaseContextDependsOnSwitch( + bool verifyRecoveredDb) + { + using LocalAppContextSwitchesHelper switchesHelper = new(); + switchesHelper.VerifyRecoveredDatabaseContext = verifyRecoveredDb; + + using DisconnectableTdsServer server = new(RecoveryDatabaseBehavior.SendInitialCatalog); + SqlConnectionStringBuilder builder = CreateConnectionStringBuilder(server.Port); + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + connection.ChangeDatabase(SwitchedDatabase); + Assert.Equal(SwitchedDatabase, connection.Database); + + DisconnectAndWait(server); + + using (SqlCommand cmd = new("SELECT 1", connection)) + { + cmd.ExecuteNonQuery(); + } + + Assert.Equal(InitialDatabase, server.LastLoginResponseDatabase); + + string expectedDatabase = verifyRecoveredDb ? SwitchedDatabase : InitialDatabase; + Assert.Equal(expectedDatabase, connection.Database); + } + + #endregion + + #region Reconnection Tests — Buggy Server (omitted database ENV_CHANGE) + + /// + /// Simulates a server bug where the database ENV_CHANGE token is completely + /// omitted from the login response during session recovery. + /// + /// When VerifyRecoveredDatabaseContext is true, the client detects + /// the mismatch and issues a corrective USE command. When false + /// (the default), the client falls back to the initial catalog. + /// + [Theory] + [InlineData(true)] + [InlineData(false)] + public void UseDatabase_OmittedEnvChange_DatabaseContextDependsOnSwitch( + bool verifyRecoveredDb) + { + using LocalAppContextSwitchesHelper switchesHelper = new(); + switchesHelper.VerifyRecoveredDatabaseContext = verifyRecoveredDb; + + using DisconnectableTdsServer server = new(RecoveryDatabaseBehavior.OmitDatabaseEnvChange); + SqlConnectionStringBuilder builder = CreateConnectionStringBuilder(server.Port); + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + using (SqlCommand cmd = new($"USE [{SwitchedDatabase}]", connection)) + { + cmd.ExecuteNonQuery(); + } + + Assert.Equal(SwitchedDatabase, connection.Database); + + DisconnectAndWait(server); + + using (SqlCommand cmd = new("SELECT 1", connection)) + { + cmd.ExecuteNonQuery(); + } + + // The server never sent a database ENV_CHANGE. + Assert.Null(server.LastLoginResponseDatabase); + + string expectedDatabase = verifyRecoveredDb ? SwitchedDatabase : InitialDatabase; + Assert.Equal(expectedDatabase, connection.Database); + } + + /// + /// Same as + /// but using . + /// + [Theory] + [InlineData(true)] + [InlineData(false)] + public void ChangeDatabase_OmittedEnvChange_DatabaseContextDependsOnSwitch( + bool verifyRecoveredDb) + { + using LocalAppContextSwitchesHelper switchesHelper = new(); + switchesHelper.VerifyRecoveredDatabaseContext = verifyRecoveredDb; + + using DisconnectableTdsServer server = new(RecoveryDatabaseBehavior.OmitDatabaseEnvChange); + SqlConnectionStringBuilder builder = CreateConnectionStringBuilder(server.Port); + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + connection.ChangeDatabase(SwitchedDatabase); + Assert.Equal(SwitchedDatabase, connection.Database); + + DisconnectAndWait(server); + + using (SqlCommand cmd = new("SELECT 1", connection)) + { + cmd.ExecuteNonQuery(); + } + + Assert.Null(server.LastLoginResponseDatabase); + + string expectedDatabase = verifyRecoveredDb ? SwitchedDatabase : InitialDatabase; + Assert.Equal(expectedDatabase, connection.Database); + } + + #endregion + + #region No-Retry Tests + + /// + /// Verifies that with ConnectRetryCount=0, session recovery is not negotiated and a + /// broken connection raises an error rather than silently reconnecting with a wrong + /// database context. + /// + [Fact] + public void UseDatabase_ConnectionDropped_NoRetry_ThrowsOnNextCommand() + { + using DisconnectableTdsServer server = new(); + SqlConnectionStringBuilder builder = new() + { + DataSource = $"localhost,{server.Port}", + InitialCatalog = InitialDatabase, + Encrypt = SqlConnectionEncryptOption.Optional, + ConnectRetryCount = 0, + ConnectTimeout = 5, + Pooling = false, + }; + + using SqlConnection connection = new(builder.ConnectionString); + connection.Open(); + + using (SqlCommand cmd = new($"USE [{SwitchedDatabase}]", connection)) + { + cmd.ExecuteNonQuery(); + } + + Assert.Equal(SwitchedDatabase, connection.Database); + + DisconnectAndWait(server); + + // With ConnectRetryCount=0, no transparent reconnection should occur. + using SqlCommand cmd2 = new("SELECT 1", connection); + Assert.ThrowsAny(() => cmd2.ExecuteNonQuery()); + } + + #endregion + } +} diff --git a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.EndPoint/TDSServerEndPoint.cs b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.EndPoint/TDSServerEndPoint.cs index 349b23ceee..6fae448c66 100644 --- a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.EndPoint/TDSServerEndPoint.cs +++ b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.EndPoint/TDSServerEndPoint.cs @@ -109,6 +109,26 @@ public void Start() Log($"{GetType().Name} {EndpointName} Listener Thread Started "); } + /// + /// Forcibly close every active client connection without + /// stopping the TCP listener. The server continues to + /// accept new connections (e.g. reconnection attempts). + /// + public void DisconnectAll() + { + IList snapshot; + + lock (Connections) + { + snapshot = new List(Connections); + } + + foreach (T connection in snapshot) + { + connection.Dispose(); + } + } + /// /// Stop the listener thread /// diff --git a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/GenericTdsServer.cs b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/GenericTdsServer.cs index 1eb8bdf8c6..98071fe6b9 100644 --- a/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/GenericTdsServer.cs +++ b/src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/GenericTdsServer.cs @@ -1053,7 +1053,7 @@ private byte[] _GenerateRandomBytes(int count) { rng.GetBytes(randomBytes); } - + return randomBytes; } @@ -1078,6 +1078,16 @@ private bool AreEqual(byte[] left, byte[] right) return left.SequenceEqual(right); } + /// + /// Forcibly close every active client connection without stopping + /// the TCP listener. The server continues to accept new + /// connections (e.g. reconnection attempts). + /// + protected void DisconnectAllClients() + { + _endpoint?.DisconnectAll(); + } + public virtual void Dispose() { _endpoint?.Dispose();