From abc8ffb014f4d33c247614b9f03957470fa942ed Mon Sep 17 00:00:00 2001 From: 0xharkirat Date: Tue, 14 Apr 2026 15:51:06 +1000 Subject: [PATCH 1/5] feat(cloud): minimal Foundry Cloud settings UI (BYOK Slice 5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the smallest possible Cloud Brain screen: paste full Azure endpoint URL + API key, save to secure storage, switch routing to cloudPreferred. No provider dropdown, no mode toggle, no test-connection button — those land in later iterations. - New AzureUrlParser parses the verbatim Azure portal URL into AzureConfig fields (baseUrl, deployment name as model, apiVersion query param). Supports classic openai.azure.com, cognitiveservices.azure.com, and Foundry services.ai.azure.com domains. Accepts URLs with or without trailing /chat/completions. 13 unit tests covering happy + error paths. - New CloudBrainScreen with two FTextField inputs (URL multiline, API key obscured), Save button (parses + persists + flips mode), Clear button (wipes config + reverts mode), inline error banner on parse failure, status banner on success. Pre-populates URL field from existing config but never the API key. - New /settings/cloud-brain route in hark_router. - New "Cloud brain (beta)" section + row in the main settings screen with on/off pill and the configured deployment name as subtitle. User flow: 1. Settings → Cloud brain (beta) row → Cloud Brain screen 2. Paste full URL from Azure portal Keys & Endpoint tab 3. Paste API key 4. Save → routing flips to cloudPreferred 5. Next voice command takes the cloud path The parser tolerates the URL shape Azure surfaces in the portal (classic /openai/deployments/{name}/chat/completions?api-version=...) without forcing the user to pre-trim or break it into pieces. 
Co-Authored-By: Claude Opus 4.6 --- lib/router/hark_router.dart | 7 + lib/screens/cloud_brain_screen.dart | 270 ++++++++++++++++++ lib/screens/settings_screen.dart | 40 +++ lib/services/cloud/azure_url_parser.dart | 108 +++++++ .../services/cloud/azure_url_parser_test.dart | 166 +++++++++++ 5 files changed, 591 insertions(+) create mode 100644 lib/screens/cloud_brain_screen.dart create mode 100644 lib/services/cloud/azure_url_parser.dart create mode 100644 test/services/cloud/azure_url_parser_test.dart diff --git a/lib/router/hark_router.dart b/lib/router/hark_router.dart index fec871c..f7db5f2 100644 --- a/lib/router/hark_router.dart +++ b/lib/router/hark_router.dart @@ -4,6 +4,7 @@ import 'package:go_router/go_router.dart'; import '../screens/available_actions_screen.dart'; import '../screens/chat_screen.dart'; +import '../screens/cloud_brain_screen.dart'; import '../screens/settings_screen.dart'; import '../screens/splash_screen.dart'; import '../state/init_notifier.dart'; @@ -17,6 +18,7 @@ class HarkRoutes { static const chat = '/chat'; static const actions = '/actions'; static const settings = '/settings'; + static const cloudBrain = '/settings/cloud-brain'; } /// Provides the app's singleton [GoRouter]. 
@@ -73,6 +75,11 @@ final goRouterProvider = Provider((ref) { name: 'settings', builder: (_, _) => const SettingsScreen(), ), + GoRoute( + path: HarkRoutes.cloudBrain, + name: 'cloud-brain', + builder: (_, _) => const CloudBrainScreen(), + ), ], ); }); diff --git a/lib/screens/cloud_brain_screen.dart b/lib/screens/cloud_brain_screen.dart new file mode 100644 index 0000000..2342a9a --- /dev/null +++ b/lib/screens/cloud_brain_screen.dart @@ -0,0 +1,270 @@ +import 'package:flutter/widgets.dart'; +import 'package:flutter_riverpod/flutter_riverpod.dart'; +import 'package:forui/forui.dart'; +import 'package:go_router/go_router.dart'; + +import '../services/cloud/azure_url_parser.dart'; +import '../services/cloud/cloud_provider_config.dart'; +import '../state/cloud_provider_notifier.dart'; + +/// Minimal Cloud Brain settings screen — single Foundry / Azure +/// integration with two inputs (full URL + API key). Saves to secure +/// storage via [CloudProviderNotifier] and bumps the routing mode to +/// `cloudPreferred` so the next voice command takes the cloud path. +/// +/// This is the Slice 5 "minimum viable UI" — no provider dropdown, no +/// mode toggle, no test-connection button, no cost meter. Those land in +/// later slices once the basic save/load round trip is solid. +class CloudBrainScreen extends ConsumerStatefulWidget { + const CloudBrainScreen({super.key}); + + @override + ConsumerState createState() => _CloudBrainScreenState(); +} + +class _CloudBrainScreenState extends ConsumerState { + final _urlController = TextEditingController(); + final _apiKeyController = TextEditingController(); + final _parser = const AzureUrlParser(); + + String? _errorMessage; + String? _statusMessage; + bool _saving = false; + + @override + void initState() { + super.initState(); + // Pre-populate the URL field if a config is already saved so the + // user can see what they had last time. Never pre-populate the API + // key — making them paste it again is the safer default. 
+    Future.microtask(_loadExisting);
+  }
+
+  Future _loadExisting() async {
+    final notifier = ref.read(cloudProviderNotifierProvider.notifier);
+    await notifier.awaitInitialLoad();
+    if (!mounted) return;
+    final state = ref.read(cloudProviderNotifierProvider);
+    final config = state.config;
+    if (config is AzureConfig) {
+      // Reconstruct the kind of URL the user originally pasted, so the
+      // field round-trips visibly. This is just `baseUrl` with the
+      // api-version query param tacked back on.
+      _urlController.text =
+          '${config.baseUrl}/chat/completions?api-version=${config.apiVersion}';
+      setState(() {
+        _statusMessage = 'Configured · deployment ${config.model}';
+      });
+    }
+  }
+
+  @override
+  void dispose() {
+    _urlController.dispose();
+    _apiKeyController.dispose();
+    super.dispose();
+  }
+
+  Future _save() async {
+    setState(() {
+      _errorMessage = null;
+      _statusMessage = null;
+      _saving = true;
+    });
+
+    try {
+      final config = _parser.parse(
+        rawUrl: _urlController.text,
+        apiKey: _apiKeyController.text.trim(), // drop paste whitespace
+      );
+      if (config.apiKey.isEmpty) {
+        throw const FormatException('API key is empty.');
+      }
+
+      final notifier = ref.read(cloudProviderNotifierProvider.notifier);
+      await notifier.setConfig(config);
+      // Flip routing to cloudPreferred so the next voice command
+      // actually exercises the cloud path. The user can still toggle
+      // back to local-only via a future settings screen.
+      await notifier.setMode(CloudRoutingMode.cloudPreferred);
+
+      if (!mounted) return;
+      // Don't keep the key in the field after a successful save.
+      _apiKeyController.clear();
+      setState(() {
+        _statusMessage =
+            'Saved · deployment ${config.model} · cloud preferred';
+      });
+    } on FormatException catch (e) {
+      if (!mounted) return;
+      setState(() {
+        _errorMessage = e.message;
+      });
+    } catch (e) {
+      if (!mounted) return;
+      setState(() {
+        _errorMessage = 'Save failed: $e';
+      });
+    } finally {
+      if (mounted) setState(() => _saving = false);
+    }
+  }
+
+  Future _clear() async {
+    setState(() {
+      _errorMessage = null;
+      _statusMessage = null;
+      _saving = true;
+    });
+    try {
+      final notifier = ref.read(cloudProviderNotifierProvider.notifier);
+      await notifier.clearConfig();
+      await notifier.setMode(CloudRoutingMode.localOnly);
+      if (!mounted) return; // controllers are disposed once unmounted
+      _urlController.clear();
+      _apiKeyController.clear();
+      setState(() {
+        _statusMessage = 'Cleared · using local Qwen3';
+      });
+    } catch (e) {
+      if (!mounted) return;
+      setState(() {
+        _errorMessage = 'Clear failed: $e';
+      });
+    } finally {
+      if (mounted) setState(() => _saving = false);
+    }
+  }
+
+  @override
+  Widget build(BuildContext context) {
+    final colors = context.theme.colors;
+    final typography = context.theme.typography;
+    final state = ref.watch(cloudProviderNotifierProvider);
+    final hasConfig = state.hasConfig;
+
+    return FScaffold(
+      header: FHeader.nested(
+        title: const Text('Cloud Brain'),
+        prefixes: [
+          FButton.icon(
+            onPress: () => context.pop(),
+            variant: FButtonVariant.ghost,
+            child: const Icon(FIcons.arrowLeft),
+          ),
+        ],
+      ),
+      child: ListView(
+        padding: const EdgeInsets.fromLTRB(16, 16, 16, 32),
+        children: [
+          Text(
+            'Foundry / Azure OpenAI',
+            style: typography.lg.copyWith(fontWeight: FontWeight.w600),
+          ),
+          const SizedBox(height: 6),
+          Text(
+            'Paste the full endpoint URL and API key from the Azure '
+            'portal (Keys and Endpoint tab). 
Hark sends voice transcripts ' + 'directly to your deployment — they never go through Hark.', + style: typography.sm.copyWith(color: colors.mutedForeground), + ), + const SizedBox(height: 20), + + Text( + 'Endpoint URL', + style: typography.sm.copyWith(fontWeight: FontWeight.w600), + ), + const SizedBox(height: 6), + FTextField( + control: FTextFieldControl.managed( + controller: _urlController, + ), + hint: + 'https://{resource}.cognitiveservices.azure.com/openai/deployments/{deployment}/chat/completions?api-version=...', + maxLines: 4, + ), + + const SizedBox(height: 16), + Text( + 'API key', + style: typography.sm.copyWith(fontWeight: FontWeight.w600), + ), + const SizedBox(height: 6), + FTextField( + control: FTextFieldControl.managed( + controller: _apiKeyController, + ), + hint: hasConfig ? '••••• (saved — paste to replace)' : 'Azure key', + obscureText: true, + ), + + if (_errorMessage != null) ...[ + const SizedBox(height: 12), + _StatusBanner( + text: _errorMessage!, + color: colors.destructive, + ), + ], + if (_statusMessage != null) ...[ + const SizedBox(height: 12), + _StatusBanner( + text: _statusMessage!, + color: colors.primary, + ), + ], + + const SizedBox(height: 20), + FButton( + onPress: _saving ? null : _save, + child: Text(_saving ? 'Saving…' : 'Save'), + ), + if (hasConfig) ...[ + const SizedBox(height: 10), + FButton( + onPress: _saving ? null : _clear, + variant: FButtonVariant.secondary, + child: const Text('Clear'), + ), + ], + + const SizedBox(height: 24), + Text( + 'Privacy', + style: typography.sm.copyWith(fontWeight: FontWeight.w600), + ), + const SizedBox(height: 6), + Text( + 'Your API key is stored encrypted via Android Keystore. On ' + 'rooted devices the key file name is visible but the value ' + 'is not. 
When cloud is on, voice transcripts are sent ' + 'directly to your provider — Hark never sees them.', + style: typography.xs.copyWith(color: colors.mutedForeground), + ), + ], + ), + ); + } +} + +class _StatusBanner extends StatelessWidget { + const _StatusBanner({required this.text, required this.color}); + + final String text; + final Color color; + + @override + Widget build(BuildContext context) { + return Container( + padding: const EdgeInsets.symmetric(horizontal: 12, vertical: 10), + decoration: BoxDecoration( + color: color.withValues(alpha: 0.12), + borderRadius: BorderRadius.circular(8), + border: Border.all(color: color.withValues(alpha: 0.35)), + ), + child: Text( + text, + style: context.theme.typography.sm.copyWith(color: color), + ), + ); + } +} diff --git a/lib/screens/settings_screen.dart b/lib/screens/settings_screen.dart index a51cb28..8ae770e 100644 --- a/lib/screens/settings_screen.dart +++ b/lib/screens/settings_screen.dart @@ -7,7 +7,9 @@ import 'package:package_info_plus/package_info_plus.dart'; import 'package:permission_handler/permission_handler.dart'; import 'package:url_launcher/url_launcher.dart'; +import '../router/hark_router.dart'; import '../state/chat_notifier.dart'; +import '../state/cloud_provider_notifier.dart'; import '../state/settings_notifier.dart'; /// User-facing settings surface. @@ -147,6 +149,12 @@ class _SettingsScreenState extends ConsumerState value: '0.3 (cooldown 1500 ms)', ), + _SectionHeader('Cloud brain (beta)'), + _CloudBrainRow( + state: ref.watch(cloudProviderNotifierProvider), + onTap: () => context.push(HarkRoutes.cloudBrain), + ), + _SectionHeader('Models'), // TODO: source model names + sizes from embeddingProvider / // slotFillingProvider so they don't drift if the defaults change. 
@@ -423,6 +431,38 @@ class _Row extends StatelessWidget { } } +class _CloudBrainRow extends StatelessWidget { + const _CloudBrainRow({required this.state, required this.onTap}); + + final CloudProviderState state; + final Future Function() onTap; + + @override + Widget build(BuildContext context) { + final colors = context.theme.colors; + final config = state.config; + final configured = config != null; + final description = configured + ? 'Foundry · ${config.model} · ${state.mode.wireName}' + : 'Send stage 2 to your own Azure / Foundry deployment.'; + return FTappable( + onPress: onTap, + child: _Row( + icon: FIcons.cloud, + label: 'Foundry cloud', + description: description, + trailing: configured + ? _StatusPill(text: 'On', ok: true) + : Icon( + FIcons.chevronRight, + size: 16, + color: colors.mutedForeground, + ), + ), + ); + } +} + class _StatusPill extends StatelessWidget { const _StatusPill({required this.text, required this.ok}); diff --git a/lib/services/cloud/azure_url_parser.dart b/lib/services/cloud/azure_url_parser.dart new file mode 100644 index 0000000..ad2f9b6 --- /dev/null +++ b/lib/services/cloud/azure_url_parser.dart @@ -0,0 +1,108 @@ +import 'cloud_provider_config.dart'; + +/// Parses a full Azure OpenAI / Foundry endpoint URL into the discrete +/// fields [AzureConfig] needs. 
+/// +/// The Azure portal shows users a URL like: +/// +/// https://hark-ai-resource.cognitiveservices.azure.com/openai/deployments/hark-cloud-gpt-4-mini/chat/completions?api-version=2025-01-01-preview +/// +/// rather than three separate fields, so the Cloud Brain settings +/// screen lets users paste it verbatim and we extract: +/// +/// - `baseUrl` — everything up to and including `/openai/deployments/{name}` +/// (the adapter appends `/chat/completions`) +/// - `model` — the deployment name segment (`hark-cloud-gpt-4-mini`) +/// - `apiVersion` — the `api-version` query parameter +/// +/// Supports both classic (`*.openai.azure.com`, +/// `*.cognitiveservices.azure.com`) and the newer Foundry domain +/// (`*.services.ai.azure.com`). Also accepts URLs without the trailing +/// `/chat/completions` path (user pre-trimmed) and URLs with extra +/// query params. +/// +/// Throws [FormatException] with a user-friendly message if the URL +/// doesn't match the expected shape. The Cloud Brain screen catches +/// this and surfaces it inline. +class AzureUrlParser { + const AzureUrlParser(); + + /// Parse [rawUrl] into an [AzureConfig]. [apiKey] is supplied + /// separately and persisted via [CloudProviderNotifier.setConfig]. + AzureConfig parse({required String rawUrl, required String apiKey}) { + final trimmed = rawUrl.trim(); + if (trimmed.isEmpty) { + throw const FormatException('URL is empty.'); + } + + final Uri uri; + try { + uri = Uri.parse(trimmed); + } on FormatException { + throw const FormatException( + 'URL is not a valid URI. Paste the full endpoint from the Azure ' + 'portal (Keys and Endpoint tab).', + ); + } + + if (!uri.hasScheme || (uri.scheme != 'https' && uri.scheme != 'http')) { + throw const FormatException( + 'URL must start with https://. Paste the full endpoint from ' + 'the Azure portal.', + ); + } + if (uri.host.isEmpty) { + throw const FormatException('URL is missing a host.'); + } + + // Find the /openai/deployments/{name} segment. 
+ final segments = uri.pathSegments; + final openaiIdx = segments.indexOf('openai'); + if (openaiIdx == -1) { + throw const FormatException( + 'URL does not look like an Azure OpenAI / Foundry endpoint. ' + 'Expected a path containing /openai/deployments/{deployment}.', + ); + } + if (openaiIdx + 2 >= segments.length || + segments[openaiIdx + 1] != 'deployments') { + throw const FormatException( + 'URL is missing /deployments/{deployment-name}. Make sure you ' + 'copied the full endpoint, not just the resource URL.', + ); + } + final deploymentName = segments[openaiIdx + 2]; + if (deploymentName.isEmpty) { + throw const FormatException( + 'Deployment name is empty in the URL.', + ); + } + + // api-version is required. + final apiVersion = uri.queryParameters['api-version']; + if (apiVersion == null || apiVersion.isEmpty) { + throw const FormatException( + 'URL is missing the ?api-version=... query parameter. Copy the ' + 'full endpoint from the Azure portal — it includes the version.', + ); + } + + // Reconstruct the base URL: scheme + host + /openai/deployments/{name} + // (drop any /chat/completions suffix and all query params). + final baseSegments = + segments.sublist(0, openaiIdx + 3); // openai, deployments, name + final baseUri = Uri( + scheme: uri.scheme, + host: uri.host, + port: uri.hasPort ? 
uri.port : null, + pathSegments: baseSegments, + ); + + return AzureConfig( + baseUrl: baseUri.toString(), + apiKey: apiKey, + model: deploymentName, + apiVersion: apiVersion, + ); + } +} diff --git a/test/services/cloud/azure_url_parser_test.dart b/test/services/cloud/azure_url_parser_test.dart new file mode 100644 index 0000000..d16c739 --- /dev/null +++ b/test/services/cloud/azure_url_parser_test.dart @@ -0,0 +1,166 @@ +import 'package:flutter_test/flutter_test.dart'; +import 'package:hark/services/cloud/azure_url_parser.dart'; + +void main() { + const parser = AzureUrlParser(); + + group('AzureUrlParser — happy paths', () { + test('parses full classic cognitiveservices URL', () { + final cfg = parser.parse( + rawUrl: + 'https://hark-ai-resource.cognitiveservices.azure.com/openai/deployments/hark-cloud-gpt-4-mini/chat/completions?api-version=2025-01-01-preview', + apiKey: 'sk-test', + ); + expect( + cfg.baseUrl, + 'https://hark-ai-resource.cognitiveservices.azure.com/openai/deployments/hark-cloud-gpt-4-mini', + ); + expect(cfg.model, 'hark-cloud-gpt-4-mini'); + expect(cfg.apiVersion, '2025-01-01-preview'); + expect(cfg.apiKey, 'sk-test'); + }); + + test('parses classic openai.azure.com URL', () { + final cfg = parser.parse( + rawUrl: + 'https://my-resource.openai.azure.com/openai/deployments/gpt4mini/chat/completions?api-version=2024-10-21', + apiKey: 'k', + ); + expect( + cfg.baseUrl, + 'https://my-resource.openai.azure.com/openai/deployments/gpt4mini', + ); + expect(cfg.model, 'gpt4mini'); + expect(cfg.apiVersion, '2024-10-21'); + }); + + test('parses Foundry services.ai.azure.com URL', () { + final cfg = parser.parse( + rawUrl: + 'https://hark-ai-resource.services.ai.azure.com/openai/deployments/hark-mini/chat/completions?api-version=2025-01-01-preview', + apiKey: 'k', + ); + expect( + cfg.baseUrl, + 'https://hark-ai-resource.services.ai.azure.com/openai/deployments/hark-mini', + ); + expect(cfg.model, 'hark-mini'); + }); + + test('accepts URL without 
trailing /chat/completions', () { + final cfg = parser.parse( + rawUrl: + 'https://r.cognitiveservices.azure.com/openai/deployments/d?api-version=2024-10-21', + apiKey: 'k', + ); + expect( + cfg.baseUrl, + 'https://r.cognitiveservices.azure.com/openai/deployments/d', + ); + expect(cfg.model, 'd'); + expect(cfg.apiVersion, '2024-10-21'); + }); + + test('strips extra query parameters but keeps api-version', () { + final cfg = parser.parse( + rawUrl: + 'https://r.openai.azure.com/openai/deployments/d/chat/completions?api-version=2024-10-21&other=x', + apiKey: 'k', + ); + expect(cfg.apiVersion, '2024-10-21'); + expect(cfg.baseUrl, 'https://r.openai.azure.com/openai/deployments/d'); + }); + + test('whitespace is trimmed', () { + final cfg = parser.parse( + rawUrl: + ' https://r.openai.azure.com/openai/deployments/d/chat/completions?api-version=2024-10-21 \n', + apiKey: 'k', + ); + expect(cfg.model, 'd'); + }); + }); + + group('AzureUrlParser — error paths', () { + test('empty URL', () { + expect( + () => parser.parse(rawUrl: '', apiKey: 'k'), + throwsA( + isA().having((e) => e.message, 'message', 'URL is empty.'), + ), + ); + }); + + test('malformed URI', () { + expect( + () => parser.parse(rawUrl: 'http://[bad', apiKey: 'k'), + throwsFormatException, + ); + }); + + test('missing https scheme', () { + expect( + () => parser.parse(rawUrl: 'ftp://x.com/openai/deployments/d?api-version=v', apiKey: 'k'), + throwsFormatException, + ); + }); + + test('plain resource URL without /openai path', () { + expect( + () => parser.parse(rawUrl: 'https://r.cognitiveservices.azure.com/', apiKey: 'k'), + throwsA( + isA().having( + (e) => e.message, + 'message', + contains('does not look like'), + ), + ), + ); + }); + + test('missing /deployments segment after /openai', () { + expect( + () => parser.parse( + rawUrl: 'https://r.openai.azure.com/openai/something/d?api-version=v', + apiKey: 'k', + ), + throwsA( + isA().having( + (e) => e.message, + 'message', + contains('missing 
/deployments'), + ), + ), + ); + }); + + test('missing deployment name', () { + // /openai/deployments with no name — the path would just end there + // and the parser should reject it. + expect( + () => parser.parse( + rawUrl: 'https://r.openai.azure.com/openai/deployments?api-version=v', + apiKey: 'k', + ), + throwsA(isA()), + ); + }); + + test('missing api-version query param', () { + expect( + () => parser.parse( + rawUrl: + 'https://r.openai.azure.com/openai/deployments/d/chat/completions', + apiKey: 'k', + ), + throwsA( + isA().having( + (e) => e.message, + 'message', + contains('api-version'), + ), + ), + ); + }); + }); +} From 575c2413580670905857286c94f549bb77c2e0f5 Mon Sep 17 00:00:00 2001 From: 0xharkirat Date: Tue, 14 Apr 2026 16:04:27 +1000 Subject: [PATCH 2/5] chore(cloud): log full Azure error body on HTTP failures The 404 path threw a CloudHardError with just the openai_dart exception message, which loses the actual Azure response body (`error.message`, `error.code`, `error.type`). Add HarkCloudErr debug log lines per failure type so first-pass cloud testing can inspect what Azure actually said. Also enrich the CloudHardError thrown on 404 with the deployment name we sent so the user sees what we asked for vs what Azure couldn't find. 
Co-Authored-By: Claude Opus 4.6 --- .../adapters/openai_compatible_adapter.dart | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/lib/services/cloud/adapters/openai_compatible_adapter.dart b/lib/services/cloud/adapters/openai_compatible_adapter.dart index 9fc7e82..56d2c15 100644 --- a/lib/services/cloud/adapters/openai_compatible_adapter.dart +++ b/lib/services/cloud/adapters/openai_compatible_adapter.dart @@ -139,33 +139,49 @@ class OpenAiCompatibleAdapter implements HarkLlmClient { ) .timeout(timeout); } on TimeoutException catch (e) { + debugPrint( + 'HarkCloudErr: timeout after ${timeout.inSeconds}s ' + 'kind=${_config.kind.wireName}', + ); throw CloudUnavailableError( 'Cloud request timed out after ${timeout.inSeconds}s', cause: e, ); } on NotFoundException catch (e) { - // Deployment / model not found — user must fix config. + debugPrint( + 'HarkCloudErr: 404 kind=${_config.kind.wireName} ' + 'baseUrl=${_config.baseUrl} model=${_config.model} ' + 'message=${e.message} type=${e.type} code=${e.code} ' + 'body=${e.body}', + ); throw CloudHardError( - 'Provider returned 404 — check your base URL and model / ' - 'deployment name. Original message: ${e.message}', + 'Provider returned 404 — check your base URL, deployment name ' + '(${_config.model}), and api-version. Azure said: ${e.message}', cause: e, ); } on ApiException catch (e) { - // 401 / 429 / 4xx (other) / 5xx — recoverable, fall back in - // CLOUD_PREFERRED. + debugPrint( + 'HarkCloudErr: HTTP ${e.statusCode} kind=${_config.kind.wireName} ' + 'baseUrl=${_config.baseUrl} model=${_config.model} ' + 'message=${e.message} type=${e.type} code=${e.code} ' + 'body=${e.body}', + ); throw CloudUnavailableError( e.message, cause: e, statusCode: e.statusCode, ); } on OpenAIException catch (e) { - // Catches: ConnectionException, RequestTimeoutException, - // ParseException, AbortedException — all recoverable. 
+ debugPrint( + 'HarkCloudErr: transport kind=${_config.kind.wireName} ' + 'message=${e.message} cause=${e.cause}', + ); throw CloudUnavailableError( 'Cloud transport error: ${e.message}', cause: e, ); } catch (e) { + debugPrint('HarkCloudErr: unknown kind=${_config.kind.wireName} $e'); throw CloudUnavailableError( 'Cloud request failed: $e', cause: e, From 5be9a5101c5df3115da19f3c24ef04b7c0e0c251 Mon Sep 17 00:00:00 2001 From: 0xharkirat Date: Tue, 14 Apr 2026 16:16:20 +1000 Subject: [PATCH 3/5] fix(cloud): use Bearer auth for Azure (Foundry serverless endpoints) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Microsoft surfaces two Azure OpenAI templates in their docs: - Classic Azure OpenAI Service: api-key: header - Foundry serverless endpoints: Authorization: Bearer header Same URL shape, different auth header. The Foundry path is what every modern model deployment uses (gpt-4.1-mini and friends). Sending api-key to a Bearer-expecting endpoint returns 404 with body {error: {code: 404, message: "Resource not found"}} instead of a clean 401, because the path lookup fails before auth runs. User reproduced this end-to-end: the Azure portal generated docs for their hark-cloud-gpt-4-mini deployment use Authorization: Bearer. Switch the adapter to use ApiKeyProvider (Bearer) for Azure too. The api-version query parameter is still wired correctly via OpenAIConfig. The kind dispatch collapses to a single shared branch since all four OpenAI-compat providers now use the same auth header — kind is still useful for future per-provider tweaks (Anthropic adapter dispatch, Gemini-specific quirks). If a future user has a legacy classic Azure OpenAI deployment that only accepts api-key, we'll add a UI toggle then. For now the Foundry-default behavior matches reality. 
Co-Authored-By: Claude Opus 4.6 --- .../adapters/openai_compatible_adapter.dart | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/lib/services/cloud/adapters/openai_compatible_adapter.dart b/lib/services/cloud/adapters/openai_compatible_adapter.dart index 56d2c15..411d926 100644 --- a/lib/services/cloud/adapters/openai_compatible_adapter.dart +++ b/lib/services/cloud/adapters/openai_compatible_adapter.dart @@ -60,26 +60,34 @@ class OpenAiCompatibleAdapter implements HarkLlmClient { /// Build an [OpenAIClient] from a [CloudProviderConfig]. Dispatches /// on `kind` to pick the right auth provider and wire the api-version /// query param for Azure. + /// + /// **Auth header note for Azure:** Foundry serverless endpoints + /// (which is how the Azure portal deploys gpt-4.1-mini and most newer + /// models in 2026) expect `Authorization: Bearer {key}`, NOT the + /// `api-key:` header that classic Azure OpenAI Service uses. Microsoft + /// surfaces both auth styles in their docs depending on which + /// deployment template the model uses, but the URL looks identical. + /// Sending `api-key:` to a Bearer-expecting endpoint produces a 404 + /// (path lookup fails) instead of a clean 401, which makes the + /// failure mode confusing. + /// + /// We default Azure to `ApiKeyProvider` (Bearer) because that's what + /// every Foundry-deployed model in the user's Azure account currently + /// expects. If a future user has a legacy classic Azure OpenAI + /// deployment that only accepts `api-key:`, we'll add a UI toggle + /// then. 
static OpenAIClient _buildClient(CloudProviderConfig config) { switch (config.kind) { case CloudProviderKind.azureOpenAi: - final azure = config as AzureConfig; - return OpenAIClient( - config: OpenAIConfig( - baseUrl: azure.baseUrl, - authProvider: AzureApiKeyProvider(azure.apiKey), - apiVersion: azure.apiVersion, - timeout: const Duration(seconds: 15), - ), - ); - case CloudProviderKind.openai: case CloudProviderKind.gemini: case CloudProviderKind.customOpenAi: + final apiVersion = config is AzureConfig ? config.apiVersion : null; return OpenAIClient( config: OpenAIConfig( baseUrl: config.baseUrl, authProvider: ApiKeyProvider(config.apiKey), + apiVersion: apiVersion, timeout: const Duration(seconds: 15), ), ); From d5efe6e8f85c5bd0eda2e2eb42ccfb3f73ba6fd0 Mon Sep 17 00:00:00 2001 From: 0xharkirat Date: Tue, 14 Apr 2026 16:29:48 +1000 Subject: [PATCH 4/5] refactor(cloud): drop openai_dart, use direct package:http POST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The provider-abstraction package was net-negative for our use case: - We POST one well-known shape per call (chat/completions with tools + tool_choice) — no abstraction needed - Its Azure auth defaults bit us in Slice 5 (AzureApiKeyProvider sent api-key header instead of Bearer; mismatched our Foundry serverless deployment which expects Authorization: Bearer) - Even after switching to ApiKeyProvider it still 404'd, masking whatever the actual issue is behind a wire format we couldn't inspect - 50KB dependency for what is now a 50-line http POST Direct http gives us: - Full visibility on the request bytes (matches the curl Microsoft surfaces in their portal verbatim) - Single code path: same Authorization: Bearer header for OpenAI, Azure, Gemini, OpenRouter, custom — every modern OpenAI-compat endpoint accepts Bearer - Tighter URL construction: append /chat/completions to baseUrl by string concat (Uri.resolve mishandles per-deployment URLs) - Simpler error 
mapping: status code → CloudUnavailableError / CloudHardError without translating openai_dart exception types Anthropic still throws CloudHardError here — Slice 7 gets the dedicated AnthropicAdapter for the native tool_use shape. Co-Authored-By: Claude Opus 4.6 --- .../adapters/openai_compatible_adapter.dart | 265 +++++++++--------- pubspec.lock | 10 +- pubspec.yaml | 11 +- 3 files changed, 140 insertions(+), 146 deletions(-) diff --git a/lib/services/cloud/adapters/openai_compatible_adapter.dart b/lib/services/cloud/adapters/openai_compatible_adapter.dart index 411d926..dc78239 100644 --- a/lib/services/cloud/adapters/openai_compatible_adapter.dart +++ b/lib/services/cloud/adapters/openai_compatible_adapter.dart @@ -2,7 +2,7 @@ import 'dart:async'; import 'dart:convert'; import 'package:flutter/foundation.dart'; -import 'package:openai_dart/openai_dart.dart'; +import 'package:http/http.dart' as http; import '../../../models/assistant_action.dart'; import '../cloud_errors.dart'; @@ -11,16 +11,28 @@ import '../hark_llm_client.dart'; import '../oacp_to_tool_schema.dart'; import '../slot_result_validator.dart'; -/// Cloud slot filler that targets any OpenAI-compatible backend. +/// Cloud slot filler that targets any OpenAI-compatible chat/completions +/// endpoint via direct HTTP. Replaces the previous `openai_dart`-based +/// implementation — we POST one well-known shape per call, so the +/// provider-abstraction package was net-negative (its Azure auth +/// defaults bit us with the wrong header for Foundry serverless +/// endpoints, and we couldn't see the actual request bytes). 
/// -/// Covers in one client: -/// - OpenAI direct (`api.openai.com/v1`) via `Authorization: Bearer` -/// - Azure OpenAI (classic per-deployment URL or v1/Foundry surface) -/// via `api-key` header + `?api-version=...` query param +/// Supported in one client: +/// - OpenAI direct (`api.openai.com/v1`) +/// - Azure / Foundry serverless (`*.cognitiveservices.azure.com/openai/deployments/{name}` +/// or `*.services.ai.azure.com/...`) /// - Gemini OpenAI-compat endpoint -/// (`generativelanguage.googleapis.com/v1beta/openai`) via Bearer +/// (`generativelanguage.googleapis.com/v1beta/openai`) /// - Custom OpenAI-compatible backends (OpenRouter, LiteLLM, vLLM, -/// Together, Groq, self-hosted) via Bearer +/// Together, Groq, self-hosted) +/// +/// Auth: `Authorization: Bearer {apiKey}` for every provider. Microsoft +/// surfaces both `api-key:` and `Authorization: Bearer` in their Azure +/// docs depending on which template generated them; the Foundry-managed +/// flavor (used by every modern model deployment in 2026) wants Bearer. +/// If a future user has a legacy classic Azure OpenAI deployment that +/// only accepts `api-key:`, we'll add a UI toggle then. /// /// Anthropic is NOT handled here — its native `tool_use` shape needs /// the dedicated [AnthropicAdapter] (Slice 7). @@ -30,74 +42,46 @@ import '../slot_result_validator.dart'; /// via [OacpToToolSchema]. /// 2. Build a system prompt with extraction instructions + entity /// context (aliases, known entities) from the same translator. -/// 3. POST chat/completions with `tools=[tool]`, -/// `tool_choice=function(name)` so the model is forced to call our -/// tool (no chit-chat). -/// 4. Parse `tool_calls[0].function.arguments` (JSON string). +/// 3. POST `{baseUrl}/chat/completions[?api-version=...]` with +/// `tools=[tool]`, `tool_choice=function(name)` so the model is +/// forced to call our tool (no chit-chat). +/// 4. Parse `choices[0].message.tool_calls[0].function.arguments` +/// (JSON string). /// 5. 
Validate via [SlotResultValidator] — same coercion rules as the /// on-device path. -/// -/// Failures map to the [HarkLlmClient] failure semantics: -/// - Network / 5xx / malformed JSON → [CloudUnavailableError] -/// - 401 → [CloudUnavailableError] (recoverable: fix key in settings; -/// immediate fallback in CLOUD_PREFERRED is desired) -/// - 404 → [CloudHardError] (deployment / model not found, user must -/// fix it) -/// - Schema with no parameters / unsupported config → [CloudHardError] -/// - Validated map missing required slots → return null (matches local -/// path's `slot_filling_failed`) class OpenAiCompatibleAdapter implements HarkLlmClient { - OpenAiCompatibleAdapter(this._config) - : _client = _buildClient(_config), + OpenAiCompatibleAdapter( + this._config, { + http.Client? httpClient, + }) : _http = httpClient ?? http.Client(), + _ownsClient = httpClient == null, _translator = const OacpToToolSchema(), _validator = const SlotResultValidator(); final CloudProviderConfig _config; - final OpenAIClient _client; + final http.Client _http; + final bool _ownsClient; final OacpToToolSchema _translator; final SlotResultValidator _validator; - /// Build an [OpenAIClient] from a [CloudProviderConfig]. Dispatches - /// on `kind` to pick the right auth provider and wire the api-version - /// query param for Azure. - /// - /// **Auth header note for Azure:** Foundry serverless endpoints - /// (which is how the Azure portal deploys gpt-4.1-mini and most newer - /// models in 2026) expect `Authorization: Bearer {key}`, NOT the - /// `api-key:` header that classic Azure OpenAI Service uses. Microsoft - /// surfaces both auth styles in their docs depending on which - /// deployment template the model uses, but the URL looks identical. - /// Sending `api-key:` to a Bearer-expecting endpoint produces a 404 - /// (path lookup fails) instead of a clean 401, which makes the - /// failure mode confusing. 
- /// - /// We default Azure to `ApiKeyProvider` (Bearer) because that's what - /// every Foundry-deployed model in the user's Azure account currently - /// expects. If a future user has a legacy classic Azure OpenAI - /// deployment that only accepts `api-key:`, we'll add a UI toggle - /// then. - static OpenAIClient _buildClient(CloudProviderConfig config) { - switch (config.kind) { - case CloudProviderKind.azureOpenAi: - case CloudProviderKind.openai: - case CloudProviderKind.gemini: - case CloudProviderKind.customOpenAi: - final apiVersion = config is AzureConfig ? config.apiVersion : null; - return OpenAIClient( - config: OpenAIConfig( - baseUrl: config.baseUrl, - authProvider: ApiKeyProvider(config.apiKey), - apiVersion: apiVersion, - timeout: const Duration(seconds: 15), - ), - ); + /// Build the full POST URL: `{baseUrl}/chat/completions` with + /// optional `?api-version=...` query for Azure. We do this by hand + /// rather than using `Uri.resolve` because, per RFC 3986, resolve + /// replaces the last path segment of a base URL that has no trailing + /// slash (`/openai/deployments/{name}` would lose `{name}`). + Uri _chatCompletionsUri() { + final base = _config.baseUrl.endsWith('/') + ? _config.baseUrl.substring(0, _config.baseUrl.length - 1) + : _config.baseUrl; + final pathJoined = '$base/chat/completions'; + final uri = Uri.parse(pathJoined); - case CloudProviderKind.anthropic: - throw CloudHardError( - 'Anthropic is not handled by OpenAiCompatibleAdapter. ' - 'Use AnthropicAdapter (Slice 7) instead.', - ); + final config = _config; + final apiVersion = config is AzureConfig ? 
config.apiVersion : null; + if (apiVersion != null && apiVersion.isNotEmpty) { + return uri.replace(queryParameters: {'api-version': apiVersion}); } + return uri; } @override @@ -106,110 +90,127 @@ class OpenAiCompatibleAdapter implements HarkLlmClient { required AssistantAction action, Duration timeout = const Duration(seconds: 4), }) async { + if (_config.kind == CloudProviderKind.anthropic) { + throw CloudHardError( + 'Anthropic is not handled by OpenAiCompatibleAdapter. ' + 'Use AnthropicAdapter (Slice 7) instead.', + ); + } + // 1. Translate OACP schema → OpenAI tool definition. final toolJson = _translator.translate(action); final functionDef = toolJson['function'] as Map; final functionName = functionDef['name'] as String; - final tool = Tool.function( - name: functionName, - description: functionDef['description'] as String, - parameters: functionDef['parameters'] as Map, - ); - // 2. Build messages: system prompt with entity context, then user - // transcript verbatim. Force the tool call so the model can't - // answer in prose. + // 2. System prompt + user message body. final systemPrompt = _buildSystemPrompt(action); - final messages = [ - ChatMessage.system(systemPrompt), - ChatMessage.user(transcript), - ]; + final body = { + 'model': _config.model, + 'messages': [ + {'role': 'system', 'content': systemPrompt}, + {'role': 'user', 'content': transcript}, + ], + 'tools': [toolJson], + 'tool_choice': { + 'type': 'function', + 'function': {'name': functionName}, + }, + }; - // 3. POST chat/completions with the tool, forcing its invocation. - // Grep-friendly request log: `adb logcat | grep HarkCloudReq` to - // verify on-device. Never logs the API key — just enough to debug. + final url = _chatCompletionsUri(); + + // Grep-friendly request log: `adb logcat | grep HarkCloudReq`. + // Never logs the API key. 
final stopwatch = Stopwatch()..start(); debugPrint( 'HarkCloudReq: kind=${_config.kind.wireName} ' - 'baseUrl=${_config.baseUrl} model=${_config.model} ' - 'action=$functionName transcript="$transcript"', + 'url=$url model=${_config.model} action=$functionName ' + 'transcript="$transcript"', ); - final ChatCompletion response; + + final http.Response response; try { - response = await _client.chat.completions - .create( - ChatCompletionCreateRequest( - model: _config.model, - messages: messages, - tools: [tool], - toolChoice: ToolChoice.function(functionName), - ), + response = await _http + .post( + url, + headers: { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer ${_config.apiKey}', + }, + body: jsonEncode(body), ) .timeout(timeout); } on TimeoutException catch (e) { debugPrint( - 'HarkCloudErr: timeout after ${timeout.inSeconds}s ' - 'kind=${_config.kind.wireName}', + 'HarkCloudErr: timeout after ${timeout.inSeconds}s url=$url', ); throw CloudUnavailableError( 'Cloud request timed out after ${timeout.inSeconds}s', cause: e, ); - } on NotFoundException catch (e) { - debugPrint( - 'HarkCloudErr: 404 kind=${_config.kind.wireName} ' - 'baseUrl=${_config.baseUrl} model=${_config.model} ' - 'message=${e.message} type=${e.type} code=${e.code} ' - 'body=${e.body}', - ); - throw CloudHardError( - 'Provider returned 404 — check your base URL, deployment name ' - '(${_config.model}), and api-version. 
Azure said: ${e.message}', + } catch (e) { + debugPrint('HarkCloudErr: transport url=$url $e'); + throw CloudUnavailableError( + 'Cloud transport error: $e', cause: e, ); - } on ApiException catch (e) { + } + + if (response.statusCode >= 400) { debugPrint( - 'HarkCloudErr: HTTP ${e.statusCode} kind=${_config.kind.wireName} ' - 'baseUrl=${_config.baseUrl} model=${_config.model} ' - 'message=${e.message} type=${e.type} code=${e.code} ' - 'body=${e.body}', + 'HarkCloudErr: HTTP ${response.statusCode} url=$url ' + 'body=${response.body}', ); + if (response.statusCode == 404) { + throw CloudHardError( + 'Provider returned 404 — check your base URL, deployment ' + 'name (${_config.model}), and api-version. Azure said: ' + '${response.body}', + ); + } throw CloudUnavailableError( - e.message, - cause: e, - statusCode: e.statusCode, + 'HTTP ${response.statusCode}: ${response.body}', + statusCode: response.statusCode, ); - } on OpenAIException catch (e) { + } + + // 3. Parse response. + Map json; + try { + json = jsonDecode(response.body) as Map; + } catch (e) { debugPrint( - 'HarkCloudErr: transport kind=${_config.kind.wireName} ' - 'message=${e.message} cause=${e.cause}', + 'HarkCloudErr: malformed response body=${response.body}', ); throw CloudUnavailableError( - 'Cloud transport error: ${e.message}', + 'Provider response was not valid JSON', cause: e, ); - } catch (e) { - debugPrint('HarkCloudErr: unknown kind=${_config.kind.wireName} $e'); + } + + final choices = json['choices'] as List?; + if (choices == null || choices.isEmpty) { throw CloudUnavailableError( - 'Cloud request failed: $e', - cause: e, + 'Provider response had no choices: ${response.body}', ); } - - // 4. Extract tool call arguments. We forced the call, so anything - // else is malformed. 
- if (!response.hasToolCalls) { + final message = + (choices.first as Map)['message'] as Map?; + final toolCalls = message?['tool_calls'] as List?; + if (toolCalls == null || toolCalls.isEmpty) { debugPrint( - 'OpenAiCompatibleAdapter: no tool call in response despite ' - 'tool_choice=function — finish_reason=' - '${response.choices.first.finishReason}', + 'HarkCloudErr: no tool_calls in response despite tool_choice. ' + 'finish_reason=${(choices.first as Map)['finish_reason']} ' + 'body=${response.body}', ); throw CloudUnavailableError( 'Provider did not return a tool call', ); } - final toolCall = response.allToolCalls.first; - final argsRaw = toolCall.function.arguments; + + final firstCall = toolCalls.first as Map; + final fn = firstCall['function'] as Map; + final argsRaw = fn['arguments'] as String; Map argsMap; try { @@ -221,7 +222,7 @@ class OpenAiCompatibleAdapter implements HarkLlmClient { ); } - // 5. Validate against the OACP schema with the same coercion rules + // 4. Validate against the OACP schema with the same coercion rules // as the local path. Returns null if required slots are missing // — resolver maps that to slot_filling_failed. final validated = _validator.validateMap(argsMap, action); @@ -260,10 +261,10 @@ class OpenAiCompatibleAdapter implements HarkLlmClient { return lines.join('\n'); } - /// Release the underlying HTTP client. Slice 4 should call this in - /// `ref.onDispose` when the cloud config changes so old clients don't - /// leak. + /// Release the underlying HTTP client. Slice 4 wires this via + /// `ref.onDispose` so old clients don't leak when the cloud config + /// changes. 
void close() { - _client.close(); + if (_ownsClient) _http.close(); } } diff --git a/pubspec.lock b/pubspec.lock index 887d7cc..33132df 100644 --- a/pubspec.lock +++ b/pubspec.lock @@ -381,7 +381,7 @@ packages: source: hosted version: "1.0.2" http: - dependency: transitive + dependency: "direct main" description: name: http sha256: "87721a4a50b19c7f1d49001e51409bddc46303966ce89a65af4f4e6004896412" @@ -572,14 +572,6 @@ packages: url: "https://pub.dev" source: hosted version: "9.3.0" - openai_dart: - dependency: "direct main" - description: - name: openai_dart - sha256: "9cc8adc8bfefa520b9f4fd6b22d5f0a41feb7f737ef6e89f16a199e926234c55" - url: "https://pub.dev" - source: hosted - version: "4.1.0" package_config: dependency: transitive description: diff --git a/pubspec.yaml b/pubspec.yaml index 01f37ec..b0cdba6 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -23,11 +23,12 @@ dependencies: # Values are encrypted with a hardware-bound key; see README for the # rooted-device caveat (flutter_secure_storage#947). flutter_secure_storage: ^9.2.2 - # OpenAI-compatible client for stage-2 cloud slot fill (BYOK Slice 3). - # Covers OpenAI direct, Azure OpenAI (via baseUrl + AzureApiKeyProvider - # + apiVersion), Gemini compat endpoint, and custom OpenAI-compatible - # backends (OpenRouter, vLLM, LiteLLM, etc.) with one client surface. - openai_dart: ^4.0.1 + # Direct HTTP for the cloud slot-fill adapter. We POST one well-known + # shape (chat/completions with tools + tool_choice) per call, so a + # ~50-line http wrapper beats pulling in a provider-abstraction + # package whose Azure auth defaults bit us in Slice 5 (sent api-key + # header instead of Bearer; mismatched our Foundry deployment). 
+ http: ^1.2.0 path_provider: ^2.1.0 forui: ^0.20.4 flutter_riverpod: ^3.3.1 From 74f87643401990af7df15a21716bea9a3c05ba4d Mon Sep 17 00:00:00 2001 From: 0xharkirat Date: Tue, 14 Apr 2026 16:51:43 +1000 Subject: [PATCH 5/5] fix(cloud): await initial config load before slot-fill routing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Race condition surfaced in device testing: cloudProviderNotifier.build() returns a default empty state immediately and kicks off _loadFromStorage() fire-and-forget. If the first voice command fires before that load completes — which is the cold-start path when the user goes straight to a command without opening Settings first — cloudSlotFillerProvider sees config=null and the resolver routes to local Qwen3. Earlier "working" runs only worked because the user had opened the Cloud Brain settings screen first, which warmed the secure-storage load. Fix: await CloudProviderNotifier.awaitInitialLoad() at the top of the resolver's slotFill closure. Idempotent + cached, so the cost is one secure-storage read on the very first command of a session and zero thereafter. Co-Authored-By: Claude Opus 4.6 --- lib/state/resolver_provider.dart | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/state/resolver_provider.dart b/lib/state/resolver_provider.dart index fe9dcb4..faacdc2 100644 --- a/lib/state/resolver_provider.dart +++ b/lib/state/resolver_provider.dart @@ -56,6 +56,16 @@ final commandResolverProvider = Provider((ref) { embedDocument: (text) async => ref.read(embeddingProvider.notifier).embedDocument(text), slotFill: ({required transcript, required action}) async { + // Make sure the secure-storage-backed cloud config has been + // loaded at least once. Without this, the first voice command + // after a cold start sees the notifier's default empty state + // (because _loadFromStorage is fire-and-forget in build()) and + // routes to local even when the user has a saved config. 
+ // Idempotent + cached after the first call. + await ref + .read(cloudProviderNotifierProvider.notifier) + .awaitInitialLoad(); + // Env-bootstrap (--dart-define AZURE_*) overrides the stored mode // so a developer running locally for first-pass cloud telemetry // gets cloud-preferred behavior without a settings UI. Once the