Merge pull request #316811 from microsoft/dev/vritant24/resolveUtilityModels

rwoll · web-flow · commit b53fa1ac1b1f · 2026-05-17T16:12:34.000-07:00
Avoid blocking Copilot model selection on utility alias resolution
diff --git a/extensions/copilot/src/extension/conversation/vscode-node/languageModelAccess.ts b/extensions/copilot/src/extension/conversation/vscode-node/languageModelAccess.ts
@@ -18,7 +18,7 @@ import { encodeStatefulMarker } from '../../../platform/endpoint/common/stateful
 import { isAnthropicFamily, isGeminiFamily } from '../../../platform/endpoint/common/chatModelCapabilities';
 import { AutoChatEndpoint } from '../../../platform/endpoint/node/autoChatEndpoint';
 import { IAutomodeService } from '../../../platform/endpoint/node/automodeService';
-import { CopilotChatEndpoint } from '../../../platform/endpoint/node/copilotChatEndpoint';
+import { CopilotChatEndpoint, CopilotUtilitySmallChatEndpoint } from '../../../platform/endpoint/node/copilotChatEndpoint';
 import { IEnvService, isScenarioAutomation } from '../../../platform/env/common/envService';
 import { IVSCodeExtensionContext } from '../../../platform/extContext/common/extensionContext';
 import { IOctoKitService } from '../../../platform/github/common/githubService';
@@ -159,6 +159,25 @@ function buildConfigurationSchema(endpoint: IChatEndpoint): { configurationSchem
 	return { configurationSchema: { properties } };
 }
 
+const utilityAliasFamilies: readonly ChatEndpointFamily[] = ['copilot-utility-small', 'copilot-utility'];
+
+/**
+ * Reports whether `endpoint` is the built-in Copilot endpoint behind the
+ * utility alias `family`. Anything that is not a `CopilotChatEndpoint`, and
+ * any family other than the two utility aliases, yields `false`.
+ */
+function isDefaultEndpointForUtilityFamily(family: ChatEndpointFamily, endpoint: IChatEndpoint): boolean {
+	if (endpoint instanceof CopilotChatEndpoint) {
+		if (family === 'copilot-utility-small') {
+			return endpoint.family === CopilotUtilitySmallChatEndpoint.capiFamily;
+		}
+		if (family === 'copilot-utility') {
+			return endpoint.isFallback;
+		}
+	}
+	return false;
+}
+
 /**
  * Builds the {@link vscode.LanguageModelChatInformation} entry that publishes a
  * utility-family alias (e.g. `copilot-utility-small`) under the copilot vendor.
@@ -228,6 +247,8 @@ export class LanguageModelAccess extends Disposable implements IExtensionContrib
 	 * underlying endpoint without re-resolving the user setting.
 	 */
 	private _utilityAliasEndpoints: Map<string, IChatEndpoint> = new Map();
+	// Overrides resolved outside model-info publication, reused on the next alias publish.
+	private readonly _resolvedUtilityEndpoints = new Map<ChatEndpointFamily, { endpoint: IChatEndpoint; baseCount: number }>();
 	private _lmWrapper: CopilotLanguageModelWrapper;
 	private _promptBaseCountCache: LanguageModelAccessPromptBaseCountCache;
 
@@ -282,7 +303,9 @@ export class LanguageModelAccess extends Disposable implements IExtensionContrib
 			this._onDidChange.fire();
 		}));
 		this._register(this._endpointProvider.onDidModelsRefresh(() => {
-			// Models have been refreshed from CAPI so we should requery them
+			// Drop stale overrides; model publication uses defaults until refresh completes.
+			this._resolvedUtilityEndpoints.clear();
+			void this._refreshUtilityOverrides().catch(err => this._logService.warn(`[LanguageModelAccess] Failed to refresh utility overrides: ${err}`));
 			this._onDidChange.fire();
 		}));
 	}
@@ -395,46 +418,76 @@ export class LanguageModelAccess extends Disposable implements IExtensionContrib
 		this._currentModels = models;
 		this._chatEndpoints = chatEndpoints;
 
-		await this._registerUtilityAliasModels(models);
+		this._registerUtilityAliasModels(models, allEndpoints);
 		return models;
 	}
 
-	private async _registerUtilityAliasModels(models: vscode.LanguageModelChatInformation[]): Promise<void> {
+	/** Publishes utility aliases without waiting for override resolution. */
+	private _registerUtilityAliasModels(
+		models: vscode.LanguageModelChatInformation[],
+		allEndpoints: readonly IChatEndpoint[],
+	): void {
 		this._utilityAliasEndpoints.clear();
-		const aliasFamilies: ChatEndpointFamily[] = ['copilot-utility-small', 'copilot-utility'];
 		const session = this._authenticationService.anyGitHubSession;
 		const requiresAuthorization = session ? { label: session.account.label } : undefined;
-		for (const family of aliasFamilies) {
-			let endpoint: IChatEndpoint | undefined;
-			try {
-				endpoint = await this._endpointProvider.getChatEndpoint(family);
-			} catch (err) {
-				this._logService.warn(`[LanguageModelAccess] Failed to resolve utility alias '${family}': ${err}`);
-				continue;
-			}
+
+		for (const family of utilityAliasFamilies) {
+			const cached = this._resolvedUtilityEndpoints.get(family);
+			const endpoint = cached?.endpoint ?? allEndpoints.find(e => isDefaultEndpointForUtilityFamily(family, e));
 			if (!endpoint) {
 				continue;
 			}
 			this._utilityAliasEndpoints.set(family, endpoint);
 
 			try {
-				const baseCount = await this._promptBaseCountCache.getBaseCount(endpoint);
-				// Always publish the alias as an entry under the `copilot` vendor
-				// — including for BYOK overrides — so workbench consumers that
-				// resolve these utility aliases directly via
-				// `vscode.lm.selectChatModels({ vendor: 'copilot', id: '<alias>' })`
-				// continue to discover a model when the alias is overridden to a
-				// non-copilot endpoint. The alias entry carries the same
-				// `requiresAuthorization` metadata as regular copilot entries so
-				// other extensions can't use it to bypass the underlying
-				// provider's authorization prompt.
-				const aliasInfo = buildUtilityAliasModelInfo(family, endpoint, models, baseCount, requiresAuthorization);
+				// Copilot defaults clone an existing entry; synthesized override aliases need baseCount.
+				const aliasInfo = buildUtilityAliasModelInfo(family, endpoint, models, cached?.baseCount ?? 0, requiresAuthorization);
 				this._logService.trace(`[LanguageModelAccess] Publishing alias '${family}' -> ${endpoint.model} (${aliasInfo.synthesized ? 'synthesized' : 'cloned'}, ${endpoint instanceof CopilotChatEndpoint ? 'copilot' : 'override'}).`);
 				models.push(aliasInfo.info);
 			} catch (err) {
 				this._logService.warn(`[LanguageModelAccess] Failed to publish utility alias '${family}' -> ${endpoint.model}; skipping. Error: ${err}`);
 			}
 		}
+
+		// Override resolution may hang, so keep it off the model-info request path.
+		void this._refreshUtilityOverrides().catch(err => {
+			this._logService.warn(`[LanguageModelAccess] Failed to refresh utility overrides: ${err}`);
+		});
+	}
+
+	/**
+	 * Resolves configured utility model overrides for the next alias publish.
+	 * Families resolve independently so one hung lookup cannot starve the other.
+	 */
+	private async _refreshUtilityOverrides(): Promise<void> {
+		await Promise.all(utilityAliasFamilies.map(async family => {
+			let resolved: IChatEndpoint | undefined;
+			try {
+				resolved = await this._endpointProvider.getChatEndpoint(family);
+			} catch (err) {
+				this._logService.warn(`[LanguageModelAccess] Failed to resolve utility alias '${family}' in background: ${err}`);
+				return;
+			}
+			if (!resolved) {
+				return;
+			}
+			// Skip when the override resolved to the same endpoint that's
+			// already published; no alias change needed.
+			const published = this._utilityAliasEndpoints.get(family);
+			if (published && published.model === resolved.model && published.modelProvider === resolved.modelProvider) {
+				return;
+			}
+			let baseCount: number;
+			try {
+				baseCount = await this._promptBaseCountCache.getBaseCount(resolved);
+			} catch (err) {
+				this._logService.warn(`[LanguageModelAccess] Failed to compute baseCount for utility alias '${family}' -> ${resolved.model}; keeping previously-published alias. Error: ${err}`);
+				return;
+			}
+			this._resolvedUtilityEndpoints.set(family, { endpoint: resolved, baseCount });
+			// Fire per family: waiting for all families would reintroduce the stall.
+			this._onDidChange.fire();
+		}));
+	}
 
 	private async _getEndpointForModel(model: vscode.LanguageModelChatInformation) {
diff --git a/extensions/copilot/src/extension/conversation/vscode-node/test/languageModelAccess.test.ts b/extensions/copilot/src/extension/conversation/vscode-node/test/languageModelAccess.test.ts
@@ -8,16 +8,23 @@ import * as vscode from 'vscode';
 import { IChatMLFetcher } from '../../../../platform/chat/common/chatMLFetcher';
 import { ChatFetchResponseType } from '../../../../platform/chat/common/commonTypes';
 import { MockChatMLFetcher } from '../../../../platform/chat/test/common/mockChatMLFetcher';
+import { CopilotToken, createTestExtendedTokenInfo } from '../../../../platform/authentication/common/copilotToken';
+import { ICopilotTokenManager } from '../../../../platform/authentication/common/copilotTokenManager';
+import { IAutomodeService } from '../../../../platform/endpoint/node/automodeService';
 import { IEndpointProvider } from '../../../../platform/endpoint/common/endpointProvider';
 import { CustomDataPartMimeTypes } from '../../../../platform/endpoint/common/endpointTypes';
 import { CopilotChatEndpoint } from '../../../../platform/endpoint/node/copilotChatEndpoint';
+import { IEnvService } from '../../../../platform/env/common/envService';
 import { IVSCodeExtensionContext } from '../../../../platform/extContext/common/extensionContext';
 import { IChatEndpoint } from '../../../../platform/networking/common/networking';
 import { ITestingServicesAccessor } from '../../../../platform/test/node/services';
+import { TokenizerType } from '../../../../util/common/tokenizer';
+import { DeferredPromise, raceTimeout } from '../../../../util/vs/base/common/async';
 import { CancellationToken } from '../../../../util/vs/base/common/cancellation';
+import { Event } from '../../../../util/vs/base/common/event';
 import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
 import { createExtensionTestingServices } from '../../../test/vscode-node/services';
-import { buildUtilityAliasModelInfo, CopilotLanguageModelWrapper } from '../languageModelAccess';
+import { buildUtilityAliasModelInfo, CopilotLanguageModelWrapper, LanguageModelAccess } from '../languageModelAccess';
 
 
 suite('CopilotLanguageModelWrapper', () => {
@@ -137,6 +144,111 @@ suite('CopilotLanguageModelWrapper', () => {
 	});
 });
 
+suite('LanguageModelAccess model info', () => {
+	test('does not wait for utility alias endpoint resolution', async () => {
+		const aliasLookupStarted = new DeferredPromise<void>();
+		const unresolvedAliasEndpoint = new DeferredPromise<IChatEndpoint>();
+		const endpoint = {
+			model: 'gpt-4o-mini',
+			name: 'GPT 4o mini',
+			family: 'gpt-4o-mini',
+			version: '2024-07-18',
+			modelProvider: 'copilot',
+			modelMaxPromptTokens: 128_000,
+			maxOutputTokens: 4_096,
+			supportsToolCalls: true,
+			supportsVision: false,
+			supportsPrediction: false,
+			showInModelPicker: false,
+			isFallback: false,
+			tokenizer: TokenizerType.O200K,
+			urlOrRequestMetadata: '',
+		} as unknown as IChatEndpoint;
+		const copilotToken = new CopilotToken(createTestExtendedTokenInfo({ token: 'token', username: 'fake', copilot_plan: 'unknown' }));
+		const testingServiceCollection = createExtensionTestingServices();
+		testingServiceCollection.define(ICopilotTokenManager, {
+			_serviceBrand: undefined,
+			onDidCopilotTokenRefresh: Event.None,
+			getCopilotToken: async () => copilotToken,
+			resetCopilotToken: () => { },
+		} as unknown as ICopilotTokenManager);
+		testingServiceCollection.define(IAutomodeService, {
+			_serviceBrand: undefined,
+			resolveAutoModeEndpoint: async () => endpoint,
+			invalidateRouterCache: () => { },
+		} as unknown as IAutomodeService);
+		testingServiceCollection.define(IEndpointProvider, {
+			_serviceBrand: undefined,
+			onDidModelsRefresh: Event.None,
+			getAllCompletionModels: async () => [],
+			getAllChatEndpoints: async () => [endpoint],
+			getChatEndpoint: async (requestOrFamily: unknown) => {
+				if (typeof requestOrFamily === 'string') {
+					void aliasLookupStarted.complete();
+					return unresolvedAliasEndpoint.p;
+				}
+				return endpoint;
+			},
+			getEmbeddingsEndpoint: async () => { throw new Error('Not implemented in test'); },
+		} as unknown as IEndpointProvider);
+		const accessor = testingServiceCollection.createTestingAccessor();
+		// Pre-populate the prompt base-count cache so that
+		// `_provideLanguageModelChatInfo`'s per-endpoint base-count lookup
+		// resolves synchronously from cache rather than spinning up the
+		// real tokenizer (which is slow and not relevant to this test).
+		const extensionContext = accessor.get(IVSCodeExtensionContext);
+		const baseCountCacheKey = 'lmBaseCount/gpt-4o-mini';
+		await extensionContext.globalState.update(baseCountCacheKey, { extensionVersion: accessor.get(IEnvService).getVersion(), baseCount: 0 });
+		const languageModelAccess = accessor.get(IInstantiationService).createInstance(LanguageModelAccess);
+		try {
+			const modelInfo = (languageModelAccess as unknown as { _provideLanguageModelChatInfo(options: { silent: boolean }, token: vscode.CancellationToken): Promise<vscode.LanguageModelChatInformation[]> })._provideLanguageModelChatInfo({ silent: true }, CancellationToken.None);
+			const resolved = await raceTimeout(modelInfo, 2_000);
+			assert.ok(resolved, 'provideLanguageModelChatInfo did not resolve while utility alias lookup was pending');
+			assert.deepStrictEqual(resolved.map(model => model.id), ['gpt-4o-mini']);
+			assert.ok(aliasLookupStarted.isResolved, 'expected utility alias lookup to have been started in the background');
+		} finally {
+			languageModelAccess.dispose();
+			await extensionContext.globalState.update(baseCountCacheKey, undefined);
+		}
+	});
+
+	test('refreshes utility aliases when an override uses the same model id from another provider', async () => {
+		const publishedEndpoint = {
+			model: 'gpt-4o-mini',
+			modelProvider: 'copilot',
+		} as IChatEndpoint;
+		const resolvedEndpoint = {
+			model: 'gpt-4o-mini',
+			modelProvider: 'azure',
+		} as IChatEndpoint;
+		const testingServiceCollection = createExtensionTestingServices();
+		testingServiceCollection.define(IEndpointProvider, {
+			_serviceBrand: undefined,
+			onDidModelsRefresh: Event.None,
+			getAllCompletionModels: async () => [],
+			getAllChatEndpoints: async () => [],
+			getChatEndpoint: async () => resolvedEndpoint,
+			getEmbeddingsEndpoint: async () => { throw new Error('Not implemented in test'); },
+		} as unknown as IEndpointProvider);
+		const accessor = testingServiceCollection.createTestingAccessor();
+		const languageModelAccess = accessor.get(IInstantiationService).createInstance(LanguageModelAccess);
+		const internals = languageModelAccess as unknown as {
+			_utilityAliasEndpoints: Map<string, IChatEndpoint>;
+			_resolvedUtilityEndpoints: Map<string, { endpoint: IChatEndpoint; baseCount: number }>;
+			_promptBaseCountCache: { getBaseCount(endpoint: IChatEndpoint): Promise<number> };
+			_refreshUtilityOverrides(): Promise<void>;
+		};
+		internals._utilityAliasEndpoints.set('copilot-utility-small', publishedEndpoint);
+		internals._promptBaseCountCache = { getBaseCount: async () => 0 };
+		try {
+			await internals._refreshUtilityOverrides();
+			assert.strictEqual(internals._resolvedUtilityEndpoints.get('copilot-utility-small')?.endpoint, resolvedEndpoint);
+		} finally {
+			languageModelAccess.dispose();
+		}
+	});
+});
+
 suite('buildUtilityAliasModelInfo', () => {
 
 	function makeEndpoint(overrides: Partial<IChatEndpoint>): IChatEndpoint {
diff --git a/src/vs/workbench/contrib/chat/test/common/languageModels.test.ts b/src/vs/workbench/contrib/chat/test/common/languageModels.test.ts
@@ -207,6 +207,55 @@ suite('LanguageModels', function () {
 		assert.ok(vendors.some(v => v.vendor === 'test-vendor'));
 		assert.ok(vendors.some(v => v.vendor === 'actual-vendor'));
 	});
+
+	test('selectLanguageModels matches by id for copilot vendor models even when isUserSelectable is false', async function () {
+		// Mirrors how the copilot extension publishes utility aliases such as
+		// `copilot-utility-small`: under the `copilot` (default) vendor, with
+		// `isUserSelectable: false`. The workbench's
+		// `chatToolRiskAssessmentService` resolves them with
+		// `selectLanguageModels({ vendor: 'copilot', id: 'copilot-utility-small' })`
+		// and must get a match.
+		languageModels.deltaLanguageModelChatProviderDescriptors([
+			{ vendor: 'copilot', displayName: 'Copilot', configuration: undefined, managementCommand: undefined, when: undefined }
+		], []);
+
+		store.add(languageModels.registerLanguageModelProvider('copilot', {
+			onDidChange: Event.None,
+			provideLanguageModelChatInfo: async () => {
+				const modelMetadata: ILanguageModelChatMetadata[] = [
+					{
+						extension: nullExtensionDescription.identifier,
+						name: 'GPT 4o mini',
+						vendor: 'copilot',
+						family: 'gpt-4o-mini',
+						version: '2024-07-18',
+						id: 'gpt-4o-mini',
+						maxInputTokens: 100,
+						maxOutputTokens: 100,
+						isDefaultForLocation: {}
+					},
+					{
+						extension: nullExtensionDescription.identifier,
+						name: 'GPT 4o mini',
+						vendor: 'copilot',
+						family: 'copilot-utility-small',
+						version: '2024-07-18',
+						id: 'copilot-utility-small',
+						maxInputTokens: 100,
+						maxOutputTokens: 100,
+						isDefaultForLocation: {},
+						isUserSelectable: false
+					}
+				];
+				return modelMetadata.map(m => ({ metadata: m, identifier: `${m.vendor}/${m.id}` }));
+			},
+			sendChatRequest: async () => { throw new Error(); },
+			provideTokenCount: async () => { throw new Error(); }
+		}));
+
+		const result = await languageModels.selectLanguageModels({ vendor: 'copilot', id: 'copilot-utility-small' });
+		assert.deepStrictEqual(result, ['copilot/copilot-utility-small']);
+	});
 });
 
 suite('LanguageModels - When Clause', function () {