Skip to content

Commit a552ae4

Browse files
Copilot and friggeri authored
Cache list_models across all SDK languages to prevent rate limiting under concurrency (#300)
* Initial plan

* Add caching for list_models to prevent rate limiting

Co-authored-by: friggeri <[email protected]>

* Fix linting: remove quotes from type annotation

* Fix race condition in list_models caching and add missing test decorator

Co-authored-by: friggeri <[email protected]>

* Return copies of cached models list and improve test robustness

Co-authored-by: friggeri <[email protected]>

* Add list_models caching across all SDK languages (nodejs, dotnet, go)

Co-authored-by: friggeri <[email protected]>

---------

Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: friggeri <[email protected]>
Co-authored-by: Adrien Friggeri <[email protected]>
1 parent c39a129 commit a552ae4

File tree

5 files changed

+178
-12
lines changed

5 files changed

+178
-12
lines changed

dotnet/src/Client.cs

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ public partial class CopilotClient : IDisposable, IAsyncDisposable
5858
private bool _disposed;
5959
private readonly int? _optionsPort;
6060
private readonly string? _optionsHost;
61+
private List<ModelInfo>? _modelsCache;
62+
private readonly SemaphoreSlim _modelsCacheLock = new(1, 1);
6163

6264
/// <summary>
6365
/// Creates a new instance of <see cref="CopilotClient"/>.
@@ -284,6 +286,9 @@ private async Task CleanupConnectionAsync(List<Exception>? errors)
284286
try { ctx.Rpc.Dispose(); }
285287
catch (Exception ex) { errors?.Add(ex); }
286288

289+
// Clear models cache
290+
_modelsCache = null;
291+
287292
if (ctx.NetworkStream is not null)
288293
{
289294
try { await ctx.NetworkStream.DisposeAsync(); }
@@ -545,15 +550,38 @@ public async Task<GetAuthStatusResponse> GetAuthStatusAsync(CancellationToken ca
545550
/// </summary>
546551
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can be used to cancel the operation.</param>
547552
/// <returns>A task that resolves with a list of available models.</returns>
553+
/// <remarks>
554+
/// Results are cached after the first successful call to avoid rate limiting.
555+
/// The cache is cleared when the client disconnects.
556+
/// </remarks>
548557
/// <exception cref="InvalidOperationException">Thrown when the client is not connected or not authenticated.</exception>
549558
public async Task<List<ModelInfo>> ListModelsAsync(CancellationToken cancellationToken = default)
550559
{
551560
var connection = await EnsureConnectedAsync(cancellationToken);
552561

553-
var response = await InvokeRpcAsync<GetModelsResponse>(
554-
connection.Rpc, "models.list", [], cancellationToken);
562+
// Use semaphore for async locking to prevent race condition with concurrent calls
563+
await _modelsCacheLock.WaitAsync(cancellationToken);
564+
try
565+
{
566+
// Check cache (already inside lock)
567+
if (_modelsCache is not null)
568+
{
569+
return new List<ModelInfo>(_modelsCache); // Return a copy to prevent cache mutation
570+
}
571+
572+
// Cache miss - fetch from backend while holding lock
573+
var response = await InvokeRpcAsync<GetModelsResponse>(
574+
connection.Rpc, "models.list", [], cancellationToken);
575+
576+
// Update cache before releasing lock
577+
_modelsCache = response.Models;
555578

556-
return response.Models;
579+
return new List<ModelInfo>(response.Models); // Return a copy to prevent cache mutation
580+
}
581+
finally
582+
{
583+
_modelsCacheLock.Release();
584+
}
557585
}
558586

559587
/// <summary>

go/client.go

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ type Client struct {
7474
useStdio bool // resolved value from options
7575
autoStart bool // resolved value from options
7676
autoRestart bool // resolved value from options
77+
modelsCache []ModelInfo
78+
modelsCacheMux sync.Mutex
7779
}
7880

7981
// NewClient creates a new Copilot CLI client with the given options.
@@ -324,6 +326,11 @@ func (c *Client) Stop() []error {
324326
c.client = nil
325327
}
326328

329+
// Clear models cache
330+
c.modelsCacheMux.Lock()
331+
c.modelsCache = nil
332+
c.modelsCacheMux.Unlock()
333+
327334
c.state = StateDisconnected
328335
if !c.isExternalServer {
329336
c.actualPort = 0
@@ -380,6 +387,11 @@ func (c *Client) ForceStop() {
380387
c.client = nil
381388
}
382389

390+
// Clear models cache
391+
c.modelsCacheMux.Lock()
392+
c.modelsCache = nil
393+
c.modelsCacheMux.Unlock()
394+
383395
c.state = StateDisconnected
384396
if !c.isExternalServer {
385397
c.actualPort = 0
@@ -1013,12 +1025,28 @@ func (c *Client) GetAuthStatus() (*GetAuthStatusResponse, error) {
10131025
return response, nil
10141026
}
10151027

1016-
// ListModels returns available models with their metadata
1028+
// ListModels returns available models with their metadata.
1029+
//
1030+
// Results are cached after the first successful call to avoid rate limiting.
1031+
// The cache is cleared when the client disconnects.
10171032
func (c *Client) ListModels() ([]ModelInfo, error) {
10181033
if c.client == nil {
10191034
return nil, fmt.Errorf("client not connected")
10201035
}
10211036

1037+
// Use mutex for locking to prevent race condition with concurrent calls
1038+
c.modelsCacheMux.Lock()
1039+
defer c.modelsCacheMux.Unlock()
1040+
1041+
// Check cache (already inside lock)
1042+
if c.modelsCache != nil {
1043+
// Return a copy to prevent cache mutation
1044+
result := make([]ModelInfo, len(c.modelsCache))
1045+
copy(result, c.modelsCache)
1046+
return result, nil
1047+
}
1048+
1049+
// Cache miss - fetch from backend while holding lock
10221050
result, err := c.client.Request("models.list", map[string]interface{}{})
10231051
if err != nil {
10241052
return nil, err
@@ -1035,7 +1063,13 @@ func (c *Client) ListModels() ([]ModelInfo, error) {
10351063
return nil, fmt.Errorf("failed to unmarshal models response: %w", err)
10361064
}
10371065

1038-
return response.Models, nil
1066+
// Update cache before releasing lock
1067+
c.modelsCache = response.Models
1068+
1069+
// Return a copy to prevent cache mutation
1070+
models := make([]ModelInfo, len(response.Models))
1071+
copy(models, response.Models)
1072+
return models, nil
10391073
}
10401074

10411075
// verifyProtocolVersion verifies that the server's protocol version matches the SDK's expected version

nodejs/src/client.ts

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ export class CopilotClient {
112112
};
113113
private isExternalServer: boolean = false;
114114
private forceStopping: boolean = false;
115+
private modelsCache: ModelInfo[] | null = null;
116+
private modelsCacheLock: Promise<void> = Promise.resolve();
115117

116118
/**
117119
* Creates a new CopilotClient instance.
@@ -315,6 +317,9 @@ export class CopilotClient {
315317
this.connection = null;
316318
}
317319

320+
// Clear models cache
321+
this.modelsCache = null;
322+
318323
if (this.socket) {
319324
try {
320325
this.socket.end();
@@ -389,6 +394,9 @@ export class CopilotClient {
389394
this.connection = null;
390395
}
391396

397+
// Clear models cache
398+
this.modelsCache = null;
399+
392400
if (this.socket) {
393401
try {
394402
this.socket.destroy(); // destroy() is more forceful than end()
@@ -640,17 +648,44 @@ export class CopilotClient {
640648
}
641649

642650
/**
643-
* List available models with their metadata
651+
* List available models with their metadata.
652+
*
653+
* Results are cached after the first successful call to avoid rate limiting.
654+
* The cache is cleared when the client disconnects.
655+
*
644656
* @throws Error if not authenticated
645657
*/
646658
async listModels(): Promise<ModelInfo[]> {
647659
if (!this.connection) {
648660
throw new Error("Client not connected");
649661
}
650662

651-
const result = await this.connection.sendRequest("models.list", {});
652-
const response = result as { models: ModelInfo[] };
653-
return response.models;
663+
// Use promise-based locking to prevent race condition with concurrent calls
664+
await this.modelsCacheLock;
665+
666+
let resolveLock: () => void;
667+
this.modelsCacheLock = new Promise((resolve) => {
668+
resolveLock = resolve;
669+
});
670+
671+
try {
672+
// Check cache (already inside lock)
673+
if (this.modelsCache !== null) {
674+
return [...this.modelsCache]; // Return a copy to prevent cache mutation
675+
}
676+
677+
// Cache miss - fetch from backend while holding lock
678+
const result = await this.connection.sendRequest("models.list", {});
679+
const response = result as { models: ModelInfo[] };
680+
const models = response.models;
681+
682+
// Update cache before releasing lock
683+
this.modelsCache = models;
684+
685+
return [...models]; // Return a copy to prevent cache mutation
686+
} finally {
687+
resolveLock!();
688+
}
654689
}
655690

656691
/**

python/copilot/client.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ def __init__(self, options: Optional[CopilotClientOptions] = None):
157157
self._state: ConnectionState = "disconnected"
158158
self._sessions: dict[str, CopilotSession] = {}
159159
self._sessions_lock = threading.Lock()
160+
self._models_cache: Optional[list[ModelInfo]] = None
161+
self._models_cache_lock = asyncio.Lock()
160162

161163
def _parse_cli_url(self, url: str) -> tuple[str, int]:
162164
"""
@@ -281,6 +283,10 @@ async def stop(self) -> list["StopError"]:
281283
await self._client.stop()
282284
self._client = None
283285

286+
# Clear models cache
287+
async with self._models_cache_lock:
288+
self._models_cache = None
289+
284290
# Kill CLI process
285291
# Kill CLI process (only if we spawned it)
286292
if self._process and not self._is_external_server:
@@ -325,6 +331,10 @@ async def force_stop(self) -> None:
325331
pass # Ignore errors during force stop
326332
self._client = None
327333

334+
# Clear models cache
335+
async with self._models_cache_lock:
336+
self._models_cache = None
337+
328338
# Kill CLI process immediately
329339
if self._process and not self._is_external_server:
330340
self._process.kill()
@@ -709,6 +719,9 @@ async def list_models(self) -> list["ModelInfo"]:
709719
"""
710720
List available models with their metadata.
711721
722+
Results are cached after the first successful call to avoid rate limiting.
723+
The cache is cleared when the client disconnects.
724+
712725
Returns:
713726
A list of ModelInfo objects with model details.
714727
@@ -724,9 +737,21 @@ async def list_models(self) -> list["ModelInfo"]:
724737
if not self._client:
725738
raise RuntimeError("Client not connected")
726739

727-
response = await self._client.request("models.list", {})
728-
models_data = response.get("models", [])
729-
return [ModelInfo.from_dict(model) for model in models_data]
740+
# Use asyncio lock to prevent race condition with concurrent calls
741+
async with self._models_cache_lock:
742+
# Check cache (already inside lock)
743+
if self._models_cache is not None:
744+
return list(self._models_cache) # Return a copy to prevent cache mutation
745+
746+
# Cache miss - fetch from backend while holding lock
747+
response = await self._client.request("models.list", {})
748+
models_data = response.get("models", [])
749+
models = [ModelInfo.from_dict(model) for model in models_data]
750+
751+
# Update cache before releasing lock
752+
self._models_cache = models
753+
754+
return list(models) # Return a copy to prevent cache mutation
730755

731756
async def list_sessions(self) -> list["SessionMetadata"]:
732757
"""

python/e2e/test_client.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,47 @@ async def test_should_list_models_when_authenticated(self):
135135
await client.stop()
136136
finally:
137137
await client.force_stop()
138+
139+
@pytest.mark.asyncio
140+
async def test_should_cache_models_list(self):
141+
"""Test that list_models caches results to avoid rate limiting"""
142+
client = CopilotClient({"cli_path": CLI_PATH, "use_stdio": True})
143+
144+
try:
145+
await client.start()
146+
147+
auth_status = await client.get_auth_status()
148+
if not auth_status.isAuthenticated:
149+
# Skip if not authenticated - models.list requires auth
150+
await client.stop()
151+
return
152+
153+
# First call should fetch from backend
154+
models1 = await client.list_models()
155+
assert isinstance(models1, list)
156+
157+
# Second call should return from cache (different list object but same content)
158+
models2 = await client.list_models()
159+
assert models2 is not models1, "Should return a copy, not the same object"
160+
assert len(models2) == len(models1), "Cached results should have same content"
161+
if len(models1) > 0:
162+
assert models1[0].id == models2[0].id, "Cached models should match"
163+
164+
# After stopping, cache should be cleared
165+
await client.stop()
166+
167+
# Restart and verify cache is empty
168+
await client.start()
169+
170+
# Check authentication again after restart
171+
auth_status = await client.get_auth_status()
172+
if not auth_status.isAuthenticated:
173+
await client.stop()
174+
return
175+
176+
models3 = await client.list_models()
177+
assert models3 is not models1, "Cache should be cleared after disconnect"
178+
179+
await client.stop()
180+
finally:
181+
await client.force_stop()

0 commit comments

Comments
 (0)