Skip to content

Commit a552ae4

Browse files
Copilot and friggeri authored
Cache list_models across all SDK languages to prevent rate limiting under concurrency (#300)
* Initial plan

* Add caching for list_models to prevent rate limiting

Co-authored-by: friggeri <[email protected]>

* Fix linting: remove quotes from type annotation

* Fix race condition in list_models caching and add missing test decorator

Co-authored-by: friggeri <[email protected]>

* Return copies of cached models list and improve test robustness

Co-authored-by: friggeri <[email protected]>

* Add list_models caching across all SDK languages (nodejs, dotnet, go)

Co-authored-by: friggeri <[email protected]>

---------

Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: friggeri <[email protected]>
Co-authored-by: Adrien Friggeri <[email protected]>
1 parent c39a129 commit a552ae4

File tree

5 files changed

+178
-12
lines changed

5 files changed

+178
-12
lines changed

dotnet/src/Client.cs

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ public partial class CopilotClient : IDisposable, IAsyncDisposable
5858
private bool _disposed;
5959
private readonly int? _optionsPort;
6060
private readonly string? _optionsHost;
61+
private List<ModelInfo>? _modelsCache;
62+
private readonly SemaphoreSlim _modelsCacheLock = new(1, 1);
6163

6264
/// <summary>
6365
/// Creates a new instance of <see cref="CopilotClient"/>.
@@ -284,6 +286,9 @@ private async Task CleanupConnectionAsync(List<Exception>? errors)
284286
try { ctx.Rpc.Dispose(); }
285287
catch (Exception ex) { errors?.Add(ex); }
286288

289+
// Clear models cache
290+
_modelsCache = null;
291+
287292
if (ctx.NetworkStream is not null)
288293
{
289294
try { await ctx.NetworkStream.DisposeAsync(); }
@@ -545,15 +550,38 @@ public async Task<GetAuthStatusResponse> GetAuthStatusAsync(CancellationToken ca
545550
/// </summary>
546551
/// <param name="cancellationToken">A <see cref="CancellationToken"/> that can be used to cancel the operation.</param>
547552
/// <returns>A task that resolves with a list of available models.</returns>
553+
/// <remarks>
554+
/// Results are cached after the first successful call to avoid rate limiting.
555+
/// The cache is cleared when the client disconnects.
556+
/// </remarks>
548557
/// <exception cref="InvalidOperationException">Thrown when the client is not connected or not authenticated.</exception>
549558
public async Task<List<ModelInfo>> ListModelsAsync(CancellationToken cancellationToken = default)
550559
{
551560
var connection = await EnsureConnectedAsync(cancellationToken);
552561

553-
var response = await InvokeRpcAsync<GetModelsResponse>(
554-
connection.Rpc, "models.list", [], cancellationToken);
562+
// Use semaphore for async locking to prevent race condition with concurrent calls
563+
await _modelsCacheLock.WaitAsync(cancellationToken);
564+
try
565+
{
566+
// Check cache (already inside lock)
567+
if (_modelsCache is not null)
568+
{
569+
return new List<ModelInfo>(_modelsCache); // Return a copy to prevent cache mutation
570+
}
571+
572+
// Cache miss - fetch from backend while holding lock
573+
var response = await InvokeRpcAsync<GetModelsResponse>(
574+
connection.Rpc, "models.list", [], cancellationToken);
575+
576+
// Update cache before releasing lock
577+
_modelsCache = response.Models;
555578

556-
return response.Models;
579+
return new List<ModelInfo>(response.Models); // Return a copy to prevent cache mutation
580+
}
581+
finally
582+
{
583+
_modelsCacheLock.Release();
584+
}
557585
}
558586

559587
/// <summary>

go/client.go

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ type Client struct {
7474
useStdio bool // resolved value from options
7575
autoStart bool // resolved value from options
7676
autoRestart bool // resolved value from options
77+
modelsCache []ModelInfo
78+
modelsCacheMux sync.Mutex
7779
}
7880

7981
// NewClient creates a new Copilot CLI client with the given options.
@@ -324,6 +326,11 @@ func (c *Client) Stop() []error {
324326
c.client = nil
325327
}
326328

329+
// Clear models cache
330+
c.modelsCacheMux.Lock()
331+
c.modelsCache = nil
332+
c.modelsCacheMux.Unlock()
333+
327334
c.state = StateDisconnected
328335
if !c.isExternalServer {
329336
c.actualPort = 0
@@ -380,6 +387,11 @@ func (c *Client) ForceStop() {
380387
c.client = nil
381388
}
382389

390+
// Clear models cache
391+
c.modelsCacheMux.Lock()
392+
c.modelsCache = nil
393+
c.modelsCacheMux.Unlock()
394+
383395
c.state = StateDisconnected
384396
if !c.isExternalServer {
385397
c.actualPort = 0
@@ -1013,12 +1025,28 @@ func (c *Client) GetAuthStatus() (*GetAuthStatusResponse, error) {
10131025
return response, nil
10141026
}
10151027

1016-
// ListModels returns available models with their metadata
1028+
// ListModels returns available models with their metadata.
1029+
//
1030+
// Results are cached after the first successful call to avoid rate limiting.
1031+
// The cache is cleared when the client disconnects.
10171032
func (c *Client) ListModels() ([]ModelInfo, error) {
10181033
if c.client == nil {
10191034
return nil, fmt.Errorf("client not connected")
10201035
}
10211036

1037+
// Use mutex for locking to prevent race condition with concurrent calls
1038+
c.modelsCacheMux.Lock()
1039+
defer c.modelsCacheMux.Unlock()
1040+
1041+
// Check cache (already inside lock)
1042+
if c.modelsCache != nil {
1043+
// Return a copy to prevent cache mutation
1044+
result := make([]ModelInfo, len(c.modelsCache))
1045+
copy(result, c.modelsCache)
1046+
return result, nil
1047+
}
1048+
1049+
// Cache miss - fetch from backend while holding lock
10221050
result, err := c.client.Request("models.list", map[string]interface{}{})
10231051
if err != nil {
10241052
return nil, err
@@ -1035,7 +1063,13 @@ func (c *Client) ListModels() ([]ModelInfo, error) {
10351063
return nil, fmt.Errorf("failed to unmarshal models response: %w", err)
10361064
}
10371065

1038-
return response.Models, nil
1066+
// Update cache before releasing lock
1067+
c.modelsCache = response.Models
1068+
1069+
// Return a copy to prevent cache mutation
1070+
models := make([]ModelInfo, len(response.Models))
1071+
copy(models, response.Models)
1072+
return models, nil
10391073
}
10401074

10411075
// verifyProtocolVersion verifies that the server's protocol version matches the SDK's expected version

nodejs/src/client.ts

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ export class CopilotClient {
112112
};
113113
private isExternalServer: boolean = false;
114114
private forceStopping: boolean = false;
115+
private modelsCache: ModelInfo[] | null = null;
116+
private modelsCacheLock: Promise<void> = Promise.resolve();
115117

116118
/**
117119
* Creates a new CopilotClient instance.
@@ -315,6 +317,9 @@ export class CopilotClient {
315317
this.connection = null;
316318
}
317319

320+
// Clear models cache
321+
this.modelsCache = null;
322+
318323
if (this.socket) {
319324
try {
320325
this.socket.end();
@@ -389,6 +394,9 @@ export class CopilotClient {
389394
this.connection = null;
390395
}
391396

397+
// Clear models cache
398+
this.modelsCache = null;
399+
392400
if (this.socket) {
393401
try {
394402
this.socket.destroy(); // destroy() is more forceful than end()
@@ -640,17 +648,44 @@ export class CopilotClient {
640648
}
641649

642650
/**
643-
* List available models with their metadata
651+
* List available models with their metadata.
652+
*
653+
* Results are cached after the first successful call to avoid rate limiting.
654+
* The cache is cleared when the client disconnects.
655+
*
644656
* @throws Error if not authenticated
645657
*/
646658
async listModels(): Promise<ModelInfo[]> {
647659
if (!this.connection) {
648660
throw new Error("Client not connected");
649661
}
650662

651-
const result = await this.connection.sendRequest("models.list", {});
652-
const response = result as { models: ModelInfo[] };
653-
return response.models;
663+
// Use promise-based locking to prevent race condition with concurrent calls
664+
await this.modelsCacheLock;
665+
666+
let resolveLock: () => void;
667+
this.modelsCacheLock = new Promise((resolve) => {
668+
resolveLock = resolve;
669+
});
670+
671+
try {
672+
// Check cache (already inside lock)
673+
if (this.modelsCache !== null) {
674+
return [...this.modelsCache]; // Return a copy to prevent cache mutation
675+
}
676+
677+
// Cache miss - fetch from backend while holding lock
678+
const result = await this.connection.sendRequest("models.list", {});
679+
const response = result as { models: ModelInfo[] };
680+
const models = response.models;
681+
682+
// Update cache before releasing lock
683+
this.modelsCache = models;
684+
685+
return [...models]; // Return a copy to prevent cache mutation
686+
} finally {
687+
resolveLock!();
688+
}
654689
}
655690

656691
/**

python/copilot/client.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ def __init__(self, options: Optional[CopilotClientOptions] = None):
157157
self._state: ConnectionState = "disconnected"
158158
self._sessions: dict[str, CopilotSession] = {}
159159
self._sessions_lock = threading.Lock()
160+
self._models_cache: Optional[list[ModelInfo]] = None
161+
self._models_cache_lock = asyncio.Lock()
160162

161163
def _parse_cli_url(self, url: str) -> tuple[str, int]:
162164
"""
@@ -281,6 +283,10 @@ async def stop(self) -> list["StopError"]:
281283
await self._client.stop()
282284
self._client = None
283285

286+
# Clear models cache
287+
async with self._models_cache_lock:
288+
self._models_cache = None
289+
284290
# Kill CLI process
285291
# Kill CLI process (only if we spawned it)
286292
if self._process and not self._is_external_server:
@@ -325,6 +331,10 @@ async def force_stop(self) -> None:
325331
pass # Ignore errors during force stop
326332
self._client = None
327333

334+
# Clear models cache
335+
async with self._models_cache_lock:
336+
self._models_cache = None
337+
328338
# Kill CLI process immediately
329339
if self._process and not self._is_external_server:
330340
self._process.kill()
@@ -709,6 +719,9 @@ async def list_models(self) -> list["ModelInfo"]:
709719
"""
710720
List available models with their metadata.
711721
722+
Results are cached after the first successful call to avoid rate limiting.
723+
The cache is cleared when the client disconnects.
724+
712725
Returns:
713726
A list of ModelInfo objects with model details.
714727
@@ -724,9 +737,21 @@ async def list_models(self) -> list["ModelInfo"]:
724737
if not self._client:
725738
raise RuntimeError("Client not connected")
726739

727-
response = await self._client.request("models.list", {})
728-
models_data = response.get("models", [])
729-
return [ModelInfo.from_dict(model) for model in models_data]
740+
# Use asyncio lock to prevent race condition with concurrent calls
741+
async with self._models_cache_lock:
742+
# Check cache (already inside lock)
743+
if self._models_cache is not None:
744+
return list(self._models_cache) # Return a copy to prevent cache mutation
745+
746+
# Cache miss - fetch from backend while holding lock
747+
response = await self._client.request("models.list", {})
748+
models_data = response.get("models", [])
749+
models = [ModelInfo.from_dict(model) for model in models_data]
750+
751+
# Update cache before releasing lock
752+
self._models_cache = models
753+
754+
return list(models) # Return a copy to prevent cache mutation
730755

731756
async def list_sessions(self) -> list["SessionMetadata"]:
732757
"""

python/e2e/test_client.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,47 @@ async def test_should_list_models_when_authenticated(self):
135135
await client.stop()
136136
finally:
137137
await client.force_stop()
138+
139+
@pytest.mark.asyncio
140+
async def test_should_cache_models_list(self):
141+
"""Test that list_models caches results to avoid rate limiting"""
142+
client = CopilotClient({"cli_path": CLI_PATH, "use_stdio": True})
143+
144+
try:
145+
await client.start()
146+
147+
auth_status = await client.get_auth_status()
148+
if not auth_status.isAuthenticated:
149+
# Skip if not authenticated - models.list requires auth
150+
await client.stop()
151+
return
152+
153+
# First call should fetch from backend
154+
models1 = await client.list_models()
155+
assert isinstance(models1, list)
156+
157+
# Second call should return from cache (different list object but same content)
158+
models2 = await client.list_models()
159+
assert models2 is not models1, "Should return a copy, not the same object"
160+
assert len(models2) == len(models1), "Cached results should have same content"
161+
if len(models1) > 0:
162+
assert models1[0].id == models2[0].id, "Cached models should match"
163+
164+
# After stopping, cache should be cleared
165+
await client.stop()
166+
167+
# Restart and verify cache is empty
168+
await client.start()
169+
170+
# Check authentication again after restart
171+
auth_status = await client.get_auth_status()
172+
if not auth_status.isAuthenticated:
173+
await client.stop()
174+
return
175+
176+
models3 = await client.list_models()
177+
assert models3 is not models1, "Cache should be cleared after disconnect"
178+
179+
await client.stop()
180+
finally:
181+
await client.force_stop()

0 commit comments

Comments
 (0)