diff --git a/backend/app/celery/celery_app.py b/backend/app/celery/celery_app.py
index 463a1c982..0dd72df2c 100644
--- a/backend/app/celery/celery_app.py
+++ b/backend/app/celery/celery_app.py
@@ -9,6 +9,19 @@
 logger = logging.getLogger(__name__)
 
 
+@worker_process_init.connect
+def warm_llm_modules(**_) -> None:
+    """Pre-import the LLM jobs module when a worker pool process starts.
+
+    Connected to ``worker_process_init`` so the first LLM task in the process
+    does not trigger the import itself. NOTE(review): Celery documents a 4 s
+    limit for handlers of this signal - confirm this import stays under it.
+    """
+    import app.services.llm.jobs  # noqa: F401
+
+    logger.info("[warm_llm_modules] LLM modules pre-loaded in worker process")
+
+
 # Create Celery instance
 celery_app = Celery(
     "ai_platform",
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 05e28440c..44a7d7771 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -109,8 +109,8 @@ def AWS_S3_BUCKET(self) -> str:
 
     # Celery Configuration
     CELERY_WORKER_CONCURRENCY: int | None = None
-    CELERY_WORKER_MAX_TASKS_PER_CHILD: int = 1
-    CELERY_WORKER_MAX_MEMORY_PER_CHILD: int = 200000
+    CELERY_WORKER_MAX_TASKS_PER_CHILD: int = 150
+    CELERY_WORKER_MAX_MEMORY_PER_CHILD: int = 300000
     CELERY_TASK_SOFT_TIME_LIMIT: int = 300
     CELERY_TASK_TIME_LIMIT: int = 600
     CELERY_TASK_MAX_RETRIES: int = 3