diff --git a/backend/app/celery/celery_app.py b/backend/app/celery/celery_app.py
index 463a1c982..0dd72df2c 100644
--- a/backend/app/celery/celery_app.py
+++ b/backend/app/celery/celery_app.py
@@ -9,6 +9,19 @@
 logger = logging.getLogger(__name__)
 
 
+@worker_process_init.connect
+def warm_llm_modules(**_) -> None:
+    """Import the LLM jobs module when the ``worker_process_init`` signal fires.
+
+    The import is function-local, so merely loading this module does not pull
+    in ``app.services.llm.jobs``; it is loaded when the signal calls this
+    receiver (per Celery docs: in each pool child process as it starts).
+    """
+    import app.services.llm.jobs  # noqa: F401
+
+    logger.info("[warm_llm_modules] LLM modules pre-loaded in worker process")
+
+
 # Create Celery instance
 celery_app = Celery(
     "ai_platform",
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 05e28440c..44a7d7771 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -109,8 +109,8 @@ def AWS_S3_BUCKET(self) -> str:
 
     # Celery Configuration
     CELERY_WORKER_CONCURRENCY: int | None = None
-    CELERY_WORKER_MAX_TASKS_PER_CHILD: int = 1
-    CELERY_WORKER_MAX_MEMORY_PER_CHILD: int = 200000
+    CELERY_WORKER_MAX_TASKS_PER_CHILD: int = 150  # tasks before recycle? confirm
+    CELERY_WORKER_MAX_MEMORY_PER_CHILD: int = 300000  # presumably KiB; TODO confirm
     CELERY_TASK_SOFT_TIME_LIMIT: int = 300
     CELERY_TASK_TIME_LIMIT: int = 600
     CELERY_TASK_MAX_RETRIES: int = 3