Arif committed
Commit 89d2b21
1 Parent(s): 09c07f9

Docker Model Runner still erroring, trying to resolve

backend/app/config.py CHANGED
@@ -60,15 +60,15 @@ class Settings(BaseSettings):
 
     # ===== DOCKER MODEL RUNNER MODE (DEBUG=false) =====
     # Metis pattern: stateless HTTP API to DMR on host
-    model_runner_url: str = Field(
-        default="http://host.docker.internal:11434/v1",
+    runner_url: str = Field(
+        default="http://host.docker.internal:11434/engines/llama.cpp/v1",
        env="MODEL_RUNNER_URL",
        description="Docker Model Runner API endpoint (from containers use host.docker.internal)"
    )
-    model_name: str = Field(
-        default="llama3.2:1B-Q4_0",
+    llm_model: str = Field(
+        default="ai/llama3.2:1B-Q4_0",
        env="MODEL_NAME",
-        description="Model name as shown in 'docker model ls'"
+        description="Model name as OCI reference (e.g., ai/llama3.2:1B-Q4_0)"
    )
    docker_timeout: int = Field(
        default=300,
@@ -76,18 +76,6 @@ class Settings(BaseSettings):
        description="Timeout for Docker Model Runner requests (seconds)"
    )
 
-    # ===== BACKWARDS COMPATIBILITY (deprecated) =====
-    # These are kept for backwards compatibility but use new names above
-    @property
-    def docker_model_runner_url(self) -> str:
-        """Backwards compatible alias for model_runner_url"""
-        return self.model_runner_url
-
-    @property
-    def llm_model_name_docker(self) -> str:
-        """Backwards compatible alias for model_name"""
-        return self.model_name
-
    # ===== DATA PROCESSING =====
    max_file_size_mb: int = Field(
        default=50,
@@ -102,6 +90,8 @@ class Settings(BaseSettings):
        env_file = ".env.local"
        case_sensitive = False
        extra = "allow"
+        # Fix Pydantic warning about protected namespaces
+        protected_namespaces = ('settings_',)
 
 
 @lru_cache
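The renamed fields keep their MODEL_RUNNER_URL / MODEL_NAME env bindings, so existing environment overrides still apply. A minimal sketch of how they resolve (not part of the commit; the import path and the override value are assumptions):

# Sketch only: exercise the renamed settings fields outside the app.
import os

os.environ["MODEL_NAME"] = "ai/llama3.2:1B-Q4_0"  # hypothetical override via the kept env binding

from app.config import Settings  # assumed import path for backend/app/config.py

settings = Settings()
print(settings.runner_url)      # default: http://host.docker.internal:11434/engines/llama.cpp/v1
print(settings.llm_model)       # "ai/llama3.2:1B-Q4_0" (from MODEL_NAME)
print(settings.docker_timeout)  # default: 300 seconds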
backend/app/main.py CHANGED
@@ -35,10 +35,10 @@ async def lifespan(app: FastAPI):
    }
 
    docker_config = {
-        "model_name": settings.llm_model_name_docker,
+        "model_name": settings.llm_model,
        "max_tokens": settings.llm_max_tokens,
        "temperature": settings.llm_temperature,
-        "docker_url": settings.docker_model_runner_url,
+        "docker_url": settings.runner_url,
        "timeout": settings.docker_timeout
    }
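Note that this dict still passes the endpoint under the key "docker_url", while get_llm_service in llm_service.py (diffed below) now reads it via docker_config.get("runner_url"). A hedged sketch of a call whose keys line up with the renamed factory lookups (import paths are assumptions; the field and key names are taken from the diffs):

# Sketch only: build docker_config with the keys get_llm_service reads after this commit.
from app.config import Settings                        # assumed import path
from app.services.llm_service import get_llm_service   # assumed import path

settings = Settings()
docker_config = {
    "model_name": settings.llm_model,
    "max_tokens": settings.llm_max_tokens,
    "temperature": settings.llm_temperature,
    "runner_url": settings.runner_url,   # matches docker_config.get("runner_url")
    "timeout": settings.docker_timeout,
}
llm_service = get_llm_service(debug=settings.debug, docker_config=docker_config)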
backend/app/services/llm_service.py CHANGED
@@ -152,11 +152,11 @@ class LLMServiceDockerModelRunner(BaseLLMService):
        model_name: str,
        max_tokens: int,
        temperature: float,
-        docker_url: str,
+        runner_url: str,
        timeout: int = 300
    ):
        super().__init__(model_name, max_tokens, temperature)
-        self.docker_url = docker_url.rstrip("/")  # Remove trailing slash
+        self.runner_url = runner_url.rstrip("/")  # Remove trailing slash
        self.timeout = timeout
        self.client = None
 
@@ -170,11 +170,11 @@ class LLMServiceDockerModelRunner(BaseLLMService):
            return True
 
        try:
-            self.logger.info(f"🔄 Connecting to Docker Model Runner: {self.docker_url}")
+            self.logger.info(f"🔄 Connecting to Docker Model Runner: {self.runner_url}")
            self.client = httpx.AsyncClient(timeout=self.timeout)
 
            # OpenAI-compatible endpoint: GET /v1/models
-            response = await self.client.get(f"{self.docker_url}/models")
+            response = await self.client.get(f"{self.runner_url}/models")
 
            if response.status_code == 200:
                models = response.json()
@@ -208,7 +208,7 @@ class LLMServiceDockerModelRunner(BaseLLMService):
 
            # OpenAI-compatible endpoint: POST /v1/chat/completions
            response = await self.client.post(
-                f"{self.docker_url}/chat/completions",
+                f"{self.runner_url}/chat/completions",
                json=payload
            )
 
@@ -285,7 +285,7 @@ def get_llm_service(debug: bool = None, mlx_config: dict = None, docker_config:
    # Determine debug mode
    if debug is None:
        debug = os.getenv("DEBUG", "false").lower() == "true"
-        if hasattr(settings, "debug"):
+        if settings and hasattr(settings, "debug"):
            debug = settings.debug
 
    # Try MLX first (if DEBUG=true)
@@ -303,23 +303,23 @@ def get_llm_service(debug: bool = None, mlx_config: dict = None, docker_config:
            logger.warning(f"⚠️ MLX failed: {e}, falling back to Docker Model Runner")
 
    # Try Docker Model Runner (Metis pattern)
-    docker_url = None
+    runner_url = None
    if docker_config:
-        docker_url = docker_config.get("docker_url")
+        runner_url = docker_config.get("runner_url")
    elif settings:
-        docker_url = getattr(settings, "model_runner_url", None)
+        runner_url = getattr(settings, "runner_url", None)
    else:
-        docker_url = os.getenv("MODEL_RUNNER_URL")
+        runner_url = os.getenv("MODEL_RUNNER_URL")
 
-    if docker_url:
+    if runner_url:
        try:
            model_name = None
            if docker_config:
                model_name = docker_config.get("model_name")
            elif settings:
-                model_name = getattr(settings, "model_name", None)
+                model_name = getattr(settings, "llm_model", None)
            else:
-                model_name = os.getenv("MODEL_NAME", "llama3.2:1B-Q4_0")
+                model_name = os.getenv("MODEL_NAME", "ai/llama3.2:1B-Q4_0")
 
            config = {
                "model_name": model_name,
@@ -327,11 +327,11 @@ def get_llm_service(debug: bool = None, mlx_config: dict = None, docker_config:
                    getattr(settings, "llm_max_tokens", 512) if settings else 512),
                "temperature": (docker_config or {}).get("temperature",
                    getattr(settings, "llm_temperature", 0.7) if settings else 0.7),
-                "docker_url": docker_url,
+                "runner_url": runner_url,
                "timeout": (docker_config or {}).get("timeout",
                    getattr(settings, "docker_timeout", 300) if settings else 300)
            }
-            logger.info(f"📌 Mode: Docker Model Runner at {docker_url}")
+            logger.info(f"📌 Mode: Docker Model Runner at {runner_url}")
            logger.info(f"📌 Model: {config['model_name']}")
            logger.info(f"✅ Using host GPU acceleration (llama.cpp Metal backend)")
            return LLMServiceDockerModelRunner(**config)
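For a standalone check of the endpoints this service targets, a minimal sketch (assumes Docker Model Runner is reachable at the configured URL and the model has been pulled; request and response shapes follow the OpenAI-compatible API the comments reference):

# Sketch only: probe the same /models and /chat/completions routes the service uses.
import asyncio
import httpx

RUNNER_URL = "http://host.docker.internal:11434/engines/llama.cpp/v1"  # value from the new config default
MODEL = "ai/llama3.2:1B-Q4_0"

async def smoke_test() -> None:
    async with httpx.AsyncClient(timeout=300) as client:
        # Mirrors the service's connection check: GET /models lists the pulled models
        models = await client.get(f"{RUNNER_URL}/models")
        print(models.status_code, [m.get("id") for m in models.json().get("data", [])])

        # Mirrors the completion call: OpenAI-style chat payload
        resp = await client.post(
            f"{RUNNER_URL}/chat/completions",
            json={
                "model": MODEL,
                "messages": [{"role": "user", "content": "Reply with one word."}],
                "max_tokens": 16,
                "temperature": 0.7,
            },
        )
        print(resp.json()["choices"][0]["message"]["content"])

asyncio.run(smoke_test())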