Oleg Lavrovsky committed · Commit 93fa881 · unverified · 1 Parent(s): 0aca48e
Files changed (1)
  1. app.py +7 -7
app.py CHANGED
@@ -21,6 +21,9 @@ hf_token = os.getenv("HF_TOKEN", None)
 if hf_token is not None:
     login(token=hf_token)
 
+# Configurable model identifier
+model_name = os.getenv("HF_MODEL", "swiss-ai/Apertus-8B-Instruct-2509")
+
 # Keep data in session
 model = None
 tokenizer = None
@@ -42,9 +45,7 @@ async def lifespan(app: FastAPI):
     """Load the transformer model on startup"""
     global model, tokenizer
     try:
-        logger.info("Loading sentiment analysis model...")
-        # TODO: make this configurable
-        model_name = "swiss-ai/Apertus-8B-Instruct-2509"
+        logger.info(f"Loading model: {model_name}")
 
         # Automatically select device based on availability
         device = "cuda" if cuda.is_available() else "cpu"
@@ -73,6 +74,7 @@ app = FastAPI(
     title="Apertus API",
     description="REST API for serving Apertus models via Hugging Face transformers",
     version="0.1.0",
+    docs_url="/",
     lifespan=lifespan
 )
 
@@ -144,7 +146,5 @@ async def health_check():
         "gpu_available": cuda.is_available()
     }
 
-@app.get("/")
-def read_root():
-    return PlainTextResponse('Habemus Apertus')
-
+if __name__=='__main__':
+    uvicorn.run('app:app', reload=True)
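
With this commit the model identifier is read from the HF_MODEL environment variable (falling back to swiss-ai/Apertus-8B-Instruct-2509), the interactive docs are served at the root path via docs_url="/", and app.py gains a __main__ entry point. A minimal sketch of launching the API against a chosen model, assuming uvicorn is installed and importable from app.py; the helper file name, host, and port are illustrative and not part of the commit:

# run_local.py — hypothetical launcher, not part of this commit.
import os

# Set the model before app.py is imported, so its os.getenv("HF_MODEL", ...) sees it.
os.environ.setdefault("HF_MODEL", "swiss-ai/Apertus-8B-Instruct-2509")

import uvicorn

if __name__ == "__main__":
    # Same call as the new __main__ block in app.py, with an explicit host and port.
    uvicorn.run("app:app", host="127.0.0.1", port=8000, reload=True)

Opening http://127.0.0.1:8000/ then shows the FastAPI interactive docs, which replace the removed 'Habemus Apertus' plain-text root endpoint.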