Spaces:
Runtime error
Runtime error
Oleg Lavrovsky
committed on
Docs root
Browse files
app.py
CHANGED
|
@@ -21,6 +21,9 @@ hf_token = os.getenv("HF_TOKEN", None)
|
|
| 21 |
if hf_token is not None:
|
| 22 |
login(token=hf_token)
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
# Keep data in session
|
| 25 |
model = None
|
| 26 |
tokenizer = None
|
|
@@ -42,9 +45,7 @@ async def lifespan(app: FastAPI):
|
|
| 42 |
"""Load the transformer model on startup"""
|
| 43 |
global model, tokenizer
|
| 44 |
try:
|
| 45 |
-
logger.info("Loading
|
| 46 |
-
# TODO: make this configurable
|
| 47 |
-
model_name = "swiss-ai/Apertus-8B-Instruct-2509"
|
| 48 |
|
| 49 |
# Automatically select device based on availability
|
| 50 |
device = "cuda" if cuda.is_available() else "cpu"
|
|
@@ -73,6 +74,7 @@ app = FastAPI(
|
|
| 73 |
title="Apertus API",
|
| 74 |
description="REST API for serving Apertus models via Hugging Face transformers",
|
| 75 |
version="0.1.0",
|
|
|
|
| 76 |
lifespan=lifespan
|
| 77 |
)
|
| 78 |
|
|
@@ -144,7 +146,5 @@ async def health_check():
|
|
| 144 |
"gpu_available": cuda.is_available()
|
| 145 |
}
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
return PlainTextResponse('Habemus Apertus')
|
| 150 |
-
|
|
|
|
| 21 |
if hf_token is not None:
|
| 22 |
login(token=hf_token)
|
| 23 |
|
| 24 |
+
# Configurable model identifier
|
| 25 |
+
model_name = os.getenv("HF_MODEL", "swiss-ai/Apertus-8B-Instruct-2509")
|
| 26 |
+
|
| 27 |
# Keep data in session
|
| 28 |
model = None
|
| 29 |
tokenizer = None
|
|
|
|
| 45 |
"""Load the transformer model on startup"""
|
| 46 |
global model, tokenizer
|
| 47 |
try:
|
| 48 |
+
logger.info(f"Loading model: {model_name}")
|
|
|
|
|
|
|
| 49 |
|
| 50 |
# Automatically select device based on availability
|
| 51 |
device = "cuda" if cuda.is_available() else "cpu"
|
|
|
|
| 74 |
title="Apertus API",
|
| 75 |
description="REST API for serving Apertus models via Hugging Face transformers",
|
| 76 |
version="0.1.0",
|
| 77 |
+
docs_url="/",
|
| 78 |
lifespan=lifespan
|
| 79 |
)
|
| 80 |
|
|
|
|
| 146 |
"gpu_available": cuda.is_available()
|
| 147 |
}
|
| 148 |
|
| 149 |
+
if __name__=='__main__':
|
| 150 |
+
uvicorn.run('app:app', reload=True)
|
|
|
|
|
|