Spaces:
Runtime error
Runtime error
Oleg Lavrovsky
committed on
Model clearing
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
| 9 |
from huggingface_hub import login
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
import os
|
|
|
|
| 12 |
|
| 13 |
import logging
|
| 14 |
# Configure logging
|
|
@@ -65,8 +66,9 @@ async def lifespan(app: FastAPI):
|
|
| 65 |
raise e
|
| 66 |
# Release resources when the app is stopped
|
| 67 |
yield
|
| 68 |
-
|
| 69 |
-
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
# Setup our app
|
|
@@ -130,6 +132,7 @@ async def predict(q: str):
|
|
| 130 |
)
|
| 131 |
|
| 132 |
# Get and decode the output
|
|
|
|
| 133 |
output_ids = generated_ids[0][-1]
|
| 134 |
logger.debug(output_ids)
|
| 135 |
#[len(model_inputs.input_ids[0]) :]
|
|
|
|
| 9 |
from huggingface_hub import login
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
import os
|
| 12 |
+
import uvicorn
|
| 13 |
|
| 14 |
import logging
|
| 15 |
# Configure logging
|
|
|
|
| 66 |
raise e
|
| 67 |
# Release resources when the app is stopped
|
| 68 |
yield
|
| 69 |
+
del model
|
| 70 |
+
del tokenizer
|
| 71 |
+
cuda.empty_cache()
|
| 72 |
|
| 73 |
|
| 74 |
# Setup our app
|
|
|
|
| 132 |
)
|
| 133 |
|
| 134 |
# Get and decode the output
|
| 135 |
+
print(generated_ids)
|
| 136 |
output_ids = generated_ids[0][-1]
|
| 137 |
logger.debug(output_ids)
|
| 138 |
#[len(model_inputs.input_ids[0]) :]
|