Spaces: Running on Zero
Upload step03_chatbot.py with huggingface_hub
step03_chatbot.py (CHANGED, +3 -3)
@@ -235,6 +235,7 @@ class Qwen3Reranker:
         elif self.is_cuda:
             # CUDA configuration
             config["torch_dtype"] = torch.float16
+            config["device_map"] = "auto"  # Always use "auto" for CUDA
             if self.use_flash_attention:
                 try:
                     config["attn_implementation"] = "flash_attention_2"
@@ -242,8 +243,6 @@ class Qwen3Reranker:
                 except Exception:
                     print(" - Flash Attention 2 not available, using standard attention")
                     self.use_flash_attention = False
-            else:
-                config["device_map"] = "auto"
         else:
             # CPU configuration
             config["torch_dtype"] = torch.float32
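Taken together, these first two hunks move `device_map = "auto"` out of the Flash Attention fallback branch so it is always set on CUDA; before this change, a successful Flash Attention setup left the model without a device map at all. A minimal standalone sketch of the resulting logic, assuming a plain config dict (the helper name is ours, not the file's API):

import torch

def build_model_config(is_cuda: bool, use_flash_attention: bool) -> dict:
    # Hypothetical helper mirroring the config logic after this commit.
    config = {}
    if is_cuda:
        config["torch_dtype"] = torch.float16
        # Now set unconditionally for CUDA, not only in the
        # non-Flash-Attention branch as before this commit.
        config["device_map"] = "auto"
        if use_flash_attention:
            # The real file wraps the model load in try/except and falls
            # back to standard attention if flash-attn is unavailable.
            config["attn_implementation"] = "flash_attention_2"
    else:
        # CPU-only machines stay in full precision.
        config["torch_dtype"] = torch.float32
    return config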
@@ -429,7 +428,7 @@ class GenericRAGChatbot:
 
         if self.is_zerogpu:
             print("🚀 ZeroGPU environment detected - cloud optimizations")
-            self.use_flash_attention =
+            self.use_flash_attention = False  # Temporarily disable Flash Attention on ZeroGPU
         elif self.is_mps and use_flash_attention:
             print("🍎 Mac with MPS detected - automatically disabling Flash Attention")
             self.use_flash_attention = False
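The third hunk hard-disables Flash Attention whenever a ZeroGPU environment is detected: flash-attn needs a prebuilt CUDA kernel matching the runtime, which a ZeroGPU Space does not guarantee. The diff does not show how `self.is_zerogpu` is computed; one common pattern on Hugging Face Spaces is an environment-variable check, sketched below under the assumption (unverified here) that ZeroGPU Spaces expose SPACES_ZERO_GPU:

import os

def detect_zerogpu() -> bool:
    # Assumption: ZeroGPU Spaces set the SPACES_ZERO_GPU environment
    # variable; treat any non-empty value as "running on ZeroGPU".
    return bool(os.environ.get("SPACES_ZERO_GPU"))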
@@ -580,6 +579,7 @@ class GenericRAGChatbot:
             self.config.embedding_model,
             model_kwargs={
                 "attn_implementation": "flash_attention_2",
+                "torch_dtype": torch.float16,  # Required for Flash Attention
                 "device_map": "auto"
             },
             tokenizer_kwargs={"padding_side": "left"}
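The last hunk addresses a real constraint: Flash Attention 2 kernels only run in half precision, so requesting `attn_implementation="flash_attention_2"` without also forcing `torch_dtype` to float16 (or bfloat16) fails on models that default to float32. A self-contained version of the call, with a fallback for machines without flash-attn (the model name is a placeholder; the diff only shows `self.config.embedding_model`):

import torch
from sentence_transformers import SentenceTransformer

EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-0.6B"  # placeholder model name

try:
    # Flash Attention 2 requires fp16/bf16 weights, hence torch_dtype.
    model = SentenceTransformer(
        EMBEDDING_MODEL,
        model_kwargs={
            "attn_implementation": "flash_attention_2",
            "torch_dtype": torch.float16,
            "device_map": "auto",
        },
        tokenizer_kwargs={"padding_side": "left"},
    )
except Exception:
    # flash-attn not installed (or no CUDA): load with default attention.
    model = SentenceTransformer(EMBEDDING_MODEL)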