Update app.py
app.py CHANGED

@@ -436,6 +436,8 @@ For more information on `huggingface_hub` Inference API support, please check th
 
 # app.py
 
+# app.py
+
 import os
 import gradio as gr
 from huggingface_hub import InferenceClient
@@ -447,7 +449,7 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Initialize Hugging Face Inference Client
 client = InferenceClient(
-    model="mistralai/
+    model="mistralai/Codestral-22B-v0.1",
     token=HF_TOKEN
 )
 
@@ -459,36 +461,32 @@ system_message = (
     "based on their requirements."
 )
 
-#
-def format_prompt(message, history):
-    prompt = f"System: {system_message}\n"
-    for user_msg, bot_msg in history:
-        prompt += f"User: {user_msg}\n"
-        if bot_msg:
-            prompt += f"Assistant: {bot_msg}\n"
-    prompt += f"User: {message}\nAssistant:"
-    return prompt
-
-# Streaming chatbot logic using text generation
+# Streaming chatbot logic using chat.completions
 def respond(message, history):
-
+    # Prepare messages with system prompt
+    messages = [{"role": "system", "content": system_message}]
+    for msg in history:
+        messages.append(msg)
+    messages.append({"role": "user", "content": message})
 
+    # Stream response from the model
     response = ""
-    for chunk in client.
-
-
+    for chunk in client.chat.completions.create(
+        model="mistralai/Codestral-22B-v0.1",
+        messages=messages,
+        max_tokens=1024,
         temperature=0.7,
         top_p=0.95,
-
-        stream=True
+        stream=True,
     ):
-
+        token = chunk.choices[0].delta.get("content", "") or ""
+        response += token
         yield response
 
 # Create Gradio interface
 with gr.Blocks() as demo:
-    chatbot = gr.Chatbot(type='messages') #
-    gr.ChatInterface(fn=respond, chatbot=chatbot, type="messages")
+    chatbot = gr.Chatbot(type='messages') # Use modern message format
+    gr.ChatInterface(fn=respond, chatbot=chatbot, type="messages") # Match format
 
 # Launch app
 if __name__ == "__main__":
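
The new handler can be exercised outside Gradio as a quick sanity check. The sketch below is not part of the commit: it assumes app.py's respond is importable (the launch call is behind the if __name__ == "__main__": guard, so importing only builds the interface), that HF_TOKEN is set in the environment, and that the token grants access to mistralai/Codestral-22B-v0.1. Each value yielded by respond is the full reply accumulated so far, which is the contract gr.ChatInterface expects when it re-renders the chatbot on every chunk.

# sanity_check.py -- hypothetical driver, not part of this commit
from app import respond  # assumes app.py imports cleanly and HF_TOKEN is set

# History uses the same {"role": ..., "content": ...} dicts that
# gr.Chatbot(type='messages') passes to the handler.
history = [
    {"role": "user", "content": "What does this Space do?"},
    {"role": "assistant", "content": "It recommends agentic AI frameworks."},
]

last = ""
for partial in respond("Suggest a framework for tool-using agents.", history):
    last = partial  # each yield is the accumulated response, not a delta
print(last)

Because every yield carries the whole response rather than a single token, the loop body can simply overwrite last; Gradio relies on this same behavior to repaint the chat message as it streams.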