# app.py
import logging
import gradio as gr
from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from oa_server import OpenAIServers
#from vllm_backend import VLLMChatBackend, StubImagesBackend
#from transformers_backend import TransformersChatBackend, StubImagesBackend
from hf_backend import HFChatBackend, StubImagesBackend
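# Exactly one backend import should be active; swap the commented imports above
# (and the backend handed to OpenAIServers below) to use vLLM or raw
# Transformers instead of the Hugging Face backend.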
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
log = logging.getLogger("app")
# ----------------- Hugging Face Spaces helpers -----------------
try:
    import spaces

    @spaces.GPU(duration=60)
    def gpu_entrypoint() -> str:
        return "gpu: ready"
except Exception:
    def gpu_entrypoint() -> str:
        return "gpu: not available (CPU only)"
# ----------------- RabbitMQ wiring -----------------
publisher = RabbitRepo(external_source="openai.mq.server")
def resolver(name: str) -> str:
    # "oa.*" exchanges are declared as direct; anything else uses the
    # exchange type configured in settings.
    return "direct" if name.startswith("oa.") else settings.RABBIT_EXCHANGE_TYPE
base = RabbitBase(exchange_type_resolver=resolver)
servers = OpenAIServers(
    publisher,
    chat_backend=HFChatBackend(),
    images_backend=StubImagesBackend(),
)
handlers = {
    "oaChatCreate": servers.handle_chat_create,
    "oaImagesGenerate": servers.handle_images_generate,
}
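# The listener presumably routes messages arriving on each declared exchange to
# the handler registered under the matching FuncName (see DECLS below).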
DECLS = [
    {"ExchangeName": "oa.chat.create", "FuncName": "oaChatCreate",
     "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
    {"ExchangeName": "oa.images.generate", "FuncName": "oaImagesGenerate",
     "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
]
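# MessageTimeout is presumably in milliseconds (600_000 = 10 minutes).
# Example payload a client might publish to "oa.chat.create" (a sketch only --
# the real envelope is defined by RabbitRepo/OpenAIServers, not shown here;
# field names follow the OpenAI chat API by assumption):
# {
#     "model": "...",
#     "messages": [{"role": "user", "content": "Hello"}]
# }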
listener = RabbitListenerBase(base, instance_name=settings.RABBIT_INSTANCE_NAME, handlers=handlers)
# ----------------- Startup init -----------------
async def _startup_init():
    try:
        await base.connect()         # connect to RabbitMQ
        await listener.start(DECLS)  # declare exchanges and start queue listeners
        return "OpenAI MQ + HF backend: ready"
    except Exception as e:
        log.exception("Startup init failed")
        return f"ERROR: {e}"
async def ping():
    return "ok"
# ----------------- Gradio UI -----------------
with gr.Blocks(title="OpenAI over RabbitMQ (local HF backend)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## OpenAI-compatible over RabbitMQ — using a local Hugging Face backend inside the Space")
    with gr.Tabs():
        with gr.Tab("Service"):
            btn = gr.Button("Ping")
            out = gr.Textbox(label="Ping result")
            btn.click(ping, inputs=None, outputs=out)
            init_status = gr.Textbox(label="Startup status", interactive=False)
            demo.load(fn=_startup_init, inputs=None, outputs=init_status)
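            # Note: demo.load fires on every page load, so base.connect() and
            # listener.start() may run more than once; this assumes RabbitBase
            # and RabbitListenerBase tolerate (or guard against) reinitialization.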
with gr.Tab("@spaces.GPU Probe"):
gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True, mcp_server=True)
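# Launch notes (assumptions, not verified against this Space's config):
# - 0.0.0.0:7860 is the address/port Hugging Face Spaces expects a Gradio app on.
# - mcp_server=True additionally exposes the app as an MCP server and requires
#   a Gradio version with MCP support (installed via `gradio[mcp]`).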