# NOTE: file-viewer residue (size banner, blame commit hashes, line-number gutter) removed; the source starts below.
# app.py
import asyncio, logging
import gradio as gr

from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from oa_server import OpenAIServers
#from vllm_backend import VLLMChatBackend, StubImagesBackend
#from transformers_backend import TransformersChatBackend, StubImagesBackend
from hf_backend import HFChatBackend, StubImagesBackend


# Process-wide logging setup. The record layout is kept in a named constant
# so the basicConfig call is plain keyword wiring.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Logger used throughout this module.
log = logging.getLogger("app")

# ----------------- Hugging Face Spaces helpers -----------------
try:
    import spaces

    @spaces.GPU(duration=60)
    def gpu_entrypoint() -> str:
        """Return a fixed marker string from the ``spaces.GPU``-decorated probe.

        The function is wrapped with ``spaces.GPU(duration=60)``; it performs
        no work of its own beyond returning the marker.
        """
        return "gpu: ready"

except Exception as exc:
    # Deliberate best-effort fallback: the app must still start when the
    # ``spaces`` package is missing or its decorator cannot be applied.
    # The cause is logged (instead of silently dropped) so a genuine
    # misconfiguration is distinguishable from a plain CPU-only run.
    logging.getLogger("app").info(
        "spaces.GPU unavailable, falling back to CPU-only probe: %r", exc
    )

    def gpu_entrypoint() -> str:
        """Return the marker string used when the GPU decorator is unavailable."""
        return "gpu: not available (CPU only)"

# ----------------- RabbitMQ wiring -----------------
# Publisher instance handed to the OpenAI-style servers below.
publisher = RabbitRepo(external_source="openai.mq.server")


def resolver(name):
    """Choose the exchange type for the exchange called *name*.

    Names starting with ``oa.`` map to ``"direct"``; every other name uses
    ``settings.RABBIT_EXCHANGE_TYPE``.
    """
    if name.startswith("oa."):
        return "direct"
    return settings.RABBIT_EXCHANGE_TYPE


base = RabbitBase(exchange_type_resolver=resolver)

servers = OpenAIServers(
    publisher,
    chat_backend=HFChatBackend(),
    images_backend=StubImagesBackend(),
)

# One row per endpoint: (exchange name, handler key, handler callable).
# Both the handler map and the listener declarations are derived from it so
# the handler key cannot drift between the two.
_ROUTES = (
    ("oa.chat.create", "oaChatCreate", servers.handle_chat_create),
    ("oa.images.generate", "oaImagesGenerate", servers.handle_images_generate),
)

handlers = {func_name: handler for _, func_name, handler in _ROUTES}

DECLS = [
    {
        "ExchangeName": exchange_name,
        "FuncName": func_name,
        "MessageTimeout": 600_000,
        "RoutingKeys": [settings.RABBIT_ROUTING_KEY],
    }
    for exchange_name, func_name, _ in _ROUTES
]

listener = RabbitListenerBase(
    base,
    instance_name=settings.RABBIT_INSTANCE_NAME,
    handlers=handlers,
)

# ----------------- Startup init -----------------
# Guard state for _startup_init. The function is wired to the Gradio load
# event, which can fire more than once (each browser page load), so a
# successful initialisation must not be repeated.
_startup_lock = None
_startup_ready = False


async def _startup_init():
    """Connect to RabbitMQ and start the queue listeners, at most once.

    Returns:
        ``"OpenAI MQ + vLLM: ready"`` once initialisation has succeeded (on
        the first successful call and on every later call), or
        ``"ERROR: <exception>"`` when connecting or starting the listeners
        raised. A failed attempt is logged and leaves the guard unset, so the
        next call retries.
    """
    global _startup_lock, _startup_ready

    if _startup_lock is None:
        # Created lazily so the lock is built inside the running event loop.
        _startup_lock = asyncio.Lock()

    # Serialise concurrent callers: a second load event arriving while the
    # first is still awaiting must not start a second connect/start sequence.
    async with _startup_lock:
        if _startup_ready:
            return "OpenAI MQ + vLLM: ready"
        try:
            await base.connect()          # connect to RabbitMQ
            await listener.start(DECLS)   # start queue listeners
        except Exception as e:
            log.exception("Startup init failed")
            return f"ERROR: {e}"
        _startup_ready = True
        return "OpenAI MQ + vLLM: ready"

async def ping() -> str:
    """Return the constant string ``"ok"``; bound to the Ping button in the UI."""
    return "ok"

# ----------------- Gradio UI -----------------
# Build the Gradio Blocks app. Components are created in layout order inside
# nested context managers, so statement order here determines the page layout.
with gr.Blocks(title="OpenAI over RabbitMQ (local vLLM)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## OpenAI-compatible over RabbitMQ — using vLLM locally inside Space")
    with gr.Tabs():
        # Tab 1: ping button plus the startup status read-out.
        with gr.Tab("Service"):
            btn = gr.Button("Ping")
            out = gr.Textbox(label="Ping result")
            # Clicking the button calls ping() and writes its result to `out`.
            btn.click(ping, inputs=None, outputs=out)
            init_status = gr.Textbox(label="Startup status", interactive=False)
            # Run _startup_init via the Blocks load event and show the status
            # string it returns in the read-only textbox.
            demo.load(fn=_startup_init, inputs=None, outputs=init_status)

        # Tab 2: button that calls gpu_entrypoint() and displays the string it returns.
        with gr.Tab("@spaces.GPU Probe"):
            gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
            gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
            gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)

if __name__ == "__main__":
    # Script entry point: serve the Blocks app defined above.
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces
        server_port=7860,
        show_error=True,
        debug=True,
        mcp_server=True,
    )