import asyncio
import logging
from typing import Any, Dict, List, Optional

import gradio as gr

from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from service import LLMService
from runners.base import ILLMRunner
from factory import default_runner_factory

# Configure root logging once for the whole process: INFO level, with a
# timestamp, the level name and the emitting logger's name on every record.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)

# Module-wide logger used by the startup/shutdown code in this file.
log = logging.getLogger("app")

# Define ``gpu_entrypoint`` in one of two flavours depending on whether the
# ``spaces`` package can be imported and its ``GPU`` decorator applied.
try:
    import spaces

    @spaces.GPU(duration=60)
    def gpu_entrypoint() -> str:
        """Return a fixed readiness string from a ``@spaces.GPU`` function."""
        return "gpu: ready"

except Exception as exc:
    # Deliberately broad: any failure while importing ``spaces`` or applying
    # the decorator must not prevent the app from starting. The reason is
    # logged instead of being silently discarded so the fallback is visible.
    logging.getLogger("app").info(
        "spaces.GPU unavailable, using CPU-only gpu_entrypoint: %s", exc
    )

    def gpu_entrypoint() -> str:
        """Return a fixed string signalling that no GPU entrypoint exists."""
        return "gpu: not available (CPU only)"

# Outbound side: the repository object handed to the service for publishing.
# NOTE(review): the meaning of ``external_source`` is defined by RabbitRepo,
# which is not visible here — confirm before changing the URL.
publisher = RabbitRepo(external_source="https://space.external")

# The service receives the publisher and the factory used to build runners.
service = LLMService(publisher, default_runner_factory)

# Thin async adapters between incoming messages and ``service`` methods.
# A falsy payload (``None``, empty dict, ...) is replaced by a fresh ``{}``
# so the service always receives a dict-like argument.


async def h_start(data):
    """Forward the payload to ``service.StartProcess``."""
    await service.StartProcess(data or {})


async def h_user(data):
    """Forward the payload to ``service.UserInput``."""
    await service.UserInput(data or {})


async def h_remove(data):
    """Forward the payload to ``service.RemoveSession``."""
    await service.RemoveSession(data or {})


async def h_stop(data):
    """Forward the payload to ``service.StopRequest``."""
    await service.StopRequest(data or {})


async def h_qir(data):
    """Forward the payload to ``service.QueryIndexResult``."""
    await service.QueryIndexResult(data or {})


async def h_getreg(_):
    """Call ``service.GetFunctionRegistry(False)``; the payload is ignored."""
    await service.GetFunctionRegistry(False)


async def h_getreg_f(_):
    """Call ``service.GetFunctionRegistry(True)``; the payload is ignored."""
    await service.GetFunctionRegistry(True)

# Dispatch table passed to the listener: function name -> async handler.
# The keys are the same strings used as ``FuncName`` in ``DECLS``.
handlers = {
    "llmStartSession": h_start,
    "llmUserInput": h_user,
    "llmRemoveSession": h_remove,
    "llmStopRequest": h_stop,
    "queryIndexResult": h_qir,
    "getFunctionRegistry": h_getreg,
    "getFunctionRegistryFiltered": h_getreg_f,
}

# Inbound side: a RabbitBase instance wrapped by the listener, which is given
# this instance's configured name and the handler dispatch table.
base = RabbitBase()
listener = RabbitListenerBase(
    base,
    instance_name=settings.RABBIT_INSTANCE_NAME,
    handlers=handlers,
)

# (function name, MessageTimeout) pairs, in declaration order.
# NOTE(review): the timeout unit is presumably milliseconds (600_000 / 60_000)
# — confirm against the listener implementation.
_DECL_SPECS = (
    ("llmStartSession", 600_000),
    ("llmUserInput", 600_000),
    ("llmRemoveSession", 60_000),
    ("llmStopRequest", 60_000),
    ("queryIndexResult", 60_000),
    ("getFunctionRegistry", 60_000),
    ("getFunctionRegistryFiltered", 60_000),
)

# Declarations handed to ``listener.start``. Every entry follows the same
# shape: the exchange name is the function name suffixed with the service id,
# and each entry gets its own single-element routing-key list.
DECLS: List[Dict[str, Any]] = [
    {
        "ExchangeName": f"{func_name}{settings.SERVICE_ID}",
        "FuncName": func_name,
        "MessageTimeout": timeout,
        "RoutingKeys": [settings.RABBIT_ROUTING_KEY],
    }
    for func_name, timeout in _DECL_SPECS
]

async def ping() -> str:
    """Return the constant string ``"ok"``; wired to the UI's Ping button."""
    return "ok"

# Guards the one-time bring-up below. The lock is created lazily inside the
# coroutine so it is instantiated while an event loop is already running.
_startup_lock: Optional[asyncio.Lock] = None
# Set to True only after publisher, service and listener all started.
_startup_done: bool = False


async def _startup_init() -> str:
    """Connect the publisher, initialise the service and start the listener.

    This coroutine is registered as a Gradio ``load`` event handler, which
    Gradio fires on every browser page load. Without a guard each visit would
    reconnect the publisher and start the listener again, so the bring-up is
    serialised with a lock and skipped once it has succeeded. A failed
    attempt is not cached: the next page load retries.

    Returns:
        ``"listener: ready"`` when the bring-up has succeeded (now or on an
        earlier call), otherwise ``"listener: ERROR -> <exception>"``.
    """
    global _startup_lock, _startup_done

    # No await between the check and the assignment, so two coroutines on the
    # same loop cannot both create a lock.
    if _startup_lock is None:
        _startup_lock = asyncio.Lock()

    async with _startup_lock:
        if _startup_done:
            return "listener: ready"

        try:
            await publisher.connect()
            await service.init()
            await listener.start(DECLS)
        except Exception as e:
            # Top-level boundary: report the failure in the UI rather than
            # raising into Gradio, and keep the full traceback in the log.
            log.exception("Startup init failed")
            return f"listener: ERROR -> {e}"

        _startup_done = True
        return "listener: ready"

# Gradio UI: one tab for service status / ping, one tab exposing the GPU probe.
with gr.Blocks(title="LLM Runner (ZeroGPU-ready)", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the dash in this heading was restored from a mis-decoded
    # character in the source — confirm the intended wording.
    gr.Markdown("## LLM Runner — RabbitMQ listener (ZeroGPU-ready)")

    with gr.Tabs():
        with gr.Tab("Service"):
            with gr.Row():
                btn = gr.Button("Ping")
                out = gr.Textbox(label="Ping result")
            btn.click(ping, inputs=None, outputs=out)

            # Read-only box filled by the startup coroutine when the page loads.
            init_status = gr.Textbox(label="Startup status", interactive=False)
            demo.load(fn=_startup_init, inputs=None, outputs=init_status)

        with gr.Tab("@spaces.GPU Probe"):
            gr.Markdown("This button is a real `@spaces.GPU()` entrypoint so ZeroGPU keeps the Space alive.")
            with gr.Row():
                gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
                gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)

            gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)

if __name__ == "__main__":
    # Serve on all interfaces, port 7860, with Gradio error display and debug
    # mode switched on.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True)