# app.py (Gradio-only, ZeroGPU-safe)
import asyncio
import logging
from typing import Any, Dict, List

import gradio as gr

from config import settings
from rabbit_base import RabbitBase
from listener import RabbitListenerBase
from rabbit_repo import RabbitRepo
from service import LLMService
from runners.base import ILLMRunner
from factory import default_runner_factory

# ---------- logging ----------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
log = logging.getLogger("app")

# ---------- @spaces.GPU entrypoint ----------
try:
    import spaces

    @spaces.GPU(duration=60)  # minimal GPU endpoint; no tensors allocated
    def gpu_entrypoint() -> str:
        """Return a fixed readiness string from a ``@spaces.GPU`` function."""
        return "gpu: ready"

except Exception as exc:
    # Best-effort: any failure importing or applying the decorator falls back
    # to a plain function, but record why instead of hiding the cause.
    log.info("spaces GPU decorator unavailable (%s); using CPU-only probe", exc)

    def gpu_entrypoint() -> str:
        """Return a fixed string reporting that no GPU entrypoint is active."""
        return "gpu: not available (CPU only)"


# ---------- Publisher & Service ----------
publisher = RabbitRepo(external_source="https://space.external")
service = LLMService(publisher, default_runner_factory)


# ---------- Handlers (.NET FuncName -> service) ----------
# Each handler receives the message payload; a falsy payload is replaced
# with an empty dict before being handed to the service.
async def h_start(data: Any) -> None:
    """Forward an ``llmStartSession`` payload to ``service.StartProcess``."""
    await service.StartProcess(data or {})


async def h_user(data: Any) -> None:
    """Forward an ``llmUserInput`` payload to ``service.UserInput``."""
    await service.UserInput(data or {})


async def h_remove(data: Any) -> None:
    """Forward an ``llmRemoveSession`` payload to ``service.RemoveSession``."""
    await service.RemoveSession(data or {})


async def h_stop(data: Any) -> None:
    """Forward an ``llmStopRequest`` payload to ``service.StopRequest``."""
    await service.StopRequest(data or {})


async def h_qir(data: Any) -> None:
    """Forward a ``queryIndexResult`` payload to ``service.QueryIndexResult``."""
    await service.QueryIndexResult(data or {})


async def h_getreg(_: Any) -> None:
    """Call ``service.GetFunctionRegistry(False)``; the payload is ignored."""
    await service.GetFunctionRegistry(False)


async def h_getreg_f(_: Any) -> None:
    """Call ``service.GetFunctionRegistry(True)``; the payload is ignored."""
    await service.GetFunctionRegistry(True)


handlers = {
    "llmStartSession": h_start,
    "llmUserInput": h_user,
    "llmRemoveSession": h_remove,
    "llmStopRequest": h_stop,
    "queryIndexResult": h_qir,
    "getFunctionRegistry": h_getreg,
    "getFunctionRegistryFiltered": h_getreg_f,
}

# ---------- Listener wiring ----------
base = RabbitBase()
listener = RabbitListenerBase(
    base,
    instance_name=settings.RABBIT_INSTANCE_NAME,  # queue prefix like your .NET instance
    handlers=handlers,
)

# Mirror your C# InitRabbitMQObjs()
# (FuncName, MessageTimeout) pairs; the exchange name is the FuncName with
# settings.SERVICE_ID appended.
# NOTE(review): timeouts are presumably milliseconds -- confirm with the listener.
_EXCHANGE_TIMEOUTS = (
    ("llmStartSession", 600_000),
    ("llmUserInput", 600_000),
    ("llmRemoveSession", 60_000),
    ("llmStopRequest", 60_000),
    ("queryIndexResult", 60_000),
    ("getFunctionRegistry", 60_000),
    ("getFunctionRegistryFiltered", 60_000),
)

DECLS: List[Dict[str, Any]] = [
    {
        "ExchangeName": f"{func_name}{settings.SERVICE_ID}",
        "FuncName": func_name,
        "MessageTimeout": timeout,
        "RoutingKeys": [settings.RABBIT_ROUTING_KEY],
    }
    for func_name, timeout in _EXCHANGE_TIMEOUTS
]


# ---------- Gradio callbacks ----------
async def ping() -> str:
    """Return the constant string ``"ok"`` for the Ping button."""
    return "ok"


# Start RabbitMQ when the Gradio app loads. Return a short status string.
# ``demo.load`` fires for every browser page load, so the startup sequence is
# guarded: the lock serialises concurrent loads and the flag records success.
_startup_lock = None  # asyncio.Lock, created lazily inside the running loop
_startup_done = False


async def _startup_init() -> str:
    """Connect the publisher, initialise the service and start the listener.

    The sequence runs until it has succeeded once; later calls return the
    ready status without touching the publisher, service or listener again.
    A failed attempt leaves the flag unset so the next page load retries.

    Returns:
        ``"listener: ready"`` on success (or if startup already succeeded),
        otherwise ``"listener: ERROR -> <exception>"``.
    """
    global _startup_lock, _startup_done

    # No await between the check and the assignment, so only one lock is made.
    if _startup_lock is None:
        _startup_lock = asyncio.Lock()

    async with _startup_lock:
        if _startup_done:
            return "listener: ready"
        try:
            await publisher.connect()
            await service.init()
            await listener.start(DECLS)
        except Exception as e:
            # Report the failure in the UI rather than breaking the page load.
            # NOTE(review): a retry after a partial failure re-runs the earlier
            # steps; confirm connect()/init() tolerate being called again.
            log.exception("Startup init failed")
            return f"listener: ERROR -> {e}"
        _startup_done = True
        return "listener: ready"


# ---------- Build the actual page ----------
with gr.Blocks(title="LLM Runner (ZeroGPU-ready)", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## LLM Runner — RabbitMQ listener (ZeroGPU-ready)")

    with gr.Tabs():
        with gr.Tab("Service"):
            with gr.Row():
                btn = gr.Button("Ping")
                out = gr.Textbox(label="Ping result")
            btn.click(ping, inputs=None, outputs=out)

            # show init status when page loads
            init_status = gr.Textbox(label="Startup status", interactive=False)
            demo.load(fn=_startup_init, inputs=None, outputs=init_status)

        with gr.Tab("@spaces.GPU Probe"):
            gr.Markdown("This button is a real `@spaces.GPU()` entrypoint so ZeroGPU keeps the Space alive.")
            with gr.Row():
                gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
                gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
            # IMPORTANT: reference the decorated function DIRECTLY
            gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)

# On HF Spaces, Gradio serves the app automatically if the variable is named `demo`.
if __name__ == "__main__":
    # Local testing only.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True)