johnbridges committed on
Commit b843648 · 1 Parent(s): e4fb2dd
Files changed (1)
  1. app.py +57 -75
app.py CHANGED
@@ -1,9 +1,8 @@
-# app.py
+# app.py (Gradio-only, ZeroGPU-safe)
 import asyncio
-from contextlib import asynccontextmanager
-
+import logging
+from typing import Any, Dict, List
 import gradio as gr
-from fastapi import FastAPI
 
 from config import settings
 from rabbit_base import RabbitBase
@@ -12,30 +11,22 @@ from rabbit_repo import RabbitRepo
 from service import LLMService
 from runners.base import ILLMRunner
 
-# =========================
-# @spaces.GPU() SECTION
-# =========================
-# Mirrors the working Space: define a concrete GPU-decorated fn that Gradio calls.
+# ---------- logging ----------
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
+log = logging.getLogger("app")
+
+# ---------- @spaces.GPU entrypoint ----------
 try:
     import spaces
-    ZERO_GPU_AVAILABLE = True
-
-    @spaces.GPU(duration=120)  # trivial GPU entrypoint; detector-friendly
-    def gpu_entrypoint():
-        """
-        Minimal GPU function so ZeroGPU sees a GPU endpoint.
-        Replace the body later with real CUDA work as needed.
-        """
-        return "gpu: ready"
 
+    @spaces.GPU(duration=60)  # minimal GPU endpoint; no tensors allocated
+    def gpu_entrypoint() -> str:
+        return "gpu: ready"
 except Exception:
-    ZERO_GPU_AVAILABLE = False
-
-    def gpu_entrypoint():
+    def gpu_entrypoint() -> str:
         return "gpu: not available (CPU only)"
 
-
-# ---------------- Runner factory (stub) ----------------
+# ---------- Runner factory (stub) ----------
 class EchoRunner(ILLMRunner):
     Type = "EchoRunner"
     async def StartProcess(self, llmServiceObj: dict): pass
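
Background for reviewers: ZeroGPU attaches a GPU only while a `@spaces.GPU`-decorated function is actually executing, which is why the probe has to be a real decorated entrypoint referenced directly rather than through a lambda. A minimal sketch of what a non-trivial body could look like later, assuming `torch` is added to the Space's requirements (the tensor work below is illustrative and not part of this commit):

# Sketch only -- assumes torch is installed; not part of this commit.
import spaces
import torch

@spaces.GPU(duration=60)  # request up to 60 seconds of GPU time per call
def gpu_entrypoint() -> str:
    # On ZeroGPU, CUDA is only usable inside the decorated call.
    x = torch.ones(1024, device="cuda")
    return f"gpu: ready ({torch.cuda.get_device_name(0)}, sum={int(x.sum().item())})"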
@@ -46,12 +37,11 @@ class EchoRunner(ILLMRunner):
 async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
     return EchoRunner()
 
-
-# ---------------- Publisher and Service ----------------
+# ---------- Publisher & Service ----------
 publisher = RabbitRepo(external_source="https://space.external")
 service = LLMService(publisher, runner_factory)
 
-# ---------------- Handlers (.NET FuncName -> service) ----------------
+# ---------- Handlers (.NET FuncName -> service) ----------
 async def h_start(data): await service.StartProcess(data or {})
 async def h_user(data): await service.UserInput(data or {})
 async def h_remove(data): await service.RemoveSession(data or {})
@@ -70,7 +60,7 @@ handlers = {
     "getFunctionRegistryFiltered": h_getreg_f,
 }
 
-# ---------------- Listener wiring ----------------
+# ---------- Listener wiring ----------
 base = RabbitBase()
 listener = RabbitListenerBase(
     base,
@@ -78,8 +68,8 @@ listener = RabbitListenerBase(
     handlers=handlers,
 )
 
-# Declarations mirror your C# InitRabbitMQObjs()
-DECLS = [
+# Mirror your C# InitRabbitMQObjs()
+DECLS: List[Dict[str, Any]] = [
     {"ExchangeName": f"llmStartSession{settings.SERVICE_ID}", "FuncName": "llmStartSession",
      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
    {"ExchangeName": f"llmUserInput{settings.SERVICE_ID}", "FuncName": "llmUserInput",
@@ -96,53 +86,45 @@ DECLS = [
      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
 ]
 
-
-# ---------------- Gradio UI (smoke test + GPU button) ----------------
-async def ping():
+# ---------- Gradio callbacks ----------
+async def ping() -> str:
     return "ok"
 
-with gr.Blocks() as demo:
-    gr.Markdown("### LLM Runner (Python) — RabbitMQ listener (ZeroGPU-ready)")
-
-    with gr.Row():
-        btn = gr.Button("Ping")
-        out = gr.Textbox(label="Ping result")
-    btn.click(ping, inputs=None, outputs=out)
-
-    # Reference the GPU-decorated function **directly** (no lambda)
-    with gr.Row():
-        gpu_btn = gr.Button("GPU Ready Probe")
-        gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
-    gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
-
-
-# ---------------- FastAPI + lifespan ----------------
-@asynccontextmanager
-async def lifespan(_app: FastAPI):
-    # startup
-    await publisher.connect()
-    await service.init()
-    await listener.start(DECLS)
-    yield
-    # shutdown (optional)
-    # await publisher.close()
-    # await listener.stop()
-
-app = FastAPI(lifespan=lifespan)
-app = gr.mount_gradio_app(app, demo, path="/")
-
-@app.get("/health")
-async def health():
-    return {"status": "ok"}
-
-# Also expose the probe via HTTP (extra-safe for detectors)
-@app.get("/gpu-probe")
-def gpu_probe_route():
-    return {"status": gpu_entrypoint()}
-
-
+# Start RabbitMQ when the Gradio app loads. Return a short status string.
+async def _startup_init():
+    try:
+        await publisher.connect()
+        await service.init()
+        await listener.start(DECLS)
+        return "listener: ready"
+    except Exception as e:
+        log.exception("Startup init failed")
+        return f"listener: ERROR -> {e}"
+
+# ---------- Build the actual page ----------
+with gr.Blocks(title="LLM Runner (ZeroGPU-ready)", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## LLM Runner — RabbitMQ listener (ZeroGPU-ready)")
+
+    with gr.Tabs():
+        with gr.Tab("Service"):
+            with gr.Row():
+                btn = gr.Button("Ping")
+                out = gr.Textbox(label="Ping result")
+            btn.click(ping, inputs=None, outputs=out)
+
+            # show init status when page loads
+            init_status = gr.Textbox(label="Startup status", interactive=False)
+            demo.load(fn=_startup_init, inputs=None, outputs=init_status)
+
+        with gr.Tab("@spaces.GPU Probe"):
+            gr.Markdown("This button is a real `@spaces.GPU()` entrypoint so ZeroGPU keeps the Space alive.")
+            with gr.Row():
+                gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
+                gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
+            # IMPORTANT: reference the decorated function DIRECTLY
+            gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
+
+# On HF Spaces, Gradio serves the app automatically if the variable is named `demo`.
 if __name__ == "__main__":
-    # For local runs; on HF Spaces, the SDK manages the server.
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
-
+    # Local testing only.
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True)
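
Design note on the Gradio-only startup: `demo.load` fires on every page load, not once per process, so `_startup_init` can run more than once if several clients open the Space. A guarded variant is a small extension (the `_started` flag and lock below are assumptions, not part of this commit):

# Sketch: idempotent startup so repeated page loads don't reconnect RabbitMQ.
import asyncio

_start_lock = asyncio.Lock()
_started = False

async def _startup_init() -> str:
    global _started
    async with _start_lock:
        if _started:
            return "listener: already running"
        await publisher.connect()
        await service.init()
        await listener.start(DECLS)
        _started = True
        return "listener: ready"

Dropping FastAPI also removes the `/health` and `/gpu-probe` routes, so liveness checks now have to go through the UI (or Gradio's auto-generated API endpoints for the click handlers) instead of plain HTTP GETs.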