johnbridges committed on
Commit b843648 · 1 Parent(s): e4fb2dd
Files changed (1)
  1. app.py +57 -75
app.py CHANGED
@@ -1,9 +1,8 @@
-# app.py
+# app.py (Gradio-only, ZeroGPU-safe)
 import asyncio
-from contextlib import asynccontextmanager
-
+import logging
+from typing import Any, Dict, List
 import gradio as gr
-from fastapi import FastAPI
 
 from config import settings
 from rabbit_base import RabbitBase
@@ -12,30 +11,22 @@ from rabbit_repo import RabbitRepo
 from service import LLMService
 from runners.base import ILLMRunner
 
-# =========================
-# @spaces.GPU() SECTION
-# =========================
-# Mirrors the working Space: define a concrete GPU-decorated fn that Gradio calls.
+# ---------- logging ----------
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
+log = logging.getLogger("app")
+
+# ---------- @spaces.GPU entrypoint ----------
 try:
     import spaces
-    ZERO_GPU_AVAILABLE = True
-
-    @spaces.GPU(duration=120)  # trivial GPU entrypoint; detector-friendly
-    def gpu_entrypoint():
-        """
-        Minimal GPU function so ZeroGPU sees a GPU endpoint.
-        Replace the body later with real CUDA work as needed.
-        """
-        return "gpu: ready"
 
+    @spaces.GPU(duration=60)  # minimal GPU endpoint; no tensors allocated
+    def gpu_entrypoint() -> str:
+        return "gpu: ready"
 except Exception:
-    ZERO_GPU_AVAILABLE = False
-
-    def gpu_entrypoint():
+    def gpu_entrypoint() -> str:
         return "gpu: not available (CPU only)"
 
-
-# ---------------- Runner factory (stub) ----------------
+# ---------- Runner factory (stub) ----------
 class EchoRunner(ILLMRunner):
     Type = "EchoRunner"
     async def StartProcess(self, llmServiceObj: dict): pass
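
Background for reviewers: ZeroGPU attaches a GPU only while a `@spaces.GPU`-decorated function is actually executing, which is why the probe has to be a real decorated entrypoint referenced directly rather than through a lambda. A minimal sketch of what a non-trivial body could look like later, assuming `torch` is added to the Space's requirements (the tensor work below is illustrative and not part of this commit):

# Sketch only -- assumes torch is installed; not part of this commit.
import spaces
import torch

@spaces.GPU(duration=60)  # request up to 60 seconds of GPU time per call
def gpu_entrypoint() -> str:
    # On ZeroGPU, CUDA is only usable inside the decorated call.
    x = torch.ones(1024, device="cuda")
    return f"gpu: ready ({torch.cuda.get_device_name(0)}, sum={int(x.sum().item())})"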
@@ -46,12 +37,11 @@ class EchoRunner(ILLMRunner):
 async def runner_factory(llmServiceObj: dict) -> ILLMRunner:
     return EchoRunner()
 
-
-# ---------------- Publisher and Service ----------------
+# ---------- Publisher & Service ----------
 publisher = RabbitRepo(external_source="https://space.external")
 service = LLMService(publisher, runner_factory)
 
-# ---------------- Handlers (.NET FuncName -> service) ----------------
+# ---------- Handlers (.NET FuncName -> service) ----------
 async def h_start(data): await service.StartProcess(data or {})
 async def h_user(data): await service.UserInput(data or {})
 async def h_remove(data): await service.RemoveSession(data or {})
@@ -70,7 +60,7 @@ handlers = {
     "getFunctionRegistryFiltered": h_getreg_f,
 }
 
-# ---------------- Listener wiring ----------------
+# ---------- Listener wiring ----------
 base = RabbitBase()
 listener = RabbitListenerBase(
     base,
@@ -78,8 +68,8 @@ listener = RabbitListenerBase(
     handlers=handlers,
 )
 
-# Declarations mirror your C# InitRabbitMQObjs()
-DECLS = [
+# Mirror your C# InitRabbitMQObjs()
+DECLS: List[Dict[str, Any]] = [
     {"ExchangeName": f"llmStartSession{settings.SERVICE_ID}", "FuncName": "llmStartSession",
      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
    {"ExchangeName": f"llmUserInput{settings.SERVICE_ID}", "FuncName": "llmUserInput",
@@ -96,53 +86,45 @@ DECLS = [
      "MessageTimeout": 60_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
 ]
 
-
-# ---------------- Gradio UI (smoke test + GPU button) ----------------
-async def ping():
+# ---------- Gradio callbacks ----------
+async def ping() -> str:
     return "ok"
 
-with gr.Blocks() as demo:
-    gr.Markdown("### LLM Runner (Python) — RabbitMQ listener (ZeroGPU-ready)")
-
-    with gr.Row():
-        btn = gr.Button("Ping")
-        out = gr.Textbox(label="Ping result")
-    btn.click(ping, inputs=None, outputs=out)
-
-    # Reference the GPU-decorated function **directly** (no lambda)
-    with gr.Row():
-        gpu_btn = gr.Button("GPU Ready Probe")
-        gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
-    gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
-
-
-# ---------------- FastAPI + lifespan ----------------
-@asynccontextmanager
-async def lifespan(_app: FastAPI):
-    # startup
-    await publisher.connect()
-    await service.init()
-    await listener.start(DECLS)
-    yield
-    # shutdown (optional)
-    # await publisher.close()
-    # await listener.stop()
-
-app = FastAPI(lifespan=lifespan)
-app = gr.mount_gradio_app(app, demo, path="/")
-
-@app.get("/health")
-async def health():
-    return {"status": "ok"}
-
-# Also expose the probe via HTTP (extra-safe for detectors)
-@app.get("/gpu-probe")
-def gpu_probe_route():
-    return {"status": gpu_entrypoint()}
-
-
+# Start RabbitMQ when the Gradio app loads. Return a short status string.
+async def _startup_init():
+    try:
+        await publisher.connect()
+        await service.init()
+        await listener.start(DECLS)
+        return "listener: ready"
+    except Exception as e:
+        log.exception("Startup init failed")
+        return f"listener: ERROR -> {e}"
+
+# ---------- Build the actual page ----------
+with gr.Blocks(title="LLM Runner (ZeroGPU-ready)", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## LLM Runner — RabbitMQ listener (ZeroGPU-ready)")
+
+    with gr.Tabs():
+        with gr.Tab("Service"):
+            with gr.Row():
+                btn = gr.Button("Ping")
+                out = gr.Textbox(label="Ping result")
+            btn.click(ping, inputs=None, outputs=out)
+
+            # show init status when page loads
+            init_status = gr.Textbox(label="Startup status", interactive=False)
+            demo.load(fn=_startup_init, inputs=None, outputs=init_status)
+
+        with gr.Tab("@spaces.GPU Probe"):
+            gr.Markdown("This button is a real `@spaces.GPU()` entrypoint so ZeroGPU keeps the Space alive.")
+            with gr.Row():
+                gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
+                gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
+            # IMPORTANT: reference the decorated function DIRECTLY
+            gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
+
+# On HF Spaces, Gradio serves the app automatically if the variable is named `demo`.
 if __name__ == "__main__":
-    # For local runs; on HF Spaces, the SDK manages the server.
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
-
+    # Local testing only.
+    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True)
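
Design note on the Gradio-only startup: `demo.load` fires on every page load, not once per process, so `_startup_init` can run more than once if several clients open the Space. A guarded variant is a small extension (the `_started` flag and lock below are assumptions, not part of this commit):

# Sketch: idempotent startup so repeated page loads don't reconnect RabbitMQ.
import asyncio

_start_lock = asyncio.Lock()
_started = False

async def _startup_init() -> str:
    global _started
    async with _start_lock:
        if _started:
            return "listener: already running"
        await publisher.connect()
        await service.init()
        await listener.start(DECLS)
        _started = True
        return "listener: ready"

Dropping FastAPI also removes the `/health` and `/gpu-probe` routes, so liveness checks now have to go through the UI (or Gradio's auto-generated API endpoints for the click handlers) instead of plain HTTP GETs.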