johnbridges committed on
Commit d74feed · 1 Parent(s): 3cce116
Files changed (1)
  1. app.py +87 -256
app.py CHANGED
@@ -1,259 +1,90 @@
- # timesfm_backend.py
- import time
- import json
- import logging
- from typing import Any, Dict, List, Optional, Tuple
 
- import numpy as np
-
- from backends_base import ChatBackend, ImagesBackend  # ChatBackend for OA server
  from config import settings
-
- logger = logging.getLogger(__name__)
-
- # Try to import TimesFM. If not present, we fall back to a naive forecaster.
- _TIMESFM_AVAILABLE = False
- _TFM = None
  try:
-     # google timesfm 2.5 requires `pip install timesfm`
-     # model class name can be TimesFm (library-dependent)
-     from timesfm import TimesFm  # type: ignore
-     _TIMESFM_AVAILABLE = True
- except Exception as e:
-     logger.warning("timesfm not available (%s) — will use naive fallback.", e)
-
-
- def _parse_series(series: Any) -> np.ndarray:
-     """
-     Accepts list[float], list[int], list[dict{value:..}], or dict with 'values'.
-     Returns a 1D float numpy array. Raises ValueError on empty/invalid.
-     """
-     if series is None:
-         raise ValueError("series is required")
-
-     if isinstance(series, dict):
-         if "values" in series:
-             series = series["values"]
-         elif "y" in series:
-             series = series["y"]
-
-     vals: List[float] = []
-     if isinstance(series, (list, tuple)):
-         if series and isinstance(series[0], dict):
-             # e.g. [{"t": "...", "y": 1.2}, ...] or {"value": ...}
-             for item in series:
-                 if "y" in item:
-                     vals.append(float(item["y"]))
-                 elif "value" in item:
-                     vals.append(float(item["value"]))
-         else:
-             # numeric list
-             vals = [float(x) for x in series]
-     else:
-         raise ValueError("series must be a list/tuple or dict with 'values'/'y'")
-
-     if not vals:
-         raise ValueError("series is empty")
-     return np.asarray(vals, dtype=np.float32)
-
-
- def _fallback_forecast(y: np.ndarray, horizon: int) -> np.ndarray:
-     """
-     Very small, dependency-free fallback:
-       - if length >= 4: mean of last 4 points
-       - else: mean of all points
-     """
-     if horizon <= 0:
-         return np.zeros((0,), dtype=np.float32)
-     k = 4 if y.shape[0] >= 4 else y.shape[0]
-     base = float(np.mean(y[-k:]))
-     return np.full((horizon,), base, dtype=np.float32)
-
-
- class TimesFMBackend(ChatBackend):
-     """
-     Chat-compatible backend (for oa_server) wrapping TimesFM (if installed).
-     If TimesFM is missing, uses a naive statistical fallback.
-     """
-
-     def __init__(self,
-                  model_id: Optional[str] = None,
-                  device: Optional[str] = None):
-         """
-         model_id: optional identifier for logs/metadata
-         device: 'cpu' or 'cuda' (passed to TimesFm if supported by installed lib)
-         """
-         self.model_id = model_id or "google/timesfm-2.5-200m-pytorch"
-         self.device = device or "cpu"
-         self._model = None  # lazy init
-
-     # ---------- internal ----------
-     def _ensure_model(self):
-         if self._model is not None or not _TIMESFM_AVAILABLE:
-             return
-         try:
-             # minimal init; adjust kwargs if your installed version needs different args
-             self._model = TimesFm()  # type: ignore
-             logger.info("TimesFM model initialized.")
-         except Exception as e:
-             logger.exception("Failed to initialize TimesFM; will use fallback. %s", e)
-             self._model = None
-
-     # ---------- public helpers ----------
-     async def forecast(self, payload: Dict[str, Any]) -> Dict[str, Any]:
-         """
-         Unified forecast entrypoint.
-         Expected keys (directly in payload OR nested under 'data' OR 'timeseries'):
-           - series: list of numbers (or list of dicts holding 'y'/'value')
-           - horizon: int (>0)
-           - freq: optional string for metadata only
-         Returns:
-           {
-             "model": "...",
-             "horizon": int,
-             "freq": str|None,
-             "forecast": [floats],
-             "note": str|None
-           }
-         """
-         # unwrap if nested
-         if "data" in payload and isinstance(payload["data"], dict):
-             payload = {**payload, **payload["data"]}
-         if "timeseries" in payload and isinstance(payload["timeseries"], dict):
-             payload = {**payload, **payload["timeseries"]}
-
-         series = payload.get("series")
-         horizon = int(payload.get("horizon", 0))
-         freq = payload.get("freq")
-
-         y = _parse_series(series)
-         if horizon <= 0:
-             raise ValueError("horizon must be a positive integer")
-
-         self._ensure_model()
-
-         if _TIMESFM_AVAILABLE and self._model is not None:
-             # Use real TimesFM
-             try:
-                 # Most TimesFM APIs are batch-oriented; we add a batch dim and remove it later
-                 # If your installed version differs (e.g., .predict with signature),
-                 # change these two lines accordingly:
-                 y_batch = y[None, :]
-                 preds = self._model.predict(y_batch, horizon=horizon)  # type: ignore
-                 # preds shape => (1, horizon)
-                 fc = np.asarray(preds).reshape(-1).tolist()
-                 note = None
-             except Exception as e:
-                 logger.exception("TimesFM predict failed; falling back. %s", e)
-                 fc = _fallback_forecast(y, horizon).tolist()
-                 note = "fallback_used_due_to_predict_error"
-         else:
-             # Fallback path
-             fc = _fallback_forecast(y, horizon).tolist()
-             note = "fallback_used_timesfm_missing"
-
-         return {
-             "model": self.model_id,
-             "horizon": horizon,
-             "freq": freq,
-             "forecast": fc,
-             "note": note,
-         }
-
-     # ---------- ChatBackend interface (for oa_server) ----------
-     async def stream(self, request: Dict[str, Any]):
-         """
-         OA-compatible streaming shim:
-           - Extracts forecast inputs from request (or from last user message JSON).
-           - Runs forecast() and yields ONE OpenAI-style chat chunk whose content
-             is a compact JSON string with the forecast result.
-         """
-         rid = f"chatcmpl-timesfm-{int(time.time())}"
-         now = int(time.time())
-
-         # try to gather payload
-         payload: Dict[str, Any] = {}
-
-         # 1) allow direct shape: {series, horizon, ...} / or under 'data'/'timeseries'
-         if isinstance(request, dict):
-             payload = dict(request)  # shallow copy
-
-         # 2) optionally parse last user message if it's JSON
-         try:
-             msgs = request.get("messages") if isinstance(request, dict) else None
-             if isinstance(msgs, list) and msgs:
-                 for m in reversed(msgs):
-                     if isinstance(m, dict) and m.get("role") == "user":
-                         c = m.get("content")
-                         if isinstance(c, str):
-                             c_str = c.strip()
-                             if (c_str.startswith("{") and c_str.endswith("}")) or (
-                                 c_str.startswith("[") and c_str.endswith("]")
-                             ):
-                                 # try parse JSON content
-                                 parsed = json.loads(c_str)
-                                 if isinstance(parsed, dict):
-                                     payload.update(parsed)
-                         break
-         except Exception:
-             # non-fatal: keep whatever we had
-             pass
-
-         # run forecast
-         try:
-             result = await self.forecast(payload)
-         except Exception as e:
-             # return an error chunk in OpenAI shape
-             err = {"error": str(e)}
-             content = json.dumps(err, separators=(",", ":"), ensure_ascii=False)
-             yield {
-                 "id": rid,
-                 "object": "chat.completion.chunk",
-                 "created": now,
-                 "model": self.model_id,
-                 "choices": [
-                     {
-                         "index": 0,
-                         "delta": {"role": "assistant", "content": content},
-                         "finish_reason": "stop",
-                     }
-                 ],
-             }
-             return
-
-         # success: compact JSON content so your .NET can parse
-         content = json.dumps(
-             {
-                 "model": result.get("model"),
-                 "horizon": result.get("horizon"),
-                 "freq": result.get("freq"),
-                 "forecast": result.get("forecast"),
-                 "note": result.get("note"),
-                 "backend": "timesfm",
-             },
-             separators=(",", ":"),
-             ensure_ascii=False,
-         )
-
-         yield {
-             "id": rid,
-             "object": "chat.completion.chunk",
-             "created": now,
-             "model": self.model_id,
-             "choices": [
-                 {
-                     "index": 0,
-                     "delta": {"role": "assistant", "content": content},
-                     "finish_reason": "stop",
-                 }
-             ],
-         }
-
-
- # Optional: keep an images stub to satisfy oa_server wiring if needed elsewhere
- class StubImagesBackend(ImagesBackend):
-     async def generate_b64(self, request: Dict[str, Any]) -> str:
-         logger.warning("Image generation not supported in TimesFM backend.")
-         return (
-             "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGP4BwQACfsD/etCJH0AAAAASUVORK5CYII="
-         )

+ # app.py
+ import asyncio, logging
+ import gradio as gr
 
  from config import settings
+ from rabbit_base import RabbitBase
+ from listener import RabbitListenerBase
+ from rabbit_repo import RabbitRepo
+ from oa_server import OpenAIServers
+ #from vllm_backend import VLLMChatBackend, StubImagesBackend
+ #from transformers_backend import TransformersChatBackend, StubImagesBackend
+ #from hf_backend import HFChatBackend, StubImagesBackend
+ from hf_backend import StubImagesBackend
+ from timesfm_backend import TimesFMBackend
+
+
+ logging.basicConfig(
+     level=logging.INFO,
+     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s"
+ )
+ log = logging.getLogger("app")
+
+ # ----------------- Hugging Face Spaces helpers -----------------
  try:
+     import spaces
+
+     @spaces.GPU(duration=60)
+     def gpu_entrypoint() -> str:
+         return "gpu: ready"
+
+ except Exception:
+     def gpu_entrypoint() -> str:
+         return "gpu: not available (CPU only)"
+
+ # ----------------- RabbitMQ wiring -----------------
+ publisher = RabbitRepo(external_source="openai.mq.server")
+ resolver = (lambda name: "direct" if name.startswith("oa.") else settings.RABBIT_EXCHANGE_TYPE)
+ base = RabbitBase(exchange_type_resolver=resolver)
+
+ servers = OpenAIServers(
+     publisher,
+     chat_backend=TimesFMBackend(),
+     images_backend=StubImagesBackend()
+ )
+
+ handlers = {
+     "oaChatCreate": servers.handle_chat_create,
+     "oaImagesGenerate": servers.handle_images_generate,
+ }
+
+ DECLS = [
+     {"ExchangeName": "oa.chat.create", "FuncName": "oaChatCreate",
+      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
+     {"ExchangeName": "oa.images.generate", "FuncName": "oaImagesGenerate",
+      "MessageTimeout": 600_000, "RoutingKeys": [settings.RABBIT_ROUTING_KEY]},
+ ]
+
+ listener = RabbitListenerBase(base, instance_name=settings.RABBIT_INSTANCE_NAME, handlers=handlers)
+
+ # ----------------- Startup init -----------------
+ async def _startup_init():
+     try:
+         await base.connect()          # connect to RabbitMQ
+         await listener.start(DECLS)   # start queue listeners
+         return "OpenAI MQ + TimesFM: ready"
+     except Exception as e:
+         log.exception("Startup init failed")
+         return f"ERROR: {e}"
+
+ async def ping():
+     return "ok"
+
+ # ----------------- Gradio UI -----------------
+ with gr.Blocks(title="OpenAI over RabbitMQ (local TimesFM)", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("## OpenAI-compatible over RabbitMQ, served by TimesFM locally inside the Space")
+     with gr.Tabs():
+         with gr.Tab("Service"):
+             btn = gr.Button("Ping")
+             out = gr.Textbox(label="Ping result")
+             btn.click(ping, inputs=None, outputs=out)
+             init_status = gr.Textbox(label="Startup status", interactive=False)
+             demo.load(fn=_startup_init, inputs=None, outputs=init_status)
+
+         with gr.Tab("@spaces.GPU Probe"):
+             gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
+             gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
+             gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
+
+ if __name__ == "__main__":
+     demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True, debug=True, mcp_server=True)
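
Note on usage: the TimesFMBackend wired above extracts forecast inputs from the last user message when that message body is JSON (see the stream() shim in the removed listing, whose contents this commit moves into timesfm_backend.py). Below is a minimal sketch of a request a client could publish through the "oa.chat.create" exchange declared in DECLS; the series values are illustrative, while the series/horizon/freq keys come from the forecast() docstring.

    import json

    # Sketch of an OpenAI-style chat request carrying forecast inputs.
    # TimesFMBackend.stream() json.loads() the user content and merges these
    # keys (series, horizon, optional freq) into its forecast payload.
    request = {
        "model": "google/timesfm-2.5-200m-pytorch",
        "messages": [
            {
                "role": "user",
                "content": json.dumps(
                    {"series": [112.0, 118.0, 132.0, 129.0, 121.0],
                     "horizon": 4,
                     "freq": "M"}
                ),
            }
        ],
    }

    # The backend replies with a single chat.completion.chunk whose delta
    # content is compact JSON, e.g.:
    # {"model":"google/timesfm-2.5-200m-pytorch","horizon":4,"freq":"M",
    #  "forecast":[...],"note":null,"backend":"timesfm"}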