DA-2

Running

App Files Files Community

fcu52005505 committed 23 days ago

Commit

9701a8b

verified ·

1 Parent(s): 05c1031

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -22

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ from da2.utils.base import load_config
 from da2.utils.model import load_model
 from da2.utils.io import read_cv2_image, torch_transform, tensorize
-# 引入真正的 Accelerator 來解決 logging 報錯
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -18,14 +18,13 @@ from accelerate.logging import get_logger
 # Global Initialization (Run once at startup)
 # ============================================================
 def initialize_app(config_path="configs/infer.json"):
-    # 1. 初始化真正的 Accelerator
-    # 它會自動偵測環境 (CPU 或 GPU) 並初始化全局狀態，解決 logging 錯誤
     accelerator = Accelerator()
     # 2. 載入 Config
     config = load_config(config_path)
-    # 設定 Logger (這時 accelerate 已經初始化，get_logger 會正常運作)
     logger = get_logger(__name__, log_level="INFO")
     config.setdefault("env", {})
     config["env"]["logger"] = logger
@@ -33,8 +32,7 @@ def initialize_app(config_path="configs/infer.json"):
     accelerator.print(f"Running on device: {accelerator.device}")
-    # 3. 載入模型 (Global Load)
-    # 注意：我們在這裡就載入模型，而不是在推論時才載入
     model = load_model(config, accelerator)
     model = model.to(accelerator.device)
     model.eval()
@@ -47,14 +45,12 @@ try:
     print("Model loaded successfully!")
 except Exception as e:
     print(f"Error loading model: {e}")
-    # 為了防止 Build 失敗，這裡可以設為 None，但在推論時會報錯
     CONFIG, ACCELERATOR, MODEL = None, None, None
 # ============================================================
 # Mask loader
 # ============================================================
 def read_mask_demo(mask_path, img_shape):
-    # 處理無 Mask 的情況，回傳全 True 的 Mask
     if mask_path is None:
         if len(img_shape) == 3:
             return np.ones((img_shape[1], img_shape[2]), dtype=bool)
@@ -62,11 +58,15 @@ def read_mask_demo(mask_path, img_shape):
     mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
     if mask is None:
-        # 如果讀取失敗，也回傳全 True
         if len(img_shape) == 3:
             return np.ones((img_shape[1], img_shape[2]), dtype=bool)
         return np.ones(img_shape[:2], dtype=bool)
     return mask > 0
 # ============================================================
@@ -78,40 +78,49 @@ def run_inference_and_save_depth(image_path, mask_path=None):
     device = ACCELERATOR.device
-    # Read image
     cv2_img = read_cv2_image(image_path)
     if cv2_img is None:
         print(f"Error reading image: {image_path}")
         return None, None
     img_tensor = torch_transform(cv2_img)  # CxHxW tensor
-    # 確保 mask 形狀正確
     mask = read_mask_demo(mask_path, img_tensor.shape)
-    # Move to device with expected dtype
     model_dtype = CONFIG.get("spherevit", {}).get("dtype", "float32")
     input_tensor = tensorize(img_tensor, model_dtype, device)
-    # Autocast (GPU only)
     use_autocast = (device.type == "cuda")
     autocast_ctx = torch.autocast(device_type="cuda") if use_autocast else nullcontext()
     with autocast_ctx, torch.no_grad():
-        # Model inference
         pred = MODEL(input_tensor)
-        # Handle tuple outputs
         if isinstance(pred, (tuple, list)):
             pred = pred[0]
-        # Always convert to float32 before numpy to avoid BF16/FP16 errors
         depth = pred.float().cpu().numpy()
-    # Normalize depth -> 8-bit grayscale
     depth = np.squeeze(depth)
-    # 處理 NaN 和極值
     dmin, dmax = float(np.nanmin(depth)), float(np.nanmax(depth))
     if dmax - dmin > 1e-6:
@@ -121,7 +130,7 @@ def run_inference_and_save_depth(image_path, mask_path=None):
     depth_8bit = (depth_norm * 255).astype(np.uint8)
-    # Save output
     os.makedirs("outputs", exist_ok=True)
     base = os.path.splitext(os.path.basename(image_path))[0]
     out_path = f"outputs/{base}_depth.png"
@@ -138,7 +147,6 @@ def gradio_fn(image, mask):
     depth_img, out_path = run_inference_and_save_depth(image, mask)
     return depth_img, out_path
-# 定義 Gradio 介面
 demo = gr.Interface(
     fn=gradio_fn,
     inputs=[
@@ -150,7 +158,7 @@ demo = gr.Interface(
         gr.File(label="Download Depth PNG"),
     ],
     title="DA² — Minimal Depth Demo",
-    description="Upload an image (and optional mask) -> outputs an 8-bit grayscale depth PNG.",
     allow_flagging="never",
 )

 from da2.utils.model import load_model
 from da2.utils.io import read_cv2_image, torch_transform, tensorize
+# 引入真正的 Accelerator
 from accelerate import Accelerator
 from accelerate.logging import get_logger
 # Global Initialization (Run once at startup)
 # ============================================================
 def initialize_app(config_path="configs/infer.json"):
+    # 1. 初始化 Accelerator (解決 logging 報錯)
     accelerator = Accelerator()
     # 2. 載入 Config
     config = load_config(config_path)
+    # 設定 Logger
     logger = get_logger(__name__, log_level="INFO")
     config.setdefault("env", {})
     config["env"]["logger"] = logger
     accelerator.print(f"Running on device: {accelerator.device}")
+    # 3. 載入模型 (Global Load，避免每次推論重載)
     model = load_model(config, accelerator)
     model = model.to(accelerator.device)
     model.eval()
     print("Model loaded successfully!")
 except Exception as e:
     print(f"Error loading model: {e}")
     CONFIG, ACCELERATOR, MODEL = None, None, None
 # ============================================================
 # Mask loader
 # ============================================================
 def read_mask_demo(mask_path, img_shape):
     if mask_path is None:
         if len(img_shape) == 3:
             return np.ones((img_shape[1], img_shape[2]), dtype=bool)
     mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
     if mask is None:
         if len(img_shape) == 3:
             return np.ones((img_shape[1], img_shape[2]), dtype=bool)
         return np.ones(img_shape[:2], dtype=bool)
+    # 確保 Mask 尺寸也跟隨 Tensor (如果需要的話)，但通常 Mask 是跟原圖
+    # 這裡簡單處理，如果尺寸不對稍微 resize 一下避免崩潰
+    if mask.shape[:2] != img_shape[-2:]:
+         mask = cv2.resize(mask, (img_shape[-1], img_shape[-2]), interpolation=cv2.INTER_NEAREST)
     return mask > 0
 # ============================================================
     device = ACCELERATOR.device
+    # 1. 讀取原始圖片
     cv2_img = read_cv2_image(image_path)
     if cv2_img is None:
         print(f"Error reading image: {image_path}")
         return None, None
+    # 【關鍵修復】獲取原始圖片的寬高 (Height, Width)
+    # cv2 shape 是 (H, W, C)
+    original_h, original_w = cv2_img.shape[:2]
+    # 2. 轉換為 Tensor (這步通常會 resize 成 518x518 或其他模型輸入尺寸)
     img_tensor = torch_transform(cv2_img)  # CxHxW tensor
+    # 處理 Mask
     mask = read_mask_demo(mask_path, img_tensor.shape)
+    # 準備輸入
     model_dtype = CONFIG.get("spherevit", {}).get("dtype", "float32")
     input_tensor = tensorize(img_tensor, model_dtype, device)
+    # Autocast
     use_autocast = (device.type == "cuda")
     autocast_ctx = torch.autocast(device_type="cuda") if use_autocast else nullcontext()
+    # 3. 推論
     with autocast_ctx, torch.no_grad():
         pred = MODEL(input_tensor)
         if isinstance(pred, (tuple, list)):
             pred = pred[0]
+        # 轉回 Numpy float32
         depth = pred.float().cpu().numpy()
+    # 4. 後處理
     depth = np.squeeze(depth)
+    # 【關鍵修復】將深度圖 Resize 回原始尺寸
+    # cv2.resize 接受的參數是 (Width, Height)
+    if (depth.shape[0] != original_h) or (depth.shape[1] != original_w):
+        depth = cv2.resize(depth, (original_w, original_h), interpolation=cv2.INTER_CUBIC)
+    # 5. 正規化 (Normalization) -> 8-bit
     dmin, dmax = float(np.nanmin(depth)), float(np.nanmax(depth))
     if dmax - dmin > 1e-6:
     depth_8bit = (depth_norm * 255).astype(np.uint8)
+    # 6. 儲存
     os.makedirs("outputs", exist_ok=True)
     base = os.path.splitext(os.path.basename(image_path))[0]
     out_path = f"outputs/{base}_depth.png"
     depth_img, out_path = run_inference_and_save_depth(image, mask)
     return depth_img, out_path
 demo = gr.Interface(
     fn=gradio_fn,
     inputs=[
         gr.File(label="Download Depth PNG"),
     ],
     title="DA² — Minimal Depth Demo",
+    description="Upload an image (and optional mask) -> outputs an 8-bit grayscale depth PNG (Resized to Original).",
     allow_flagging="never",
 )