"""Gradio app that estimates body measurements from a full-body photo.

A DeepLabV3 segmentation mask provides body edges (shoulder, hip, thigh rows)
and MediaPipe Pose provides joint landmarks. Pixel distances are converted to
centimetres using the user-supplied height.
"""

import math

import cv2
import gradio as gr
import mediapipe as mp
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
from torchvision import models

# Class index compared against the DeepLabV3 argmax output to build the body
# mask (the "person" category of the torchvision pretrained segmentation head).
PERSON_CLASS_ID = 15

# The thigh row is sampled this fraction of the nose->ankle pixel height below
# the left hip landmark.
THIGH_OFFSET_RATIO = 0.15


# ------------------------------
# Load DeepLabV3 model
# ------------------------------
def load_deeplab():
    """Build the DeepLabV3-ResNet101 model (eval mode) and its preprocessing.

    Returns:
        A ``(model, transform)`` tuple. ``transform`` resizes, converts to a
        tensor and normalizes with the mean/std values listed below.
    """
    # NOTE(review): `pretrained=True` is deprecated in newer torchvision in
    # favour of `weights=...`; kept so older installations keep working.
    model = models.segmentation.deeplabv3_resnet101(pretrained=True).eval()
    transform = T.Compose([
        T.Resize(520),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
    ])
    return model, transform


model, transform = load_deeplab()

# MediaPipe pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True)


# ------------------------------
# Helper functions
# ------------------------------
def euclidean(p1, p2):
    """Return the Euclidean distance between two points."""
    return math.dist(p1, p2)


def row_body_edges(mask, y):
    """Find left and right edges of the body at row y from mask.

    Returns:
        ``(left, right)`` column indices of the first and last non-zero pixel
        in row ``y``, or ``(None, None)`` when ``y`` is outside the mask or
        the row contains no non-zero pixel.
    """
    if y < 0 or y >= mask.shape[0]:
        return None, None
    cols = np.flatnonzero(mask[y, :] > 0)
    if cols.size == 0:
        return None, None
    return int(cols.min()), int(cols.max())


def x_on_line_at_y(x1, y1, x2, y2, y):
    """Linear interpolate x at given y on line (x1,y1)->(x2,y2)."""
    # Horizontal (or degenerate) segment: y does not determine a unique x,
    # so fall back to the midpoint x.
    if y2 == y1:
        return int(round((x1 + x2) / 2))
    t = (y - y1) / (y2 - y1)
    return int(round(x1 + t * (x2 - x1)))


def clamp_x(x, w):
    """Round x to an int and clamp it into the valid column range [0, w - 1]."""
    return max(0, min(w - 1, int(round(x))))


def _segment_person(image_pil, width, height):
    """Return a uint8 mask (1 = person class) resized to (height, width)."""
    input_tensor = transform(image_pil).unsqueeze(0)
    with torch.no_grad():
        output = model(input_tensor)["out"][0]
    pred = output.argmax(0).byte().cpu().numpy()
    mask = (pred == PERSON_CLASS_ID).astype(np.uint8)
    # Nearest-neighbour keeps the mask strictly binary after resizing.
    return cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)


def _draw_overlay(image_cv, landmarks, shoulder_line, hip_line, diagonals,
                  waist_line):
    """Draw landmarks and measurement lines on a copy of a BGR image.

    Args:
        image_cv: BGR image array; it is not modified.
        landmarks: MediaPipe landmarks with normalized ``x``/``y`` attributes.
        shoulder_line: ``(left_x, right_x, y)``; skipped if an edge is None.
        hip_line: ``(left_x, right_x, y)``; skipped if an edge is None.
        diagonals: Iterable of ``(start_point, end_point)`` pixel pairs.
        waist_line: ``(left_x, right_x, y)``.

    Returns:
        The annotated image as an RGB ``PIL.Image``.
    """
    vis = image_cv.copy()
    h, w = vis.shape[:2]

    # All colour tuples below are BGR because `vis` is still a BGR array.
    for lmk in landmarks:
        center = (int(lmk.x * w), int(lmk.y * h))
        cv2.circle(vis, center, 3, (0, 255, 0), -1)  # green

    left_s, right_s, y_shoulder = shoulder_line
    if left_s is not None and right_s is not None:
        cv2.line(vis, (left_s, y_shoulder), (right_s, y_shoulder),
                 (255, 0, 0), 2)  # blue

    left_h, right_h, y_hip = hip_line
    if left_h is not None and right_h is not None:
        cv2.line(vis, (left_h, y_hip), (right_h, y_hip),
                 (0, 0, 255), 2)  # red

    # Shoulder->hip diagonals (landmarks).
    for start, end in diagonals:
        cv2.line(vis, start, end, (0, 255, 255), 2)  # yellow

    # Waist line computed from the diagonal interpolation.
    x_left, x_right, y_waist = waist_line
    cv2.line(vis, (x_left, y_waist), (x_right, y_waist),
             (0, 255, 255), 3)  # yellow, thicker

    return Image.fromarray(cv2.cvtColor(vis, cv2.COLOR_BGR2RGB))


# ------------------------------
# Main processing
# ------------------------------
def process_image(image_pil, real_height_cm):
    """Estimate body measurements from a photo and a known height.

    Args:
        image_pil: ``PIL.Image`` of the person, or ``None`` if nothing was
            uploaded.
        real_height_cm: The person's height in centimetres.

    Returns:
        ``(measurements, image)``. On success ``measurements`` maps
        measurement names to centimetres (``None`` where the mask gave no
        edges) and ``image`` is the annotated RGB ``PIL.Image``. On failure
        ``measurements`` is ``{"error": <message>}`` and ``image`` is the
        input image (or ``None`` when no image was supplied).
    """
    # The UI passes None when no image is uploaded or the number is cleared.
    if image_pil is None:
        return {"error": "No image provided."}, None
    # `not x > 0` also rejects NaN, which `x <= 0` would let through.
    if real_height_cm is None or not real_height_cm > 0:
        return {"error": "Height must be a positive number (cm)."}, image_pil

    # The normalization and the (h, w, channels) unpacking below need exactly
    # three channels, so normalise RGBA / grayscale / palette inputs.
    image_pil = image_pil.convert("RGB")
    image_rgb = np.array(image_pil)
    image_cv = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR)
    h, w, _ = image_cv.shape

    # Segment with DeepLab
    mask_resized = _segment_person(image_pil, w, h)

    # MediaPipe Pose (expects RGB input)
    results = pose.process(image_rgb)
    if not results.pose_landmarks:
        return {"error": "No person/landmarks detected"}, image_pil
    lm = results.pose_landmarks.landmark

    def pt(enum_landmark):
        """Convert a normalized landmark to integer pixel (x, y)."""
        point = lm[enum_landmark.value]
        return int(point.x * w), int(point.y * h)

    # Key landmarks (left/right shoulder & hip etc.)
    L_SHOULDER = pt(mp_pose.PoseLandmark.LEFT_SHOULDER)
    R_SHOULDER = pt(mp_pose.PoseLandmark.RIGHT_SHOULDER)
    L_ELBOW = pt(mp_pose.PoseLandmark.LEFT_ELBOW)
    L_HIP = pt(mp_pose.PoseLandmark.LEFT_HIP)
    R_HIP = pt(mp_pose.PoseLandmark.RIGHT_HIP)
    L_WRIST = pt(mp_pose.PoseLandmark.LEFT_WRIST)
    L_ANKLE = pt(mp_pose.PoseLandmark.LEFT_ANKLE)
    NOSE = pt(mp_pose.PoseLandmark.NOSE)

    # Scale factor (px -> cm).
    # NOTE(review): the pixel reference is nose -> left ankle, which is
    # shorter than full stature, so the scale likely over-estimates; confirm
    # whether a correction factor is wanted.
    pixel_height = L_ANKLE[1] - NOSE[1]
    if pixel_height <= 0:
        return {"error": "Invalid body height detected in image (make sure full body is visible)."}, image_pil
    scale = real_height_cm / pixel_height

    # Shoulder (from mask)
    y_shoulder = int((L_SHOULDER[1] + R_SHOULDER[1]) / 2)
    left_s, right_s = row_body_edges(mask_resized, y_shoulder)
    shoulder_px = None
    if left_s is not None and right_s is not None:
        shoulder_px = right_s - left_s

    # Hip (from mask)
    y_hip = int((L_HIP[1] + R_HIP[1]) / 2)
    left_h, right_h = row_body_edges(mask_resized, y_hip)
    hip_px = None
    if left_h is not None and right_h is not None:
        hip_px = right_h - left_h

    # Waist (landmarks-only using shoulder->hip lines): take the midpoint y
    # of each side's shoulder->hip segment, average them, then interpolate x
    # on each side's segment at that row.
    y_mid_left = (L_SHOULDER[1] + L_HIP[1]) / 2
    y_mid_right = (R_SHOULDER[1] + R_HIP[1]) / 2
    y_waist = int(round((y_mid_left + y_mid_right) / 2))
    x_left = clamp_x(
        x_on_line_at_y(L_SHOULDER[0], L_SHOULDER[1],
                       L_HIP[0], L_HIP[1], y_waist),
        w,
    )
    x_right = clamp_x(
        x_on_line_at_y(R_SHOULDER[0], R_SHOULDER[1],
                       R_HIP[0], R_HIP[1], y_waist),
        w,
    )
    waist_px = abs(x_right - x_left)

    # One thigh (left thigh width just below hip) - still from mask.
    # NOTE(review): this measures from the left-most mask pixel of the whole
    # row to the left-hip x, which may span more or less than one thigh
    # depending on pose/orientation; verify the intended geometry.
    thigh_y = int(L_HIP[1] + THIGH_OFFSET_RATIO * pixel_height)
    left_t, right_t = row_body_edges(mask_resized, thigh_y)
    left_thigh_px = None
    if (left_t is not None and right_t is not None
            and left_t <= L_HIP[0] <= right_t):
        left_thigh_px = L_HIP[0] - left_t

    # Lengths
    shoulder_mid = ((L_SHOULDER[0] + R_SHOULDER[0]) // 2, y_shoulder)
    hip_mid = ((L_HIP[0] + R_HIP[0]) // 2, y_hip)
    torso_px = euclidean(shoulder_mid, hip_mid)
    arm_px = euclidean(L_SHOULDER, L_ELBOW) + euclidean(L_ELBOW, L_WRIST)
    leg_px = euclidean(L_HIP, L_ANKLE)

    def cm(px):
        """Convert pixels to centimetres (1 decimal); None passes through."""
        return round(px * scale, 1) if px is not None else None

    measurements = {
        "Shoulder width (cm)": cm(shoulder_px),
        "Waist width (cm)": cm(waist_px),
        "Hip width (cm)": cm(hip_px),
        "Left thigh width (cm)": cm(left_thigh_px),
        "Torso length (cm)": cm(torso_px),
        "Arm length (cm)": cm(arm_px),
        "Leg length (cm)": cm(leg_px),
    }

    # Visualization: landmarks, shoulder/hip horizontal mask lines,
    # shoulder->hip diagonals and the landmarks-only waist line.
    vis_pil = _draw_overlay(
        image_cv,
        lm,
        shoulder_line=(left_s, right_s, y_shoulder),
        hip_line=(left_h, right_h, y_hip),
        diagonals=((L_SHOULDER, L_HIP), (R_SHOULDER, R_HIP)),
        waist_line=(x_left, x_right, y_waist),
    )
    return measurements, vis_pil


# ------------------------------
# Gradio Interface
# ------------------------------
def predict(image, height_cm):
    """Gradio callback: return (measurements JSON, annotated image)."""
    result, vis = process_image(image, height_cm)
    return result, vis


demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", label="Upload full body image"),
        gr.Number(label="Height (cm)", value=170),
    ],
    outputs=[
        gr.JSON(label="Estimated Measurements"),
        gr.Image(type="pil", label="Landmarks + Measurement Lines"),
    ],
    title="👕 AI Clothing Size Estimator",
    description="Upload a full-body photo and input your height (cm). Shoulders (blue), Waist (landmarks-only line), Hip (red).",
)

if __name__ == "__main__":
    demo.launch()