import gradio as gr
import cv2
import mediapipe as mp
import torch
import torchvision.transforms as T
from torchvision import models
import numpy as np
from PIL import Image
import math
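# ------------------------------
# Pipeline overview:
#   1. DeepLabV3 (torchvision) segments the person to produce a binary body mask.
#   2. MediaPipe Pose supplies 2D landmarks (shoulders, hips, wrists, ankles, nose).
#   3. Pixel measurements are converted to cm using the user-supplied height as the scale.
# ------------------------------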
# ------------------------------
# Load DeepLabV3 model
# ------------------------------
def load_deeplab():
    # pretrained=True is deprecated in recent torchvision releases;
    # weights=models.segmentation.DeepLabV3_ResNet101_Weights.DEFAULT is the current equivalent.
    model = models.segmentation.deeplabv3_resnet101(pretrained=True).eval()
    transform = T.Compose([
        T.Resize(520),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225])
    ])
    return model, transform
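# Load the model and preprocessing transform once at import time; every Gradio request reuses them.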
model, transform = load_deeplab()
# MediaPipe pose
mp_pose = mp.solutions.pose
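# static_image_mode=True runs detection on every call (suited to single photos rather than video tracking)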
pose = mp_pose.Pose(static_image_mode=True)
# ------------------------------
# Helper functions
# ------------------------------
def euclidean(p1, p2):
    return math.dist(p1, p2)
def row_body_edges(mask, y):
    """Find the left and right edges of the body at row y of the mask."""
    if y < 0 or y >= mask.shape[0]:
        return None, None
    row = mask[y, :]
    cols = np.where(row > 0)[0]
    if len(cols) == 0:
        return None, None
    return int(cols.min()), int(cols.max())
def x_on_line_at_y(x1, y1, x2, y2, y):
    """Linearly interpolate x at a given y on the line (x1, y1) -> (x2, y2)."""
    # if the segment is horizontal (y1 == y2) or degenerate, return the midpoint x
    if y2 == y1:
        return int(round((x1 + x2) / 2))
    t = (y - y1) / (y2 - y1)
    x = x1 + t * (x2 - x1)
    return int(round(x))
def clamp_x(x, w):
    return max(0, min(w - 1, int(round(x))))
# ------------------------------
# Main processing
# ------------------------------
def process_image(image_pil, real_height_cm):
    # ensure a 3-channel RGB image (e.g. strip alpha from PNG uploads)
    image_pil = image_pil.convert("RGB")
    image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
    h, w, _ = image_cv.shape
    # Segment with DeepLab
    input_tensor = transform(image_pil).unsqueeze(0)
    with torch.no_grad():
        output = model(input_tensor)['out'][0]
    pred = output.argmax(0).byte().cpu().numpy()
    mask = (pred == 15).astype(np.uint8)  # class 15 = "person" in the Pascal VOC label set used by this model
    mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
    # MediaPipe Pose
    results = pose.process(cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB))
    if not results.pose_landmarks:
        return {"error": "No person/landmarks detected"}, image_pil
    lm = results.pose_landmarks.landmark
    # helper to get (x, y) pixel coordinates from a landmark enum
    def pt(enum_landmark):
        L = lm[enum_landmark.value]
        return int(L.x * w), int(L.y * h)
    # Key landmarks (left/right shoulder & hip, etc.). Note: MediaPipe "left"/"right" are the
    # subject's anatomical sides, which appear mirrored in a frontal photo.
    L_SHOULDER = pt(mp_pose.PoseLandmark.LEFT_SHOULDER)
    R_SHOULDER = pt(mp_pose.PoseLandmark.RIGHT_SHOULDER)
    L_ELBOW = pt(mp_pose.PoseLandmark.LEFT_ELBOW)
    L_HIP = pt(mp_pose.PoseLandmark.LEFT_HIP)
    R_HIP = pt(mp_pose.PoseLandmark.RIGHT_HIP)
    L_WRIST = pt(mp_pose.PoseLandmark.LEFT_WRIST)
    L_ANKLE = pt(mp_pose.PoseLandmark.LEFT_ANKLE)
    NOSE = pt(mp_pose.PoseLandmark.NOSE)
    # Scale factor (px -> cm); the nose-to-ankle span approximates full body height
    pixel_height = L_ANKLE[1] - NOSE[1]
    if pixel_height <= 0:
        return {"error": "Invalid body height detected in image (make sure the full body is visible)."}, image_pil
    scale = real_height_cm / pixel_height
    # Shoulder (from mask)
    y_shoulder = int((L_SHOULDER[1] + R_SHOULDER[1]) / 2)
    left_s, right_s = row_body_edges(mask_resized, y_shoulder)
    shoulder_px = (right_s - left_s) if (left_s is not None and right_s is not None) else None
    # Hip (from mask)
    y_hip = int((L_HIP[1] + R_HIP[1]) / 2)
    left_h, right_h = row_body_edges(mask_resized, y_hip)
    hip_px = (right_h - left_h) if (left_h is not None and right_h is not None) else None
    # Waist (landmarks-only, using the shoulder->hip lines):
    # compute the midpoint y of each shoulder->hip segment, then average the two
    y_mid_left = (L_SHOULDER[1] + L_HIP[1]) / 2
    y_mid_right = (R_SHOULDER[1] + R_HIP[1]) / 2
    y_waist = int(round((y_mid_left + y_mid_right) / 2))
    # compute x at y_waist along each side's shoulder->hip line
    x_left = x_on_line_at_y(L_SHOULDER[0], L_SHOULDER[1], L_HIP[0], L_HIP[1], y_waist)
    x_right = x_on_line_at_y(R_SHOULDER[0], R_SHOULDER[1], R_HIP[0], R_HIP[1], y_waist)
    x_left = clamp_x(x_left, w)
    x_right = clamp_x(x_right, w)
    waist_px = abs(x_right - x_left)
    # One thigh (left thigh width just below the hip) - still from mask
    thigh_y = int(L_HIP[1] + 0.15 * pixel_height)
    left_t, right_t = row_body_edges(mask_resized, thigh_y)
    left_thigh_px = None
    if left_t is not None and right_t is not None and (left_t <= L_HIP[0] <= right_t):
        left_thigh_px = L_HIP[0] - left_t
    # Lengths
    torso_px = euclidean(((L_SHOULDER[0] + R_SHOULDER[0]) // 2, y_shoulder),
                         ((L_HIP[0] + R_HIP[0]) // 2, y_hip))
    arm_px = euclidean(L_SHOULDER, L_ELBOW) + euclidean(L_ELBOW, L_WRIST)
    leg_px = euclidean(L_HIP, L_ANKLE)
    # Convert to cm (None-safe)
    def cm(px):
        return round(px * scale, 1) if (px is not None) else None
    measurements = {
        "Shoulder width (cm)": cm(shoulder_px),
        "Waist width (cm)": cm(waist_px),
        "Hip width (cm)": cm(hip_px),
        "Left thigh width (cm)": cm(left_thigh_px),
        "Torso length (cm)": cm(torso_px),
        "Arm length (cm)": cm(arm_px),
        "Leg length (cm)": cm(leg_px),
    }
    # Visualization: draw landmarks, shoulder/hip horizontal mask lines,
    # shoulder->hip diagonals, and the landmarks-only waist line
    vis = image_cv.copy()
    # draw MediaPipe landmarks (green)
    for lmk in lm:
        cx, cy = int(lmk.x * w), int(lmk.y * h)
        cv2.circle(vis, (cx, cy), 3, (0, 255, 0), -1)
    # draw shoulder and hip horizontal mask-based lines (if available)
    if left_s is not None and right_s is not None:
        cv2.line(vis, (left_s, y_shoulder), (right_s, y_shoulder), (255, 0, 0), 2)  # blue
    if left_h is not None and right_h is not None:
        cv2.line(vis, (left_h, y_hip), (right_h, y_hip), (0, 0, 255), 2)  # red
    # draw shoulder->hip diagonals (landmarks); (0, 255, 255) is yellow in BGR
    cv2.line(vis, (L_SHOULDER[0], L_SHOULDER[1]), (L_HIP[0], L_HIP[1]), (0, 255, 255), 2)  # left diagonal, yellow
    cv2.line(vis, (R_SHOULDER[0], R_SHOULDER[1]), (R_HIP[0], R_HIP[1]), (0, 255, 255), 2)  # right diagonal, yellow
    # draw the waist line computed from the diagonal interpolation
    cv2.line(vis, (x_left, y_waist), (x_right, y_waist), (0, 255, 255), 3)  # yellow
    vis = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
    vis_pil = Image.fromarray(vis)
    return measurements, vis_pil
# ------------------------------
# Gradio Interface
# ------------------------------
def predict(image, height_cm):
    result, vis = process_image(image, height_cm)
    return result, vis
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", label="Upload full body image"),
        gr.Number(label="Height (cm)", value=170)
    ],
    outputs=[
        gr.JSON(label="Estimated Measurements"),
        gr.Image(type="pil", label="Landmarks + Measurement Lines")
    ],
    title="👕 AI Clothing Size Estimator",
    description="Upload a full-body photo and enter your height in cm. Lines: shoulders (blue), hips (red), landmark-based waist and shoulder-to-hip diagonals (yellow)."
)
if __name__ == "__main__":
    demo.launch()
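# Rough local-run sketch (assumption: dependency names and pins are not specified in this file):
#   pip install gradio opencv-python mediapipe torch torchvision numpy pillow
#   python app.py   # then open the local URL that Gradio prints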