import gradio as gr
import cv2
import mediapipe as mp
import torch
import torchvision.transforms as T
from torchvision import models
import numpy as np
from PIL import Image
import math
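# ------------------------------
# Pipeline overview:
#   1. DeepLabV3 (torchvision) segments the person to produce a binary body mask.
#   2. MediaPipe Pose supplies 2D landmarks (shoulders, hips, wrists, ankles, nose).
#   3. Pixel measurements are converted to cm using the user-supplied height as the scale.
# ------------------------------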
# ------------------------------
# Load DeepLabV3 model
# ------------------------------
def load_deeplab():
    # pretrained=True is deprecated in recent torchvision releases;
    # weights=models.segmentation.DeepLabV3_ResNet101_Weights.DEFAULT is the current equivalent.
    model = models.segmentation.deeplabv3_resnet101(pretrained=True).eval()
    transform = T.Compose([
        T.Resize(520),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225])
    ])
    return model, transform
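# Load the model and preprocessing transform once at import time; every Gradio request reuses them.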
model, transform = load_deeplab()
# MediaPipe pose
mp_pose = mp.solutions.pose
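# static_image_mode=True runs detection on every call (suited to single photos rather than video tracking)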
pose = mp_pose.Pose(static_image_mode=True)
# ------------------------------
# Helper functions
# ------------------------------
def euclidean(p1, p2):
    return math.dist(p1, p2)
def row_body_edges(mask, y):
    """Find the left and right edges of the body at row y of the mask."""
    if y < 0 or y >= mask.shape[0]:
        return None, None
    row = mask[y, :]
    cols = np.where(row > 0)[0]
    if len(cols) == 0:
        return None, None
    return int(cols.min()), int(cols.max())
def x_on_line_at_y(x1, y1, x2, y2, y):
    """Linearly interpolate x at a given y on the line (x1, y1) -> (x2, y2)."""
    # if the segment is horizontal (y1 == y2) or degenerate, return the midpoint x
    if y2 == y1:
        return int(round((x1 + x2) / 2))
    t = (y - y1) / (y2 - y1)
    x = x1 + t * (x2 - x1)
    return int(round(x))
def clamp_x(x, w):
    return max(0, min(w - 1, int(round(x))))
# ------------------------------
# Main processing
# ------------------------------
def process_image(image_pil, real_height_cm):
    # ensure a 3-channel RGB image (e.g. strip alpha from PNG uploads)
    image_pil = image_pil.convert("RGB")
    image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
    h, w, _ = image_cv.shape
    # Segment with DeepLab
    input_tensor = transform(image_pil).unsqueeze(0)
    with torch.no_grad():
        output = model(input_tensor)['out'][0]
    pred = output.argmax(0).byte().cpu().numpy()
    mask = (pred == 15).astype(np.uint8)  # class 15 = "person" in the Pascal VOC label set used by this model
    mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
    # MediaPipe Pose
    results = pose.process(cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB))
    if not results.pose_landmarks:
        return {"error": "No person/landmarks detected"}, image_pil
    lm = results.pose_landmarks.landmark
    # helper to get (x, y) pixel coordinates from a landmark enum
    def pt(enum_landmark):
        L = lm[enum_landmark.value]
        return int(L.x * w), int(L.y * h)
    # Key landmarks (left/right shoulder & hip, etc.). Note: MediaPipe "left"/"right" are the
    # subject's anatomical sides, which appear mirrored in a frontal photo.
    L_SHOULDER = pt(mp_pose.PoseLandmark.LEFT_SHOULDER)
    R_SHOULDER = pt(mp_pose.PoseLandmark.RIGHT_SHOULDER)
    L_ELBOW = pt(mp_pose.PoseLandmark.LEFT_ELBOW)
    L_HIP = pt(mp_pose.PoseLandmark.LEFT_HIP)
    R_HIP = pt(mp_pose.PoseLandmark.RIGHT_HIP)
    L_WRIST = pt(mp_pose.PoseLandmark.LEFT_WRIST)
    L_ANKLE = pt(mp_pose.PoseLandmark.LEFT_ANKLE)
    NOSE = pt(mp_pose.PoseLandmark.NOSE)
    # Scale factor (px -> cm); the nose-to-ankle span approximates full body height
    pixel_height = L_ANKLE[1] - NOSE[1]
    if pixel_height <= 0:
        return {"error": "Invalid body height detected in image (make sure the full body is visible)."}, image_pil
    scale = real_height_cm / pixel_height
    # Shoulder (from mask)
    y_shoulder = int((L_SHOULDER[1] + R_SHOULDER[1]) / 2)
    left_s, right_s = row_body_edges(mask_resized, y_shoulder)
    shoulder_px = (right_s - left_s) if (left_s is not None and right_s is not None) else None
    # Hip (from mask)
    y_hip = int((L_HIP[1] + R_HIP[1]) / 2)
    left_h, right_h = row_body_edges(mask_resized, y_hip)
    hip_px = (right_h - left_h) if (left_h is not None and right_h is not None) else None
    # Waist (landmarks-only, using the shoulder->hip lines):
    # compute the midpoint y of each shoulder->hip segment, then average the two
    y_mid_left = (L_SHOULDER[1] + L_HIP[1]) / 2
    y_mid_right = (R_SHOULDER[1] + R_HIP[1]) / 2
    y_waist = int(round((y_mid_left + y_mid_right) / 2))
    # compute x at y_waist along each side's shoulder->hip line
    x_left = x_on_line_at_y(L_SHOULDER[0], L_SHOULDER[1], L_HIP[0], L_HIP[1], y_waist)
    x_right = x_on_line_at_y(R_SHOULDER[0], R_SHOULDER[1], R_HIP[0], R_HIP[1], y_waist)
    x_left = clamp_x(x_left, w)
    x_right = clamp_x(x_right, w)
    waist_px = abs(x_right - x_left)
    # One thigh (left thigh width just below the hip) - still from mask
    thigh_y = int(L_HIP[1] + 0.15 * pixel_height)
    left_t, right_t = row_body_edges(mask_resized, thigh_y)
    left_thigh_px = None
    if left_t is not None and right_t is not None and (left_t <= L_HIP[0] <= right_t):
        left_thigh_px = L_HIP[0] - left_t
    # Lengths
    torso_px = euclidean(((L_SHOULDER[0] + R_SHOULDER[0]) // 2, y_shoulder),
                         ((L_HIP[0] + R_HIP[0]) // 2, y_hip))
    arm_px = euclidean(L_SHOULDER, L_ELBOW) + euclidean(L_ELBOW, L_WRIST)
    leg_px = euclidean(L_HIP, L_ANKLE)
    # Convert to cm (None-safe)
    def cm(px):
        return round(px * scale, 1) if (px is not None) else None
    measurements = {
        "Shoulder width (cm)": cm(shoulder_px),
        "Waist width (cm)": cm(waist_px),
        "Hip width (cm)": cm(hip_px),
        "Left thigh width (cm)": cm(left_thigh_px),
        "Torso length (cm)": cm(torso_px),
        "Arm length (cm)": cm(arm_px),
        "Leg length (cm)": cm(leg_px),
    }
    # Visualization: draw landmarks, shoulder/hip horizontal mask lines,
    # shoulder->hip diagonals, and the landmarks-only waist line
    vis = image_cv.copy()
    # draw MediaPipe landmarks (green)
    for lmk in lm:
        cx, cy = int(lmk.x * w), int(lmk.y * h)
        cv2.circle(vis, (cx, cy), 3, (0, 255, 0), -1)
    # draw shoulder and hip horizontal mask-based lines (if available)
    if left_s is not None and right_s is not None:
        cv2.line(vis, (left_s, y_shoulder), (right_s, y_shoulder), (255, 0, 0), 2)  # blue
    if left_h is not None and right_h is not None:
        cv2.line(vis, (left_h, y_hip), (right_h, y_hip), (0, 0, 255), 2)  # red
    # draw shoulder->hip diagonals (landmarks); (0, 255, 255) is yellow in BGR
    cv2.line(vis, (L_SHOULDER[0], L_SHOULDER[1]), (L_HIP[0], L_HIP[1]), (0, 255, 255), 2)  # left diagonal, yellow
    cv2.line(vis, (R_SHOULDER[0], R_SHOULDER[1]), (R_HIP[0], R_HIP[1]), (0, 255, 255), 2)  # right diagonal, yellow
    # draw the waist line computed from the diagonal interpolation
    cv2.line(vis, (x_left, y_waist), (x_right, y_waist), (0, 255, 255), 3)  # yellow
    vis = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
    vis_pil = Image.fromarray(vis)
    return measurements, vis_pil
# ------------------------------
# Gradio Interface
# ------------------------------
def predict(image, height_cm):
    result, vis = process_image(image, height_cm)
    return result, vis
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", label="Upload full body image"),
        gr.Number(label="Height (cm)", value=170)
    ],
    outputs=[
        gr.JSON(label="Estimated Measurements"),
        gr.Image(type="pil", label="Landmarks + Measurement Lines")
    ],
    title="👕 AI Clothing Size Estimator",
    description="Upload a full-body photo and enter your height in cm. Lines: shoulders (blue), hips (red), landmark-based waist and shoulder-to-hip diagonals (yellow)."
)
if __name__ == "__main__":
    demo.launch()
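# Rough local-run sketch (assumption: dependency names and pins are not specified in this file):
#   pip install gradio opencv-python mediapipe torch torchvision numpy pillow
#   python app.py   # then open the local URL that Gradio prints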