import gradio as gr
import cv2
import mediapipe as mp
import torch
import torchvision.transforms as T
from torchvision import models
import numpy as np
from PIL import Image
import math

# ------------------------------
# Load DeepLabV3 model
# ------------------------------
def load_deeplab():
    """Build the segmentation network and its matching preprocessing.

    Returns:
        A ``(model, transform)`` pair: torchvision's pretrained
        DeepLabV3-ResNet101 switched to eval mode, and a transform that
        applies ``Resize(520)``, converts the PIL image to a tensor and
        normalizes each channel with the mean/std listed below.
    """
    preprocess = T.Compose([
        T.Resize(520),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
    ])
    network = models.segmentation.deeplabv3_resnet101(pretrained=True)
    # inference only: eval mode disables dropout / batch-norm updates
    network.eval()
    return network, preprocess

# Build the segmentation model and its preprocessing once at import time;
# process_image() reads both as module-level globals.
model, transform = load_deeplab()

# MediaPipe pose estimator, also created once at import time and reused by
# process_image(). static_image_mode=True is meant for unrelated still images
# (per the MediaPipe Pose documentation).
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True)

# ------------------------------
# Helper functions
# ------------------------------
def euclidean(p1, p2):
    """Return the straight-line distance between the points p1 and p2."""
    distance = math.dist(p1, p2)
    return distance

def row_body_edges(mask, y):
    """Return the leftmost and rightmost positive columns of mask row y.

    Gives ``(None, None)`` when y is outside the mask's row range or when the
    row contains no positive entries; otherwise both edges are plain ints.
    """
    n_rows = mask.shape[0]
    if y < 0 or n_rows <= y:
        return None, None
    hits = np.nonzero(mask[y, :] > 0)[0]
    if hits.size == 0:
        return None, None
    # nonzero indices come back in ascending order, so the ends are the extremes
    return int(hits[0]), int(hits[-1])

def x_on_line_at_y(x1, y1, x2, y2, y):
    """Return the rounded x where the line (x1, y1)->(x2, y2) reaches height y.

    The line is extrapolated when y lies outside the segment's own y-range.
    A horizontal segment (y1 == y2) has no unique answer, so the midpoint x
    of the two endpoints is returned instead.
    """
    if y1 != y2:
        frac = (y - y1) / (y2 - y1)
        return int(round(x1 + frac * (x2 - x1)))
    # horizontal segment: fall back to the midpoint of the endpoints
    return int(round((x1 + x2) / 2))

def clamp_x(x, w):
    """Round x to the nearest int and limit it to the column range [0, w - 1]."""
    col = int(round(x))
    last_col = w - 1
    # cap at the last valid column first, then floor at zero
    col = col if col < last_col else last_col
    return col if col > 0 else 0

# ------------------------------
# Main processing
# ------------------------------
def process_image(image_pil, real_height_cm):
    """Estimate body measurements from a full-body photo.

    A DeepLabV3 person mask supplies the shoulder, hip and thigh widths,
    MediaPipe pose landmarks supply the waist width and the torso/arm/leg
    lengths, and the nose-to-left-ankle pixel distance is used as the
    stand-in for the person's height when converting pixels to centimetres.

    Args:
        image_pil: PIL image of the person, or None when nothing was
            uploaded. Images in a mode other than RGB are converted to RGB.
        real_height_cm: The person's height in centimetres; must be a
            positive number.

    Returns:
        A ``(measurements, image)`` tuple. On success ``measurements`` maps
        measurement names to values in cm rounded to one decimal (or None
        when a mask-based width could not be determined) and ``image`` is a
        PIL image with landmarks and measurement lines drawn on it. On
        failure ``measurements`` is ``{"error": <message>}`` and ``image`` is
        the input image (None if no image was given).
    """
    # Validate inputs up front: the UI can submit an empty image slot or a
    # cleared/invalid height, which would otherwise crash further down.
    if image_pil is None:
        return {"error": "No image provided"}, None
    if real_height_cm is None or real_height_cm <= 0:
        return {"error": "Height must be a positive number (cm)"}, image_pil

    # The colour conversions and the 3-channel normalization below both
    # require exactly three channels, so normalise RGBA / grayscale / palette
    # images to RGB first.
    if image_pil.mode != "RGB":
        image_pil = image_pil.convert("RGB")

    image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
    h, w, _ = image_cv.shape

    # Segment with DeepLab
    input_tensor = transform(image_pil).unsqueeze(0)
    with torch.no_grad():
        output = model(input_tensor)['out'][0]
    pred = output.argmax(0).byte().cpu().numpy()
    # class index 15 is kept as the body mask (presumably the "person" class
    # of the pretrained label set - confirm against the torchvision docs)
    mask = (pred == 15).astype(np.uint8)
    # the model ran on a resized copy; bring the mask back to image size
    mask_resized = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)

    # MediaPipe Pose
    results = pose.process(cv2.cvtColor(image_cv, cv2.COLOR_BGR2RGB))
    if not results.pose_landmarks:
        return {"error": "No person/landmarks detected"}, image_pil

    lm = results.pose_landmarks.landmark

    # helper to get pixel (x, y) from a landmark enum; landmark coordinates
    # are scaled by the image width/height here
    def pt(enum_landmark):
        L = lm[enum_landmark.value]
        return int(L.x * w), int(L.y * h)

    # Key landmarks (left/right shoulder & hip etc.)
    L_SHOULDER = pt(mp_pose.PoseLandmark.LEFT_SHOULDER)
    R_SHOULDER = pt(mp_pose.PoseLandmark.RIGHT_SHOULDER)
    L_ELBOW = pt(mp_pose.PoseLandmark.LEFT_ELBOW)
    L_HIP = pt(mp_pose.PoseLandmark.LEFT_HIP)
    R_HIP = pt(mp_pose.PoseLandmark.RIGHT_HIP)
    L_WRIST = pt(mp_pose.PoseLandmark.LEFT_WRIST)
    L_ANKLE = pt(mp_pose.PoseLandmark.LEFT_ANKLE)
    NOSE = pt(mp_pose.PoseLandmark.NOSE)

    # Scale factor (px -> cm): nose-to-ankle distance is treated as the full
    # body height, so the result is an approximation.
    pixel_height = L_ANKLE[1] - NOSE[1]
    if pixel_height <= 0:
        return {"error": "Invalid body height detected in image (make sure full body is visible)."}, image_pil
    scale = real_height_cm / pixel_height

    # Shoulder (from mask): mask extent on the row midway between the two
    # shoulder landmarks
    y_shoulder = int((L_SHOULDER[1] + R_SHOULDER[1]) / 2)
    left_s, right_s = row_body_edges(mask_resized, y_shoulder)
    shoulder_px = (right_s - left_s) if (left_s is not None and right_s is not None) else None

    # Hip (from mask): same idea on the row midway between the hip landmarks
    y_hip = int((L_HIP[1] + R_HIP[1]) / 2)
    left_h, right_h = row_body_edges(mask_resized, y_hip)
    hip_px = (right_h - left_h) if (left_h is not None and right_h is not None) else None

    # Waist (landmarks-only using shoulder->hip lines)
    # compute midpoint y for left and right shoulder->hip, then average them
    y_mid_left = (L_SHOULDER[1] + L_HIP[1]) / 2
    y_mid_right = (R_SHOULDER[1] + R_HIP[1]) / 2
    y_waist = int(round((y_mid_left + y_mid_right) / 2))

    # compute x at y_waist along each side's shoulder->hip line
    x_left = x_on_line_at_y(L_SHOULDER[0], L_SHOULDER[1], L_HIP[0], L_HIP[1], y_waist)
    x_right = x_on_line_at_y(R_SHOULDER[0], R_SHOULDER[1], R_HIP[0], R_HIP[1], y_waist)
    x_left = clamp_x(x_left, w)
    x_right = clamp_x(x_right, w)
    waist_px = abs(x_right - x_left)

    # One thigh (left thigh width just below hip) - still from mask
    # NOTE(review): this spans from the leftmost mask pixel on the row to the
    # left-hip x, so it includes whatever else the mask contains on that row
    # - confirm this is the intended width.
    thigh_y = int(L_HIP[1] + 0.15 * pixel_height)
    left_t, right_t = row_body_edges(mask_resized, thigh_y)
    left_thigh_px = None
    if left_t is not None and right_t is not None and (L_HIP[0] >= left_t and L_HIP[0] <= right_t):
        left_thigh_px = L_HIP[0] - left_t

    # Lengths
    torso_px = euclidean(((L_SHOULDER[0] + R_SHOULDER[0]) // 2, y_shoulder),
                         ((L_HIP[0] + R_HIP[0]) // 2, y_hip))
    arm_px = euclidean(L_SHOULDER, L_ELBOW) + euclidean(L_ELBOW, L_WRIST)
    leg_px = euclidean(L_HIP, L_ANKLE)

    # Convert to cm (None-safe)
    def cm(px):
        return round(px * scale, 1) if (px is not None) else None

    measurements = {
        "Shoulder width (cm)": cm(shoulder_px),
        "Waist width (cm)": cm(waist_px),
        "Hip width (cm)": cm(hip_px),
        "Left thigh width (cm)": cm(left_thigh_px),
        "Torso length (cm)": cm(torso_px),
        "Arm length (cm)": cm(arm_px),
        "Leg length (cm)": cm(leg_px),
    }

    # Visualization: draw landmarks, shoulder/hip horizontal mask-lines,
    # shoulder->hip diagonals, waist line (landmarks-only).
    # vis is a BGR image, so colour tuples below are (blue, green, red).
    vis = image_cv.copy()

    # draw MediaPipe landmarks (green)
    for lmk in lm:
        cx, cy = int(lmk.x * w), int(lmk.y * h)
        cv2.circle(vis, (cx, cy), 3, (0, 255, 0), -1)

    # draw shoulder and hip horizontal mask-based lines (if available)
    if left_s is not None and right_s is not None:
        cv2.line(vis, (left_s, y_shoulder), (right_s, y_shoulder), (255, 0, 0), 2)  # blue
    if left_h is not None and right_h is not None:
        cv2.line(vis, (left_h, y_hip), (right_h, y_hip), (0, 0, 255), 2)  # red

    # draw shoulder->hip diagonals (landmarks)
    cv2.line(vis, (L_SHOULDER[0], L_SHOULDER[1]), (L_HIP[0], L_HIP[1]), (0, 255, 255), 2)  # left diagonal, yellow
    cv2.line(vis, (R_SHOULDER[0], R_SHOULDER[1]), (R_HIP[0], R_HIP[1]), (0, 255, 255), 2)  # right diagonal, yellow

    # draw waist line computed from diagonal interpolation (thicker)
    cv2.line(vis, (x_left, y_waist), (x_right, y_waist), (0, 255, 255), 3)  # yellow

    vis = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
    vis_pil = Image.fromarray(vis)

    return measurements, vis_pil

# ------------------------------
# Gradio Interface
# ------------------------------
def predict(image, height_cm):
    """Gradio callback: run the measurement pipeline on one submission.

    Returns the measurement (or error) dict followed by the image to show,
    matching the order of the interface's two outputs.
    """
    measurements, annotated = process_image(image, height_cm)
    return measurements, annotated

# Web UI wiring: an image upload and a numeric height go in; a JSON report of
# the measurements and an annotated image come out. `predict` is the callback
# and its two return values map onto the two outputs in order.
demo = gr.Interface(
    fn=predict,
    inputs=[
        # type="pil" so the callback receives a PIL image
        gr.Image(type="pil", label="Upload full body image"),
        # height field is pre-filled with 170
        gr.Number(label="Height (cm)", value=170)
    ],
    outputs=[
        gr.JSON(label="Estimated Measurements"),
        gr.Image(type="pil", label="Landmarks + Measurement Lines")
    ],
    title="👕 AI Clothing Size Estimator",
    description="Upload a full-body photo and input your height (cm). Shoulders (blue), Waist (landmarks-only line), Hip (red)."
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()