Shri Jayaram committed • Commit fdad24e
Parent(s): c7d31dd
scale detection method
Files changed:
- FlowChart.png (+0, -0)
- app.py (+398, -0)
- requirements.txt (+10, -0)
FlowChart.png
ADDED
app.py
ADDED
@@ -0,0 +1,398 @@
```python
import streamlit as st
from PIL import Image, ImageDraw, ImageFont
from io import BytesIO
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from rembg import remove
import mediapipe as mp
import torch
from transformers import AutoProcessor, AutoModelForCausalLM
from transformers.dynamic_module_utils import get_imports
from unittest.mock import patch
from scipy.spatial import distance as dist

st.set_page_config(layout="wide", page_title="Ring Size Measurement")

# Inner ring diameter in mm -> US ring size
ring_size_dict = {
    14.0: 3,
    14.4: 3.5,
    14.8: 4,
    15.2: 4.5,
    15.6: 5,
    16.0: 5.5,
    16.45: 6,
    16.9: 6.5,
    17.3: 7,
    17.7: 7.5,
    18.2: 8,
    18.6: 8.5,
    19.0: 9,
    19.4: 9.5,
    19.8: 10,
    20.2: 10.5,
    20.6: 11,
    21.0: 11.5,
    21.4: 12,
    21.8: 12.5,
    22.2: 13,
    22.6: 13.5
}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def fixed_get_imports(filename: str | os.PathLike) -> list[str]:
    # Florence-2's remote code imports flash_attn, which is not installed here;
    # drop it from the import list so the model can still be loaded.
    if not str(filename).endswith("modeling_florence2.py"):
        return get_imports(filename)
    imports = get_imports(filename)
    imports.remove("flash_attn")
    return imports

def load_model():
    model_id = "microsoft/Florence-2-base-ft"
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

    os.makedirs("temp", exist_ok=True)

    with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
        model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="sdpa", trust_remote_code=True)

    # Dynamically quantize the Linear layers to int8 to cut memory use
    Qmodel = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
    return Qmodel.to(device), processor

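# torch.quantization.quantize_dynamic swaps the nn.Linear weights for int8 and
# dequantizes on the fly, trading a little accuracy for a smaller memory
# footprint; note that dynamic quantization targets the CPU inference path.
# A minimal, commented-out illustration on a toy module (assumed shapes, not
# part of the app):
#
#   toy = torch.nn.Sequential(torch.nn.Linear(16, 4))
#   toy_q = torch.quantization.quantize_dynamic(toy, {torch.nn.Linear}, dtype=torch.qint8)
#   _ = toy_q(torch.randn(1, 16))  # forward pass uses the quantized weights
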
if 'model_loaded' not in st.session_state:
    st.session_state.model_loaded = False

if not st.session_state.model_loaded:
    with st.spinner('Loading model...'):
        st.session_state.model, st.session_state.processor = load_model()
        st.session_state.model_loaded = True
        st.write("Model loading complete")

def calculate_pixel_per_metric(image, known_diameter_of_coin=25):
    # `known_diameter_of_coin` is unused here; the scale is now derived from a
    # detected ruler (see below).
    def generate_labels(model, processor, task_prompt, image, text_input=None):
        if text_input is None:
            prompt = task_prompt
        else:
            prompt = task_prompt + " " + text_input

        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)

        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            early_stopping=False,
            do_sample=False,
            num_beams=3,
        )

        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]

        output = processor.post_process_generation(
            generated_text,
            task=task_prompt,
            image_size=(image.width, image.height)
        )

        return output

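    # The grounding output is expected to look roughly like this (an assumed
    # shape, based on Florence-2's task format, shown for orientation):
    #   {'<CAPTION_TO_PHRASE_GROUNDING>': {'bboxes': [[x1, y1, x2, y2], ...],
    #                                      'labels': ['ruler', ...]}}
    # which is why plot_bbox below iterates over data['bboxes'] and data['labels'].
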
    def plot_bbox(original_image, data):
        # Create a copy of the original image to draw on
        image_with_bboxes = original_image.copy()

        # Use Pillow to draw bounding boxes and labels
        draw = ImageDraw.Draw(image_with_bboxes)

        # Arial is not guaranteed on the Linux host; fall back to the default
        # bitmap font if the TrueType file cannot be found
        try:
            font = ImageFont.truetype("arial.ttf", 28)
        except OSError:
            font = ImageFont.load_default()

        def calculate_bbox_dimensions(bbox):
            x1, y1, x2, y2 = bbox
            width = x2 - x1
            height = y2 - y1
            return width, height

        if not data['bboxes']:
            raise ValueError("No ruler detected; cannot derive a scale.")

        for bbox, label in zip(data['bboxes'], data['labels']):
            x1, y1, x2, y2 = bbox
            draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
            draw.text((x1, y1), label, fill="red", font=font)

            # Annotate the box with its pixel dimensions
            width, height = calculate_bbox_dimensions(bbox)
            print(f"Label: {label}, Width: {width}, Height: {height}")
            dimension_text = f"W: {width}, H: {height}"
            draw.text((x1, y1 + 20), dimension_text, fill="red", font=font)

            # The detected ruler is assumed to span 160 mm in the real world;
            # its longer bounding-box side therefore gives pixels per mm
            real_world_dimension_mm = 160
            largest_dimension = max(width, height)
            pixels_per_mm = largest_dimension / real_world_dimension_mm
            ratio_text = f"Pixels/mm: {pixels_per_mm:.2f}"
            draw.text((x1, y1 + 40), ratio_text, fill="red", font=font)

        # Return the scale both ways: pixels per mm and its inverse, mm per pixel
        return image_with_bboxes, pixels_per_mm, 1.0 / pixels_per_mm

    def detecting_ruler(model, processor, image, task_prompt, text_input=None):
        results = generate_labels(model, processor, task_prompt, image, text_input=text_input)
        image_with_bboxes, value_1, value_2 = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
        return value_1, value_2, image_with_bboxes

    image_for_model = image.copy()
    image_for_model = cv2.cvtColor(image_for_model, cv2.COLOR_BGR2RGB)
    image_for_model = Image.fromarray(image_for_model)

    # Ground the word "ruler" in the image and derive the scale from its box
    text_input = "ruler"
    task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
    pixel_per_metric, mm_per_pixel, marked_image_buf = detecting_ruler(st.session_state.model, st.session_state.processor, image_for_model, task_prompt, text_input)

    return pixel_per_metric, mm_per_pixel, marked_image_buf

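# For reference: the "Hough Circle Transform" step described in the UI text
# below corresponds to a coin-based scale estimate. A minimal sketch of that
# alternative with OpenCV follows; it is illustrative only, is never called by
# the app (which grounds a ruler with Florence-2 instead), and its parameter
# values are assumptions that would need tuning for a given camera setup.
def coin_pixels_per_mm(image_bgr, coin_diameter_mm=25.0):
    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    gray = cv2.medianBlur(gray, 5)  # suppress noise before circle detection
    circles = cv2.HoughCircles(
        gray, cv2.HOUGH_GRADIENT, dp=1.2, minDist=100,
        param1=100, param2=50, minRadius=20, maxRadius=200,
    )
    if circles is None:
        return None  # no coin found
    x, y, r = circles[0][0]  # strongest detection: center (x, y) and radius in px
    return (2.0 * r) / coin_diameter_mm  # pixel diameter over the known mm diameter
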
def process_image(image):
    # Remove the background with rembg so only the hand remains
    return remove(image)

def calculate_pip_width(image, original_img, pixel_per_metric):
    def calSize(xA, yA, xB, yB, color_circle, color_line, img):
        # Distance between the two silhouette edge points, in pixels
        d = dist.euclidean((xA, yA), (xB, yB))
        cv2.circle(img, (int(xA), int(yA)), 5, color_circle, -1)
        cv2.circle(img, (int(xB), int(yB)), 5, color_circle, -1)
        cv2.line(img, (int(xA), int(yA)), (int(xB), int(yB)), color_line, 2)
        # Convert to mm and subtract a fixed 1.5 mm allowance
        d_mm = d / pixel_per_metric
        d_mm = d_mm - 1.5
        cv2.putText(img, "{:.1f}".format(d_mm), (int(xA - 15), int(yA - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (255, 255, 255), 2)
        print(d_mm)
        return d_mm

    def process_point(point, cnt, m1, b):
        # Walk along the perpendicular y = m1 * x + b in both directions until
        # the point leaves the hand contour (pointPolygonTest <= 0)
        x1, x2 = point[0], point[0]
        y1 = m1 * x1 + b
        y2 = m1 * x2 + b

        result = 1.0
        while result > 0:
            result = cv2.pointPolygonTest(cnt, (x1, y1), False)
            x1 += 1
            y1 = m1 * x1 + b
        x1 -= 1

        result = 1.0
        while result > 0:
            result = cv2.pointPolygonTest(cnt, (x2, y2), False)
            x2 -= 1
            y2 = m1 * x2 + b
        x2 += 1

        return x1, y1, x2, y2

    og_img = original_img.copy()
    imgH, imgW, _ = image.shape
    imgcpy = image.copy()
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # After background removal the background is black, so any pixel above 1
    # belongs to the hand
    _, binary_image = cv2.threshold(image_gray, 1, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour_image = np.zeros_like(image_gray)
    cv2.drawContours(contour_image, contours, -1, (255), thickness=cv2.FILLED)
    cv2.drawContours(imgcpy, contours, -1, (0, 255, 0), 2)

    marked_img = image.copy()

    if len(contours) > 0:
        cnt = max(contours, key=cv2.contourArea)
        frame2 = cv2.cvtColor(og_img, cv2.COLOR_BGR2RGB)
        handsLM = mp.solutions.hands.Hands(max_num_hands=1, min_detection_confidence=0.8, min_tracking_confidence=0.8)
        pr = handsLM.process(frame2)
        if pr.multi_hand_landmarks:
            for hand_landmarks in pr.multi_hand_landmarks:
                lmlist = []
                for id, landMark in enumerate(hand_landmarks.landmark):
                    xPos, yPos = int(landMark.x * imgW), int(landMark.y * imgH)
                    lmlist.append([id, xPos, yPos])

                if len(lmlist) != 0:
                    # MediaPipe landmark 14 is the ring-finger PIP joint and
                    # landmark 13 the ring-finger MCP joint
                    pip_joint = [lmlist[14][1], lmlist[14][2]]
                    mcp_joint = [lmlist[13][1], lmlist[13][2]]

                    midpoint_x = (pip_joint[0] + mcp_joint[0]) / 2
                    midpoint_y = (pip_joint[1] + mcp_joint[1]) / 2
                    midpoint = [midpoint_x, midpoint_y]

                    # Slope of the PIP-MCP segment; the measurement line is its
                    # perpendicular (m1 = -1/m2) through the PIP joint
                    m2 = (pip_joint[1] - mcp_joint[1]) / (pip_joint[0] - mcp_joint[0])
                    m1 = -1 / m2
                    b = pip_joint[1] - m1 * pip_joint[0]

                    # Width at the PIP joint
                    x1_pip, y1_pip, x2_pip, y2_pip = process_point(pip_joint, cnt, m1, b)

                    m2 = (midpoint_y - mcp_joint[1]) / (midpoint_x - mcp_joint[0])
                    m1 = -1 / m2
                    b = midpoint_y - m1 * midpoint_x

                    # Width at the PIP-MCP midpoint
                    x1_mid, y1_mid, x2_mid, y2_mid = process_point(midpoint, cnt, m1, b)

                    d_mm_pip = calSize(x1_pip, y1_pip, x2_pip, y2_pip, (255, 0, 0), (255, 0, 255), original_img)
                    d_mm_mid = calSize(x1_mid, y1_mid, x2_mid, y2_mid, (0, 255, 0), (0, 0, 255), original_img)

                    # Size the ring for the wider of the two cross-sections
                    largest_d_mm = max(int(d_mm_mid), int(d_mm_pip))
                    return original_img, largest_d_mm, imgcpy, marked_img

    # Fallback so callers can still unpack when no contour or hand is found
    return original_img, 0, imgcpy, marked_img

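# Worked example of the perpendicular construction in process_point above
# (illustrative numbers only): with PIP = (200, 300) and MCP = (220, 380), the
# finger axis has slope m2 = (300 - 380) / (200 - 220) = 4, so the measurement
# line through the PIP joint has slope m1 = -1 / 4 = -0.25 and intercept
# b = 300 - (-0.25 * 200) = 350. Stepping x in both directions along
# y = -0.25 * x + 350 until cv2.pointPolygonTest reports the point outside the
# hand contour yields the two silhouette edges; their distance divided by
# pixel_per_metric is the finger width in mm.
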
def mark_hand_landmarks(image):
    # `image` is a BGR numpy array (despite the original parameter name,
    # no file path is involved)
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands()
    mp_draw = mp.solutions.drawing_utils

    img = image
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    results = hands.process(img_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_draw.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Ring-finger MCP (13) and PIP (14) joints
            mcp = hand_landmarks.landmark[13]
            pip = hand_landmarks.landmark[14]

            img_height, img_width, _ = img.shape

            mcp_x, mcp_y = int(mcp.x * img_width), int(mcp.y * img_height)
            pip_x, pip_y = int(pip.x * img_width), int(pip.y * img_height)

            cv2.circle(img, (mcp_x, mcp_y), 10, (255, 0, 0), -1)
            cv2.circle(img, (pip_x, pip_y), 10, (255, 0, 0), -1)

    return img

def show_resized_image(images, titles, scale=0.5):
    num_images = len(images)

    fig, axes = plt.subplots(2, 3, figsize=(17, 13))
    axes = axes.flatten()

    # Hide the unused subplots of the 2x3 grid
    for ax in axes[num_images:]:
        ax.axis('off')
    for ax, img, title in zip(axes, images, titles):
        # Single-channel panels (e.g., the contour mask) cannot go through
        # COLOR_BGR2RGB directly; expand them to 3 channels first
        if img.ndim == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        resized_image = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        ax.imshow(cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB))
        ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    img_stream = BytesIO()
    plt.savefig(img_stream, format='png')
    img_stream.seek(0)
    plt.close(fig)
    return img_stream

def get_ring_size(mm_value):
    if mm_value in ring_size_dict:
        return ring_size_dict[mm_value]
    else:
        # Fall back to the nearest diameter in the chart
        closest_mm = min(ring_size_dict.keys(), key=lambda x: abs(x - mm_value))
        return ring_size_dict[closest_mm]

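# Quick sanity check of the nearest-key lookup (values per the chart above):
#   get_ring_size(16.45)  ->  6    exact key
#   get_ring_size(17.4)   ->  7    nearest chart diameter is 17.3
#   get_ring_size(18.0)   ->  8    18.2 is 0.2 mm away; 17.7 is 0.3 mm away
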
# st.set_page_config(layout="wide", page_title="Ring Size Measurement")  # already called once at the top of the file
st.write("## Determine Your Ring Size")
st.write(
    "📏 Upload an image of your hand to measure the finger width and determine your ring size. The measurement will be displayed along with a visual breakdown of the image processing flow."
)
st.sidebar.write("## Upload :gear:")
#~~
st.write("### Workflow Overview")
st.image("FlowChart.png", caption="Workflow Overview", use_column_width=True)

st.write("### Detailed Workflow")
st.write("1. **Hough Circle Transform:** The Hough Circle Transform detects circles in an image by mapping it into a parameter space and identifying circles by their center coordinates and radius. This makes it effective for locating circular reference objects, such as a coin, within the image.")
st.write("2. **Pixel Per Metric Ratio:** The Pixel Per Metric Ratio converts pixel measurements into real-world units. Comparing the pixel length obtained from image analysis (e.g., the detected circle) with the known real-world size of the reference object gives the ratio, which then lets us accurately scale and estimate the size of objects in the image.")
st.write("3. **Background Removal:** Removing the background first ensures that only the relevant subject is highlighted. We start by converting the image to grayscale and applying thresholding to distinguish the subject from the background. Erosion and dilation then clean up the image, improving the detection of specific features like individual fingers.")
st.write("4. **Contour Detection:** We use contour detection to find the largest contour and draw a boundary around the subject (the hand). This highlights the hand's shape and edges and sharpens its outline.")
st.write("5. **Finding Hand Landmarks:** This step uses the MediaPipe library to identify key points on the hand, such as the PIP (Proximal Interphalangeal) and MCP (Metacarpophalangeal) joints of the ring finger, enabling precise tracking of finger positions.")
st.write("6. **Determining Finger Width:** Here we use the line equation `[y = mx + b]` through the PIP and MCP points to measure the finger's width: we project outward from the PIP joint along the perpendicular to the PIP-MCP segment, then apply a point-polygon test to find where that line leaves the hand contour, giving the finger's width in pixels.")
st.write("7. **Predicting Ring Size:** Finally, the larger of the two width measurements (at the PIP joint and at the PIP-MCP midpoint) is converted into millimetres with the Pixel Per Metric Ratio, and this diameter is matched against the size chart to predict the appropriate ring size.")
#~~

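# Worked example of the pixel-per-metric conversion from step 2 above
# (illustrative numbers, not measured from a real image): if the detected
# reference object spans 800 px and its real-world length is 160 mm, the scale
# is 800 / 160 = 5.0 px/mm; a finger cross-section of 90 px is then
# 90 / 5.0 = 18.0 mm, which the chart above maps to US ring size 8.
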
MAX_FILE_SIZE = 5 * 1024 * 1024  # 5 MB

def process_image_and_get_results(upload):
    image = Image.open(upload)
    image_np = np.array(image)
    image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
    original_img = image_np.copy()
    og_img1 = image_np.copy()
    og_img2 = image_np.copy()
    img_1 = image_np.copy()
    hand_lms = mark_hand_landmarks(img_1)

    pixel_per_metric, mm_per_pixel, image_with_coin_info = calculate_pixel_per_metric(image_np)
    processed_image = process_image(og_img1)
    image_with_pip_width, width_mm, contour_image, pip_mark_img = calculate_pip_width(processed_image, original_img, pixel_per_metric)
    image_with_coin_info = np.array(image_with_coin_info)
    if image_with_coin_info is None:
        raise ValueError("Image is None, cannot resize.")
    elif not isinstance(image_with_coin_info, (np.ndarray, cv2.UMat)):
        raise TypeError(f"Invalid image type: {type(image_with_coin_info)}. Expected numpy array or cv2.UMat.")
    ring_size = get_ring_size(width_mm)
    return {
        "processed_image": image_with_pip_width,
        "original_image": og_img2,
        "hand_lm_marked_image": hand_lms,
        "image_with_coin_info": image_with_coin_info,
        "contour_image": contour_image,
        "width_mm": width_mm,
        "ring_size": ring_size
    }

def show_how_it_works(processed_image):
    # Currently unused helper; the button handler below renders the flow inline
    st.write("## How It Works")
    st.write("Here's a step-by-step breakdown of how your image is processed to determine your ring size:")
    st.image(processed_image, caption="Image Processing Flow", use_column_width=True)

col1, col2 = st.columns(2)
my_upload = st.sidebar.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])

if my_upload is not None:
    if my_upload.size > MAX_FILE_SIZE:
        st.error("The uploaded file is too large. Please upload an image smaller than 5MB.")
    else:
        st.write("## Image Processing Flow")
        results = process_image_and_get_results(my_upload)

        col1.write("Uploaded Image :camera:")
        col1.image(cv2.cvtColor(results["original_image"], cv2.COLOR_BGR2RGB), caption="Uploaded Image")

        col2.write("Processed Image :wrench:")
        col2.image(cv2.cvtColor(results["processed_image"], cv2.COLOR_BGR2RGB), caption="Processed Image with PIP Width")

        st.write(f"📏 The width of your finger is {results['width_mm']:.2f} mm, and the estimated ring size is {results['ring_size']:.1f}.")

        if st.button("How it Works"):
            st.write("## How It Works")
            st.write("Here's a step-by-step breakdown of how your image is processed to determine your ring size:")
            img_stream = show_resized_image(
                [results["original_image"], results["image_with_coin_info"], results["contour_image"], results["hand_lm_marked_image"], results["processed_image"]],
                ['Original Image', 'Image with Scale Info', 'Contour Boundary Image', 'Hand Landmarks', 'Ring Finger Width'],
                scale=0.5
            )
            st.image(img_stream, caption="Processing Flow", use_column_width=True)
else:
    st.info("Please upload an image to get started.")
```
requirements.txt
ADDED
@@ -0,0 +1,10 @@
```
streamlit
pillow
opencv-python
numpy
matplotlib
rembg
mediapipe
torch
transformers
scipy
```