import streamlit as st from PIL import Image, ImageDraw, ImageFont import io from io import BytesIO import os import cv2 import numpy as np import matplotlib.pyplot as plt from rembg import remove import mediapipe as mp import torch from transformers import AutoProcessor, AutoModelForCausalLM from transformers.dynamic_module_utils import get_imports from unittest.mock import patch from scipy.spatial import distance as dist st.set_page_config(layout="wide", page_title="Ring Size Measurement") ring_size_dict = { 14.0: 3, 14.4: 3.5, 14.8: 4, 15.2: 4.5, 15.6: 5, 16.0: 5.5, 16.45: 6, 16.9: 6.5, 17.3: 7, 17.7: 7.5, 18.2: 8, 18.6: 8.5, 19.0: 9, 19.4: 9.5, 19.8: 10, 20.2: 10.5, 20.6: 11, 21.0: 11.5, 21.4: 12, 21.8: 12.5, 22.2: 13, 22.6: 13.5 } device = torch.device("cuda" if torch.cuda.is_available() else "cpu") def fixed_get_imports(filename: str | os.PathLike) -> list[str]: if not str(filename).endswith("modeling_florence2.py"): return get_imports(filename) imports = get_imports(filename) imports.remove("flash_attn") return imports def load_model(): model_id = "microsoft/Florence-2-base-ft" processor = AutoProcessor.from_pretrained(model_id, torch_dtype=torch.qint8, trust_remote_code=True) try: os.mkdir("temp") except: pass with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports): model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="sdpa", trust_remote_code=True) Qmodel = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8) return Qmodel.to(device), processor if 'model_loaded' not in st.session_state: st.session_state.model_loaded = False if not st.session_state.model_loaded: with st.spinner('Loading model...'): st.session_state.model, st.session_state.processor = load_model() st.session_state.model_loaded = True st.write("Model loaded complete") def calculate_pixel_per_metric(image, known_diameter_of_coin=25): def generate_labels(model, processor, task_prompt, image, text_input=None): if text_input is None: prompt = task_prompt else: prompt = task_prompt + " " + text_input inputs = processor(text=prompt, images=image, return_tensors="pt").to(device) generated_ids = model.generate( input_ids=inputs["input_ids"], pixel_values=inputs["pixel_values"], max_new_tokens=1024, early_stopping=False, do_sample=False, num_beams=3, ) generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0] output = processor.post_process_generation( generated_text, task=task_prompt, image_size=(image.width, image.height) ) return output def plot_bbox(original_image, data): # Create a copy of the original image to draw on image_with_bboxes = original_image.copy() # Use Pillow to draw bounding boxes and labels draw = ImageDraw.Draw(image_with_bboxes) def calculate_bbox_dimensions(bbox): x1, y1, x2, y2 = bbox width = x2 - x1 height = y2 - y1 return width, height # Inside your `plot_bbox` function, after drawing the bounding box: font = ImageFont.truetype("arial.ttf", 28) for bbox, label in zip(data['bboxes'], data['labels']): x1, y1, x2, y2 = bbox draw.rectangle([x1, y1, x2, y2], outline="red", width=2) draw.text((x1, y1), label, fill="red", font=font) # Calculate dimensions width, height = calculate_bbox_dimensions(bbox) print(f"Label: {label}, Width: {width}, Height: {height}") dimension_text = f"W: {width}, H: {height}" draw.text((x1, y1 + 20), dimension_text, fill="red", font=font) real_world_dimension_mm = 160 largest_dimension = max(width, height) pixels_per_mm = largest_dimension / real_world_dimension_mm ratio_text = f"Pixels/mm: {pixels_per_mm:.2f}" draw.text((x1, y1 + 40), ratio_text, fill="red", font=font) # buf = BytesIO() # image_with_bboxes.save(buf, format='PNG') # buf.seek(0) return image_with_bboxes,pixels_per_mm,pixels_per_mm def detecting_ruler(model, processor, image, task_prompt, text_input=None): results = generate_labels(model, processor, task_prompt, image, text_input=text_input) image_with_bboxes, value_1, value_2 = plot_bbox(image, results['']) return value_1, value_2, image_with_bboxes image_for_model = image.copy() image_for_model = cv2.cvtColor(image_for_model, cv2.COLOR_BGR2RGB) image_for_model = Image.fromarray(image_for_model) # if image_for_model.mode != 'RGB': # image_for_model = image_for_model.convert('RGB') # Process the image text_input = "ruler" task_prompt = "" pixel_per_metric, mm_per_pixel, marked_image_buf = detecting_ruler(st.session_state.model, st.session_state.processor, image_for_model, task_prompt, text_input) return pixel_per_metric, mm_per_pixel, marked_image_buf def process_image(image): return remove(image) def calculate_pip_width(image, original_img, pixel_per_metric): def calSize(xA, yA, xB, yB, color_circle, color_line, img): d = dist.euclidean((xA, yA), (xB, yB)) cv2.circle(img, (int(xA), int(yA)), 5, color_circle, -1) cv2.circle(img, (int(xB), int(yB)), 5, color_circle, -1) cv2.line(img, (int(xA), int(yA)), (int(xB), int(yB)), color_line, 2) d_mm = d / pixel_per_metric d_mm = d_mm - 1.5 cv2.putText(img, "{:.1f}".format(d_mm), (int(xA - 15), int(yA - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (255, 255, 255), 2) print(d_mm) return d_mm def process_point(point, cnt, m1, b): x1, x2 = point[0], point[0] y1 = m1 * x1 + b y2 = m1 * x2 + b result = 1.0 while result > 0: result = cv2.pointPolygonTest(cnt, (x1, y1), False) x1 += 1 y1 = m1 * x1 + b x1 -= 1 result = 1.0 while result > 0: result = cv2.pointPolygonTest(cnt, (x2, y2), False) x2 -= 1 y2 = m1 * x2 + b x2 += 1 return x1, y1, x2, y2 og_img = original_img.copy() imgH, imgW, _ = image.shape imgcpy = image.copy() image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) _, binary_image = cv2.threshold(image_gray, 1, 255, cv2.THRESH_BINARY) contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) contour_image = np.zeros_like(image_gray) cv2.drawContours(contour_image, contours, -1, (255), thickness=cv2.FILLED) cv2.drawContours(imgcpy, contours, -1, (0, 255, 0), 2) # print("length : ",len(contours)) marked_img = image.copy() if len(contours) > 0: cnt = max(contours, key=cv2.contourArea) frame2 = cv2.cvtColor(og_img, cv2.COLOR_BGR2RGB) handsLM = mp.solutions.hands.Hands(max_num_hands=1, min_detection_confidence=0.8, min_tracking_confidence=0.8) pr = handsLM.process(frame2) print(pr.multi_hand_landmarks) if pr.multi_hand_landmarks: for hand_landmarks in pr.multi_hand_landmarks: lmlist = [] for id, landMark in enumerate(hand_landmarks.landmark): xPos, yPos = int(landMark.x * imgW), int(landMark.y * imgH) lmlist.append([id, xPos, yPos]) if len(lmlist) != 0: pip_joint = [lmlist[14][1], lmlist[14][2]] mcp_joint = [lmlist[13][1], lmlist[13][2]] midpoint_x = (pip_joint[0] + mcp_joint[0]) / 2 midpoint_y = (pip_joint[1] + mcp_joint[1]) / 2 midpoint = [midpoint_x, midpoint_y] m2 = (pip_joint[1] - mcp_joint[1]) / (pip_joint[0] - mcp_joint[0]) m1 = -1 / m2 b = pip_joint[1] - m1 * pip_joint[0] #pip_joint x1_pip, y1_pip, x2_pip, y2_pip = process_point(pip_joint, cnt, m1, b) m2 = (midpoint_y - mcp_joint[1]) / (midpoint_x - mcp_joint[0]) m1 = -1 / m2 b = midpoint_y - m1 * midpoint_x #midpoint x1_mid, y1_mid, x2_mid, y2_mid = process_point(midpoint, cnt, m1, b) d_mm_pip = calSize(x1_pip, y1_pip, x2_pip, y2_pip, (255, 0, 0), (255, 0, 255), original_img) d_mm_mid = calSize(x1_mid, y1_mid, x2_mid, y2_mid, (0, 255, 0), (0, 0, 255), original_img) largest_d_mm = max(int(d_mm_mid),int(d_mm_pip)) return original_img, largest_d_mm, imgcpy, marked_img def mark_hand_landmarks(image_path): mp_hands = mp.solutions.hands hands = mp_hands.Hands() mp_draw = mp.solutions.drawing_utils img = image_path img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) results = hands.process(img_rgb) if results.multi_hand_landmarks: for hand_landmarks in results.multi_hand_landmarks: mp_draw.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS) mcp = hand_landmarks.landmark[13] pip = hand_landmarks.landmark[14] img_height, img_width, _ = img.shape mcp_x, mcp_y = int(mcp.x * img_width), int(mcp.y * img_height) pip_x, pip_y = int(pip.x * img_width), int(pip.y * img_height) cv2.circle(img, (mcp_x, mcp_y), 10, (255, 0, 0), -1) cv2.circle(img, (pip_x, pip_y), 10, (255, 0, 0), -1) return img def show_resized_image(images, titles, scale=0.5): num_images = len(images) fig, axes = plt.subplots(2, 3, figsize=(17, 13)) axes = axes.flatten() for ax in axes[num_images:]: ax.axis('off') i = 0 for ax, img, title in zip(axes, images, titles): i = i + 1 print(i) resized_image = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR) ax.imshow(cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)) ax.set_title(title) ax.axis('off') plt.tight_layout() img_stream = BytesIO() plt.savefig(img_stream, format='png') img_stream.seek(0) plt.close(fig) return img_stream def get_ring_size(mm_value): if mm_value in ring_size_dict: return ring_size_dict[mm_value] else: closest_mm = min(ring_size_dict.keys(), key=lambda x: abs(x - mm_value)) return ring_size_dict[closest_mm] # st.set_page_config(layout="wide", page_title="Ring Size Measurement") st.write("## Determine Your Ring Size") st.write( "📏 Upload an image of your hand to measure the finger width and determine your ring size. The measurement will be displayed along with a visual breakdown of the image processing flow." ) st.sidebar.write("## Upload :gear:") #~~ st.write("### Workflow Overview") st.image("FlowChart.png", caption="Workflow Overview", use_column_width=True) st.write("### Detailed Workflow") st.write("1. **Florence-2 Model:** Florence-2 is an advanced vision foundation model that uses a prompt-based approach to handle a wide range of vision and vision-language tasks.We utilize this model to detect the scale within the image and mark a bounding box which we can use to find the approximate full measurement of scale.") st.write("2. **Pixel Per Metric Ratio:** The Pixel Per Metric Ratio is used to convert pixel measurements into real-world units. By comparing the pixel length obtained from image analysis (i.e., Hough Circle) with the known real-world measurement of the reference object (coin), we get the ratio. This ratio then allows us to accurately scale and size estimation of objects within the image.") st.write("3. **Background Removal:** Removing the background first ensures that only the relevant subject is highlighted. We start by converting the image to grayscale and applying thresholding to distinguish the subject from the background. Erosion and dilation then clean up the image, improving the detection of specific features like individual fingers.") st.write("4. **Contour Detection:** We use Contour Detection to find the largest contour, which allows us to outline or draw a boundary around the subject (i.e., hand). This highlights the object's shape and edges, improving the precision of the subject.") st.write("5. **Finding Hand Landmarks:** This involves using the MediaPipe library to identify key points on the hand, such as the PIP (Proximal Interphalangeal) and MCP (Metacarpophalangeal) joints of the ring finger. This enables precise tracking and analysis of finger positions and movements.") st.write("6. **Determining Finger Width:** Here we use the slope formula `[y = mx + b]` with PIP and MCP points to measure the finger's width. We project outward perpendicularly from the PIP point towards the MCP point, then apply a point polygon test to accurately determine the pixel width of the finger.") st.write("7. **Predicting Ring Size:** Predicting Ring Size involves calculating the finger’s diameter using the Pixel Per Metric Ratio and the largest width measurement at the PIP or MCP joint. This diameter is then used to predict the appropriate ring size.") #~~ MAX_FILE_SIZE = 5 * 1024 * 1024 # 5MB def process_image_and_get_results(upload): image = Image.open(upload) # image = cv2.imread(upload) image_np = np.array(image) image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR) original_img = image_np.copy() og_img1 = image_np.copy() og_img2 = image_np.copy() img_1 = image_np.copy() hand_lms = mark_hand_landmarks(img_1) pixel_per_metric, mm_per_pixel, image_with_coin_info = calculate_pixel_per_metric(image_np) processed_image = process_image(og_img1) image_with_pip_width, width_mm, contour_image, pip_mark_img = calculate_pip_width(processed_image, original_img, pixel_per_metric) image_with_coin_info = np.array(image_with_coin_info) if image_with_coin_info is None: print("inside1") raise ValueError("Image is None, cannot resize.") elif not isinstance(image_with_coin_info, (np.ndarray, cv2.UMat)): print("inside2") raise TypeError(f"Invalid image type: {type(image_with_coin_info)}. Expected numpy array or cv2.UMat.") ring_size = get_ring_size(width_mm) return { "processed_image": image_with_pip_width, "original_image": og_img2, "hand_lm_marked_image": hand_lms, "image_with_coin_info": image_with_coin_info, "contour_image": contour_image, "width_mm": width_mm, "ring_size": ring_size } def show_how_it_works(processed_image): st.write("## How It Works") st.write("Here's a step-by-step breakdown of how your image is processed to determine your ring size:") st.image(processed_image, caption="Image Processing Flow", use_column_width=True) col1, col2 = st.columns(2) my_upload = st.sidebar.file_uploader("Upload an image", type=["png", "jpg", "jpeg"]) if my_upload is not None: if my_upload.size > MAX_FILE_SIZE: st.error("The uploaded file is too large. Please upload an image smaller than 5MB.") else: st.write("## Image Processing Flow") results = process_image_and_get_results(my_upload) col1.write("Uploaded Image :camera:") col1.image(cv2.cvtColor(results["original_image"], cv2.COLOR_BGR2RGB), caption="Uploaded Image") col2.write("Processed Image :wrench:") col2.image(cv2.cvtColor(results["processed_image"], cv2.COLOR_BGR2RGB), caption="Processed Image with PIP Width") st.write(f"📏 The width of your finger is {results['width_mm']:.2f} mm, and the estimated ring size is {results['ring_size']:.1f}.") if st.button("How it Works"): st.write("## How It Works") st.write("Here's a step-by-step breakdown of how your image is processed to determine your ring size:") print("here") img_stream = show_resized_image( [results["original_image"], results["image_with_coin_info"], results["contour_image"], results["hand_lm_marked_image"], results["processed_image"]], ['Original Image', 'Image with Scale Info', 'Contour Boundary Image', 'Hand Landmarks', 'Ring Finger Width'], scale=0.5 ) st.image(img_stream, caption="Processing Flow", use_column_width=True) else: st.info("Please upload an image to get started.")