Spaces:

roy214
/

ImageTextRetrieval

Sleeping

App Files Files Community

roy214 committed on May 9, 2025

Commit

e399303

verified ·

1 Parent(s): 63b530b

Upload 2 files

Browse files

Files changed (2) hide show

src/streamlit_app.py +204 -0
src/styles.csv +0 -0

src/streamlit_app.py ADDED Viewed

	@@ -0,0 +1,204 @@

+import streamlit as st
+import boto3
+from botocore.exceptions import NoCredentialsError
+from io import BytesIO
+from PIL import Image
+import pandas as pd
+import matplotlib.pyplot as plt
+import os
+import faiss
+import pickle
+import torch
+from transformers import CLIPModel, CLIPProcessor
+from huggingface_hub import hf_hub_download, snapshot_download
+import json
+import requests
# Module-level S3 client. No explicit credentials are passed here, so boto3
# resolves them itself (presumably from the Space's secrets - confirm).
# NOTE(review): `s3` is not referenced anywhere else in the visible code;
# images are fetched through plain HTTPS URLs instead.
s3 = boto3.client("s3")
def get_image_from_s3(bucket_name, img_id):
    """Build the HTTPS URL of a product image stored in an S3 bucket.

    Despite its name, this function performs no network access: it only
    formats the virtual-hosted-style URL ``https://<bucket>.s3.amazonaws.com/
    <img_id>.jpg``. Downloading is left to the caller.

    Args:
        bucket_name: Name of the S3 bucket.
        img_id: Image identifier (int or str) used as the object key stem.

    Returns:
        The image URL as a string.
    """
    from urllib.parse import quote

    # Percent-encode the key so identifiers containing spaces or other
    # reserved characters still yield a valid URL; "/" is kept so that keys
    # with a prefix path continue to work. Plain numeric ids are unchanged.
    key = quote(f"{img_id}.jpg", safe="/")
    return f"https://{bucket_name}.s3.amazonaws.com/{key}"
def show_img(img_id, score=None, col=None):
    """Download one product image and render it with a metadata caption.

    The image URL is built with ``get_image_from_s3`` from the module-level
    ``bucket_name``; the caption is assembled from the matching row of the
    module-level ``style`` DataFrame.

    Args:
        img_id: Product identifier; must be convertible to ``int`` because it
            is matched against the ``id`` column of ``style``.
        score: Optional similarity score appended to the caption. A score of
            ``0`` is a valid value and is displayed.
        col: Streamlit container (e.g. a column) to render into. When
            ``None`` the image is rendered in the main page instead of being
            silently dropped.

    Errors are reported through ``st.error`` rather than raised.
    """
    img_url = get_image_from_s3(bucket_name, img_id)
    if not img_url:
        return

    try:
        # A timeout keeps one slow or unreachable object from hanging the
        # whole Streamlit script run; Timeout is a RequestException subclass.
        response = requests.get(img_url, timeout=10)
        response.raise_for_status()
        img = Image.open(BytesIO(response.content))

        img_style = style[style['id'] == int(img_id)]
        if img_style.empty:
            st.write("img_style is empty")
            return

        caption = _build_caption(img_style, score)
        target = col if col is not None else st
        target.image(img, caption=caption, use_container_width=True)
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching image: {e}")
    except Exception as e:
        # Boundary handler: a single bad image must not break the page.
        st.error(f"Error processing image: {e}")


def _build_caption(img_style, score=None):
    """Join the metadata fields of the first row of ``img_style`` into a caption."""
    fields = (
        'gender',
        'masterCategory',
        'subCategory',
        'articleType',
        'baseColour',
        'year',
        'usage',
        'productDisplayName',
    )
    text = '- '.join(str(img_style[name].values[0]) for name in fields)
    # Compare against None explicitly: a score of 0.0 is falsy but valid.
    if score is not None:
        text += f'\n\n Score: {score:.2f}'
    return text
def search_faiss(model, processor, index, id_map, prompt, top_k=5, device='cpu'):
    """Embed a text prompt with CLIP and look up its nearest images in FAISS.

    Progress messages are written to the Streamlit page as the search runs.

    Args:
        model: CLIP model exposing ``get_text_features``.
        processor: Matching CLIP processor used to tokenize ``prompt``.
        index: FAISS index queried with the L2-normalized text embedding.
        id_map: Mapping (or sequence) from FAISS row position to image id.
        prompt: Query text.
        top_k: Maximum number of results requested from the index.
        device: Device the tokenized inputs are moved to.

    Returns:
        A list of ``(image_id, score)`` tuples in the order returned by
        FAISS. It may contain fewer than ``top_k`` items when the index holds
        fewer vectors than requested.
    """
    st.write(f"Running FAISS search for prompt: '{prompt}' with top_k={top_k}")
    # truncation=True keeps over-long prompts within the text encoder's
    # maximum sequence length instead of failing inside the model.
    inputs = processor(
        text=[prompt],
        return_tensors='pt',
        padding=True,
        truncation=True,
    ).to(device)
    st.write("Prompt processed by tokenizer.")

    with torch.no_grad():
        txt_emb = model.get_text_features(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
        )
        txt_emb = txt_emb / txt_emb.norm(p=2, dim=-1, keepdim=True)
    st.write("Text embedding computed.")

    q = txt_emb.cpu().numpy().astype('float32')
    # Streamlit widgets may hand over non-builtin numeric types; FAISS
    # expects a plain integer k.
    D, I = index.search(q, int(top_k))
    st.write("FAISS search completed.")

    # FAISS pads missing neighbours with label -1. Skip them: with a dict
    # id_map they raise KeyError, with a list they would silently return the
    # last element.
    return [
        (id_map[int(label)], float(dist))
        for dist, label in zip(D[0], I[0])
        if label >= 0
    ]
def running(prompt, top_k=5):
    """Run a text-to-image search and lay the hits out in a 5-column grid.

    Uses the module-level ``model``, ``processor``, ``index`` and ``id_map``.
    Results beyond the fifth wrap around and are stacked under the first
    columns. A warning is shown when the search returns nothing.

    Args:
        prompt: Query text forwarded to ``search_faiss``.
        top_k: Number of results to request.
    """
    st.write("Starting image retrieval...")
    results = search_faiss(model, processor, index, id_map, prompt=prompt, top_k=top_k)

    grid_width = 5
    cols = st.columns(grid_width)
    for position, (img_id, score) in enumerate(results):
        # Modulo cycles through the columns, i.e. five images per row.
        show_img(img_id, score, col=cols[position % grid_width])

    if not results:
        st.warning("No results were returned from FAISS. Check your prompt or embedding.")
# --- Static configuration -------------------------------------------------
current_dir = os.path.dirname(__file__)
csv_path = os.path.join(current_dir, 'styles.csv')
bucket_name = "image-text-retrieval"  # S3 bucket that holds the images
your_username = "roy214"
model_repo = f"{your_username}/clip-finetuned-fashion"

# Point the Hugging Face caches at a writable directory.
# NOTE(review): these are set after `transformers` / `huggingface_hub` were
# imported; if those libraries read the variables at import time this has no
# effect and the assignments should move above the imports - confirm.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
os.environ["HF_HUB_CACHE"] = "/tmp/huggingface"

# Fail with a readable message instead of a bare KeyError traceback.
hf_token = os.environ.get("HUGGINGFACE_TOKEN")
if not hf_token:
    st.error("HUGGINGFACE_TOKEN is not set; the model repository cannot be downloaded.")
    st.stop()


@st.cache_data
def _load_styles(path):
    """Read the first ten columns of the product metadata CSV (cached)."""
    return pd.read_csv(path, usecols=range(10))


@st.cache_resource
def _load_retrieval_assets(repo_id, token):
    """Download the model repo once and load model, processor, index, id map.

    Streamlit re-executes this script on every interaction; caching keeps the
    download and model load from being repeated on each rerun.

    Returns:
        Tuple ``(model, processor, index, id_map)``.

    Raises:
        FileNotFoundError: If the FAISS index or the id map is missing from
            the downloaded repository.
    """
    model_dir = snapshot_download(
        repo_id=repo_id,
        token=token,
        local_dir="/tmp/model",         # explicit download location
        local_dir_use_symlinks=False    # avoid symlinks into /.cache
    )

    index_path = os.path.join(model_dir, "faiss_index.bin")
    mapping_path = os.path.join(model_dir, "id_map.json")
    # Explicit exceptions instead of `assert`, which is stripped under -O.
    for required in (index_path, mapping_path):
        if not os.path.isfile(required):
            raise FileNotFoundError(f"Không tìm thấy {required}")

    # The weights are loaded from the local directory, so no auth token is
    # passed to from_pretrained.
    model = CLIPModel.from_pretrained(
        model_dir,
        device_map="auto",            # let the library place the weights
        low_cpu_mem_usage=True,       # lower peak RAM while loading
    ).eval()
    processor = CLIPProcessor.from_pretrained(model_dir)
    index = faiss.read_index(index_path)

    # SECURITY NOTE: pickle.load executes arbitrary code from the file. This
    # is only acceptable because the repository is owned by the app author.
    # NOTE(review): the file is named *.json but is read as a pickle -
    # confirm the on-disk format and prefer json.load if it is really JSON.
    with open(mapping_path, "rb") as f:
        id_map = pickle.load(f)

    return model, processor, index, id_map


style = _load_styles(csv_path)
model, processor, index, id_map = _load_retrieval_assets(model_repo, hf_token)
# --- Page header and project overview --------------------------------------
st.title("Fashion Product Image Retrieval")
overview_md = """
### **Overview**
In this project, I demonstrate an **Image Retrieval** system for fashion products. The system uses a fine-tuned **CLIP model** (`clip-vit-base-patch32`) to match images with relevant text descriptions. We have a dataset of **1000 fashion product images**, stored on **Amazon S3**. Each image is associated with detailed product descriptions, such as **product type**, **color**, **category**, and **brand**.
The goal of this system is to retrieve the most relevant fashion images based on a given text prompt (e.g., "red dress") and vice versa. With this system, users can search for fashion products in a more intuitive, text-based manner.
#### Key Features:
- **Dataset**: 1000 fashion product images with descriptive text.
- **Storage**: Images are stored on **Amazon S3**.
- **Model**: Fine-tuned **OpenAI CLIP model** (`clip-vit-base-patch32`) on the dataset.
- **Objective**: Given a prompt like "red dress", the system retrieves the most relevant images.
"""
st.markdown(overview_md)

# --- Fixed sample gallery: one catalogue item per column --------------------
st.subheader("Some sample images and their captions:")
example = [13422, 10037, 38246, 23273, 2008]
example_cols = st.columns(5)
for sample_id, sample_col in zip(example, example_cols):
    show_img(sample_id, None, sample_col)

# --- Search form ------------------------------------------------------------
st.subheader("Example usage: enter a prompt to retrieve related images")
with st.form(key="retrieval_form"):
    prompt_input = st.text_input("Enter a prompt", placeholder="e.g., a red Apparel dress")
    top_k_input = st.number_input(
        "Enter the number of results (top_k)",
        min_value=1,
        max_value=10,
        value=5,
        step=1,
    )
    submitted = st.form_submit_button(label="Find Related Images")

# Only act once the form has been submitted; reject blank prompts first.
if submitted:
    if not prompt_input.strip() or top_k_input <= 0:
        st.warning("Please enter a valid prompt and top_k.")
    else:
        running(prompt_input, top_k_input)

src/styles.csv ADDED Viewed

The diff for this file is too large to render. See raw diff