hackerloi45 committed on
Commit
a06f639
Β·
1 Parent(s): f77c4c2
Files changed (1) hide show
  1. app.py +95 -94
app.py CHANGED
@@ -7,22 +7,29 @@ from PIL import Image
7
  import gradio as gr
8
  import numpy as np
9
 
 
10
  from sentence_transformers import SentenceTransformer
 
 
11
  from google import genai
 
 
12
  from qdrant_client import QdrantClient
13
  from qdrant_client.http.models import VectorParams, Distance, PointStruct
14
 
15
  # -------------------------
16
- # CONFIG
17
  # -------------------------
18
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
19
  QDRANT_URL = os.environ.get("QDRANT_URL", "").strip()
20
  QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY", "").strip()
21
 
 
 
 
22
  print("Loading CLIP model (this may take 20-60s the first time)...")
23
  MODEL_ID = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
24
  clip_model = SentenceTransformer(MODEL_ID)
25
- VECTOR_SIZE = clip_model.get_sentence_embedding_dimension()
26
 
27
  genai_client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
28
 
@@ -31,7 +38,9 @@ if not QDRANT_URL:
31
 
32
  qclient = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
33
  COLLECTION = "lost_found_items"
 
34
 
 
35
  try:
36
  if not qclient.collection_exists(COLLECTION):
37
  qclient.create_collection(
@@ -45,12 +54,11 @@ except Exception as e:
45
  # Helpers
46
  # -------------------------
47
  def embed_text(text: str):
48
- return clip_model.encode([text], convert_to_numpy=True)[0]
49
 
50
  def embed_image_pil(pil_img: Image.Image):
51
  pil_img = pil_img.convert("RGB")
52
- np_img = np.array(pil_img)
53
- return clip_model.encode([np_img], convert_to_numpy=True)[0]
54
 
55
  def gen_tags_from_image_file(image_bytes: io.BytesIO) -> str:
56
  if genai_client is None:
@@ -59,70 +67,64 @@ def gen_tags_from_image_file(image_bytes: io.BytesIO) -> str:
59
  file_obj = genai_client.files.upload(file=image_bytes)
60
  prompt_text = (
61
  "Give 4 short tags (comma-separated) describing this item in the image. "
62
- "Tags should be short single words or two-word phrases. Respond only with tags."
 
63
  )
64
  response = genai_client.models.generate_content(
65
  model="gemini-2.5-flash",
66
  contents=[prompt_text, file_obj],
67
  )
68
  return response.text.strip()
69
- except Exception as e:
70
- print("Error generating tags:", e)
71
- return ""
72
-
73
- def decode_image_from_b64(b64_str: str):
74
- try:
75
- img_bytes = base64.b64decode(b64_str)
76
- return Image.open(io.BytesIO(img_bytes))
77
  except Exception:
78
- return None
79
 
80
  # -------------------------
81
- # Add item
82
  # -------------------------
83
  def add_item(mode: str, uploaded_image, text_description: str, finder_name: str, finder_phone: str):
84
  item_id = str(uuid.uuid4())
85
  payload = {"mode": mode, "text": text_description}
86
 
87
- # If "found", save finder info
88
  if mode == "found":
89
  payload["finder_name"] = finder_name
90
  payload["finder_phone"] = finder_phone
91
 
92
- try:
93
- if uploaded_image is not None:
94
- img_bytes = io.BytesIO()
95
- uploaded_image.convert("RGB").save(img_bytes, format="PNG")
96
- img_bytes.seek(0)
97
-
98
- vec = embed_image_pil(uploaded_image).tolist()
99
- payload["has_image"] = True
100
- payload["tags"] = gen_tags_from_image_file(img_bytes)
101
- payload["image_b64"] = base64.b64encode(img_bytes.getvalue()).decode("utf-8")
102
- else:
103
- vec = embed_text(text_description).tolist()
104
- payload["has_image"] = False
105
- if genai_client:
106
- try:
107
- resp = genai_client.models.generate_content(
108
- model="gemini-2.5-flash",
109
- contents=f"Give 4 short, comma-separated tags for this item described as: {text_description}. Reply only with tags."
110
- )
111
- payload["tags"] = resp.text.strip()
112
- except Exception:
113
- payload["tags"] = ""
114
- else:
115
  payload["tags"] = ""
 
 
116
 
 
117
  point = PointStruct(id=item_id, vector=vec, payload=payload)
118
  qclient.upsert(collection_name=COLLECTION, points=[point], wait=True)
119
-
120
- return f"βœ… Saved item id: {item_id}\nTags: {payload.get('tags','')}"
121
  except Exception as e:
122
- return f"❌ Error saving to Qdrant: {e}"
 
 
123
 
124
  # -------------------------
125
- # Search
126
  # -------------------------
127
  def search_items(query_image, query_text, limit: int = 5, min_score: float = 0.90):
128
  if query_image is not None:
@@ -130,86 +132,85 @@ def search_items(query_image, query_text, limit: int = 5, min_score: float = 0.9
130
  elif query_text and len(query_text.strip()) > 0:
131
  qvec = embed_text(query_text).tolist()
132
  else:
133
- return [], "⚠️ Please provide a query image or some query text."
134
 
135
  try:
136
  hits = qclient.search(collection_name=COLLECTION, query_vector=qvec, limit=limit)
137
  except Exception as e:
138
- return [], f"❌ Error querying Qdrant: {e}"
139
 
140
  if not hits:
141
- return [], "No results found."
142
 
143
- images, captions = [], []
 
144
  for h in hits:
145
  score = getattr(h, "score", None)
146
  if score is None or score < min_score:
147
  continue
148
 
149
  payload = h.payload or {}
150
- caption = f"ID: {h.id}\nScore: {score:.4f}\nMode: {payload.get('mode','')}\nTags: {payload.get('tags','')}\nText: {payload.get('text','')}"
151
-
152
- # If it's a found item, show finder details
153
- if payload.get("mode") == "found":
154
- caption += f"\nπŸ‘€ Finder: {payload.get('finder_name','N/A')} | πŸ“ž {payload.get('finder_phone','N/A')}"
155
 
156
- captions.append(caption)
 
 
 
 
 
157
 
158
- if payload.get("has_image") and payload.get("image_b64"):
159
- img = decode_image_from_b64(payload["image_b64"])
160
- if img:
161
- images.append(img)
162
- else:
163
- images.append(Image.new("RGB", (200,200), color="gray"))
164
- else:
165
- img = Image.new("RGB", (200,200), color="lightblue")
166
- images.append(img)
167
 
168
- if not images:
169
- return [], f"No results above similarity threshold {min_score:.2f}"
170
 
171
- return list(zip(images, captions)), ""
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
  # -------------------------
174
  # Gradio UI
175
  # -------------------------
176
  with gr.Blocks(title="Lost & Found β€” Simple Helper") as demo:
177
- gr.Markdown("## 🧳 Lost & Found Helper β€” Upload items, then search by image or text.")
178
  with gr.Row():
179
  with gr.Column():
180
  mode = gr.Radio(choices=["lost", "found"], value="lost", label="Add as")
181
  upload_img = gr.Image(type="pil", label="Item photo (optional)")
182
- text_desc = gr.Textbox(lines=2, placeholder="Short description", label="Description (optional)")
183
- finder_name = gr.Textbox(lines=1, placeholder="Finder name (only if found)", label="Finder Name")
184
- finder_phone = gr.Textbox(lines=1, placeholder="Finder phone (only if found)", label="Finder Phone")
185
- add_btn = gr.Button("βž• Add item")
186
  add_out = gr.Textbox(label="Add result", interactive=False)
 
 
187
  with gr.Column():
188
- gr.Markdown("### πŸ” Search")
189
  query_img = gr.Image(type="pil", label="Search by image (optional)")
190
  query_text = gr.Textbox(lines=2, label="Search by text (optional)")
191
- limit_slider = gr.Slider(1, 10, value=5, step=1, label="Max results")
192
- score_slider = gr.Slider(0.0, 1.0, value=0.90, step=0.01, label="Min similarity score")
193
- search_btn = gr.Button("πŸ”Ž Search")
194
- gallery = gr.Gallery(
195
- label="Search Results",
196
- show_label=True,
197
- elem_id="gallery",
198
- columns=2,
199
- height="auto"
200
- )
201
- search_msg = gr.Textbox(label="Message", interactive=False)
202
-
203
- add_btn.click(
204
- add_item,
205
- inputs=[mode, upload_img, text_desc, finder_name, finder_phone],
206
- outputs=[add_out]
207
- )
208
- search_btn.click(
209
- search_items,
210
- inputs=[query_img, query_text, limit_slider, score_slider],
211
- outputs=[gallery, search_msg]
212
- )
213
 
214
  if __name__ == "__main__":
215
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
7
  import gradio as gr
8
  import numpy as np
9
 
10
+ # CLIP via Sentence-Transformers
11
  from sentence_transformers import SentenceTransformer
12
+
13
+ # Gemini (Google) client
14
  from google import genai
15
+
16
+ # Qdrant client & helpers
17
  from qdrant_client import QdrantClient
18
  from qdrant_client.http.models import VectorParams, Distance, PointStruct
19
 
20
  # -------------------------
21
+ # CONFIG (reads env vars)
22
  # -------------------------
23
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
24
  QDRANT_URL = os.environ.get("QDRANT_URL", "").strip()
25
  QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY", "").strip()
26
 
27
+ # -------------------------
28
+ # Initialize clients/models
29
+ # -------------------------
30
  print("Loading CLIP model (this may take 20-60s the first time)...")
31
  MODEL_ID = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
32
  clip_model = SentenceTransformer(MODEL_ID)
 
33
 
34
  genai_client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
35
 
 
38
 
39
  qclient = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
40
  COLLECTION = "lost_found_items"
41
+ VECTOR_SIZE = clip_model.get_sentence_embedding_dimension()
42
 
43
+ # Create collection if missing
44
  try:
45
  if not qclient.collection_exists(COLLECTION):
46
  qclient.create_collection(
 
54
  # Helpers
55
  # -------------------------
56
def embed_text(text: str):
    """Encode a text string into a CLIP embedding (numpy array)."""
    encoded = clip_model.encode(text, convert_to_numpy=True)
    return encoded
58
 
59
def embed_image_pil(pil_img: Image.Image):
    """Encode a PIL image into a CLIP embedding (numpy array)."""
    rgb = pil_img.convert("RGB")
    return clip_model.encode(rgb, convert_to_numpy=True)
 
62
 
63
  def gen_tags_from_image_file(image_bytes: io.BytesIO) -> str:
64
  if genai_client is None:
 
67
  file_obj = genai_client.files.upload(file=image_bytes)
68
  prompt_text = (
69
  "Give 4 short tags (comma-separated) describing this item in the image. "
70
+ "Tags should be short single words or two-word phrases (e.g. 'black backpack', 'water bottle'). "
71
+ "Respond only with tags, no extra explanation."
72
  )
73
  response = genai_client.models.generate_content(
74
  model="gemini-2.5-flash",
75
  contents=[prompt_text, file_obj],
76
  )
77
  return response.text.strip()
 
 
 
 
 
 
 
 
78
  except Exception:
79
+ return ""
80
 
81
  # -------------------------
82
+ # App logic: add item
83
  # -------------------------
84
def add_item(mode: str, uploaded_image, text_description: str, finder_name: str, finder_phone: str):
    """Store a lost/found item in the Qdrant collection.

    Embeds the uploaded photo (preferred) or the text description with CLIP,
    asks Gemini for short tags when a client is configured, and upserts one
    point whose payload carries the item metadata (plus the PNG as base64
    when a photo was given).

    Args:
        mode: "lost" or "found" (a "found" item also stores finder contact info).
        uploaded_image: optional PIL image of the item.
        text_description: optional free-text description.
        finder_name / finder_phone: contact details, used only when mode == "found".

    Returns:
        A human-readable status string for the Gradio textbox (never raises).
    """
    # Guard: with neither an image nor usable text there is nothing to embed;
    # previously this fell through to embed_text(None/"") and crashed.
    if uploaded_image is None and not (text_description and text_description.strip()):
        return "⚠️ Please provide a photo or a short description."

    item_id = str(uuid.uuid4())
    payload = {"mode": mode, "text": text_description}

    # Found items additionally record who to contact.
    if mode == "found":
        payload["finder_name"] = finder_name
        payload["finder_phone"] = finder_phone

    if uploaded_image is not None:
        # Serialize once to PNG; the same buffer feeds Gemini tagging and the
        # base64 copy stored in the payload (getvalue() ignores read position).
        img_bytes = io.BytesIO()
        uploaded_image.convert("RGB").save(img_bytes, format="PNG")
        img_bytes.seek(0)

        vec = embed_image_pil(uploaded_image).tolist()
        payload["has_image"] = True
        payload["tags"] = gen_tags_from_image_file(img_bytes)
        payload["image_b64"] = base64.b64encode(img_bytes.getvalue()).decode("utf-8")
    else:
        vec = embed_text(text_description).tolist()
        payload["has_image"] = False
        if genai_client:
            # Best-effort tagging; a Gemini failure must not block saving.
            try:
                resp = genai_client.models.generate_content(
                    model="gemini-2.5-flash",
                    contents=f"Give 4 short, comma-separated tags for this item described as: {text_description}. Reply only with tags."
                )
                payload["tags"] = resp.text.strip()
            except Exception:
                payload["tags"] = ""
        else:
            payload["tags"] = ""

    try:
        point = PointStruct(id=item_id, vector=vec, payload=payload)
        qclient.upsert(collection_name=COLLECTION, points=[point], wait=True)
    except Exception as e:
        return f"Error saving to Qdrant: {e}"

    return f"✅ Saved item id: {item_id}\nTags: {payload.get('tags','')}"
125
 
126
  # -------------------------
127
+ # App logic: search
128
  # -------------------------
129
  def search_items(query_image, query_text, limit: int = 5, min_score: float = 0.90):
130
  if query_image is not None:
 
132
  elif query_text and len(query_text.strip()) > 0:
133
  qvec = embed_text(query_text).tolist()
134
  else:
135
+ return "⚠️ Please provide a query image or some query text.", []
136
 
137
  try:
138
  hits = qclient.search(collection_name=COLLECTION, query_vector=qvec, limit=limit)
139
  except Exception as e:
140
+ return f"❌ Error querying Qdrant: {e}", []
141
 
142
  if not hits:
143
+ return "No results found.", []
144
 
145
+ results_text = []
146
+ results_imgs = []
147
  for h in hits:
148
  score = getattr(h, "score", None)
149
  if score is None or score < min_score:
150
  continue
151
 
152
  payload = h.payload or {}
153
+ text_entry = (
154
+ f"id:{h.id} | score:{score:.4f} | mode:{payload.get('mode','')} | tags:{payload.get('tags','')} "
155
+ f"| text:{payload.get('text','')} | finder:{payload.get('finder_name','-')} | phone:{payload.get('finder_phone','-')}"
156
+ )
157
+ results_text.append(text_entry)
158
 
159
+ if payload.get("has_image") and "image_b64" in payload:
160
+ try:
161
+ img = Image.open(io.BytesIO(base64.b64decode(payload["image_b64"])))
162
+ results_imgs.append(img)
163
+ except Exception:
164
+ pass
165
 
166
+ if not results_text:
167
+ return f"No results above similarity threshold {min_score}", []
 
 
 
 
 
 
 
168
 
169
+ return "\n\n".join(results_text), results_imgs
 
170
 
171
+ # -------------------------
172
+ # App logic: clear images
173
+ # -------------------------
174
def clear_all_images():
    """Delete every stored item whose payload carries an image.

    Returns a human-readable status string for the Gradio textbox.
    """
    # Match only points flagged with has_image == True.
    image_selector = {
        "filter": {"must": [{"key": "has_image", "match": {"value": True}}]}
    }
    try:
        qclient.delete(collection_name=COLLECTION, points_selector=image_selector)
    except Exception as e:
        return f"❌ Error while clearing images: {e}"
    return "🗑️ All items with images have been cleared!"
185
 
186
  # -------------------------
187
  # Gradio UI
188
  # -------------------------
189
# -------------------------
# Gradio UI
# -------------------------
with gr.Blocks(title="Lost & Found — Simple Helper") as demo:
    gr.Markdown("## Lost & Found Helper (image/text search) — upload items, then search by image or text.")

    with gr.Row():
        # Left column: add a lost/found item (and bulk-clear stored images).
        with gr.Column():
            mode = gr.Radio(choices=["lost", "found"], value="lost", label="Add as")
            upload_img = gr.Image(type="pil", label="Item photo (optional)")
            text_desc = gr.Textbox(
                lines=2,
                placeholder="Short description (e.g. 'black backpack with blue zipper')",
                label="Description (optional)",
            )
            finder_name = gr.Textbox(label="Finder Name (only if found)", placeholder="e.g. John Doe")
            finder_phone = gr.Textbox(label="Finder Phone (only if found)", placeholder="e.g. +1234567890")
            add_btn = gr.Button("Add item")
            add_out = gr.Textbox(label="Add result", interactive=False)
            clear_btn = gr.Button("Clear All Images")
            clear_out = gr.Textbox(label="Clear Result", interactive=False)

        # Right column: search by image and/or text with a similarity cutoff.
        with gr.Column():
            gr.Markdown("### Search")
            query_img = gr.Image(type="pil", label="Search by image (optional)")
            query_text = gr.Textbox(lines=2, label="Search by text (optional)")
            score_slider = gr.Slider(0.5, 1.0, value=0.90, step=0.01, label="Min similarity threshold")
            search_btn = gr.Button("Search")
            search_out = gr.Textbox(label="Search results (text)", interactive=False)
            gallery = gr.Gallery(label="Search Results", show_label=True, elem_id="gallery", columns=2, height="auto")

    # Event wiring. The hidden gr.Number supplies the fixed result limit (5)
    # expected by search_items without exposing it in the UI.
    add_btn.click(add_item, inputs=[mode, upload_img, text_desc, finder_name, finder_phone], outputs=[add_out])
    search_btn.click(
        search_items,
        inputs=[query_img, query_text, gr.Number(value=5, visible=False), score_slider],
        outputs=[search_out, gallery],
    )
    clear_btn.click(clear_all_images, outputs=[clear_out])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside a container.
    demo.launch(server_name="0.0.0.0", server_port=7860)