vumichien committed on
Commit
57de00b
·
1 Parent(s): ac39bd2

change methodology to save file

Browse files
Files changed (6) hide show
  1. .gitignore +44 -0
  2. Dockerfile +9 -0
  3. app.py +289 -252
  4. download_images.py +108 -0
  5. static/metadata/.gitkeep +1 -0
  6. static/uploads/.gitkeep +1 -0
.gitignore ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ env/
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual Environment
24
+ venv/
25
+ ENV/
26
+ .env
27
+
28
+ # IDE
29
+ .idea/
30
+ .vscode/
31
+ *.swp
32
+ *.swo
33
+
34
+ # Logs
35
+ *.log
36
+
37
+ # Image uploads and metadata - keep directories but ignore content
38
+ static/uploads/*
39
+ !static/uploads/.gitkeep
40
+ static/metadata/*
41
+ !static/metadata/.gitkeep
42
+
43
+ # Hugging Face
44
+ .huggingface/
Dockerfile CHANGED
@@ -35,6 +35,15 @@ ENV ENV=production
35
  # ENV HF_TOKEN=your-token
36
  # ENV HF_DATASET_REPO=image-uploader-data
37
 
 
 
 
 
 
 
 
 
 
38
  # Expose port for Hugging Face Spaces (uses port 7860)
39
  EXPOSE 7860
40
 
 
35
  # ENV HF_TOKEN=your-token
36
  # ENV HF_DATASET_REPO=image-uploader-data
37
 
38
+ # Add script to download images from Hugging Face during build
39
+ COPY download_images.py .
40
+ RUN if [ -n "$HF_USERNAME" ] && [ -n "$HF_TOKEN" ] && [ -n "$HF_DATASET_REPO" ]; then \
41
+ echo "Downloading images from Hugging Face dataset..." && \
42
+ python download_images.py; \
43
+ else \
44
+ echo "Hugging Face credentials not set, skipping image download."; \
45
+ fi
46
+
47
  # Expose port for Hugging Face Spaces (uses port 7860)
48
  EXPOSE 7860
49
 
app.py CHANGED
@@ -63,6 +63,9 @@ HF_TOKEN = os.environ.get("HF_TOKEN", "") # Set this in Hugging Face Space sett
63
  DATASET_REPO = os.environ.get("HF_DATASET_REPO", "image-uploader-data")
64
  IMAGES_PATH = "images"
65
  METADATA_PATH = "metadata"
 
 
 
66
 
67
  # Set HF cache directory to a writable location
68
  # This is necessary for Hugging Face Spaces which has permission issues with the default cache location
@@ -136,34 +139,45 @@ def verify_auth(request: Request):
136
  return True
137
 
138
  def get_image_metadata():
139
- """Get all image metadata including hashtags."""
140
- # In production, get metadata from Hugging Face
141
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME and HF_TOKEN:
142
- try:
143
- print(f"Fetching metadata from Hugging Face repository {HF_USERNAME}/{DATASET_REPO}")
144
- metadata_file = hf_api.hf_hub_download(
145
- repo_id=f"{HF_USERNAME}/{DATASET_REPO}",
146
- filename=f"{METADATA_PATH}/image_metadata.json",
147
- repo_type="dataset",
148
- token=HF_TOKEN,
149
- local_dir=os.path.join(tempfile.gettempdir(), "hf_downloads")
150
- )
151
- print(f"Metadata downloaded to {metadata_file}")
152
- with open(metadata_file, "r") as f:
153
- return json.load(f)
154
- except Exception as e:
155
- print(f"Error fetching metadata from Hugging Face: {e}")
156
- # Return empty dict if failed
157
- return {}
158
- # Local development fallback
159
- elif METADATA_FILE.exists():
160
  with open(METADATA_FILE, "r") as f:
161
- return json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  return {}
163
 
164
  def save_image_metadata(metadata):
165
- """Save image metadata to the JSON file."""
166
- # In production, save to Hugging Face
 
 
 
 
167
  if os.environ.get("ENV", "development") == "production" and HF_USERNAME and HF_TOKEN:
168
  try:
169
  print(f"Saving metadata to Hugging Face repository {HF_USERNAME}/{DATASET_REPO}")
@@ -175,16 +189,9 @@ def save_image_metadata(metadata):
175
  repo_type="dataset",
176
  token=HF_TOKEN
177
  )
178
- print(f"Metadata saved successfully")
179
  except Exception as e:
180
  print(f"Error saving metadata to Hugging Face: {e}")
181
- # Still save locally as fallback
182
- with open(METADATA_FILE, "w") as f:
183
- json.dump(metadata, f)
184
- else:
185
- # Local development or fallback
186
- with open(METADATA_FILE, "w") as f:
187
- json.dump(metadata, f)
188
 
189
  def add_hashtags_to_image(filename, hashtags, original_filename=None):
190
  """Add hashtags to an image."""
@@ -210,46 +217,12 @@ def mark_image_as_viewed(filename):
210
  metadata[filename]["is_new"] = False
211
  save_image_metadata(metadata)
212
 
213
- def upload_to_hf(file_content, filename):
214
- """Upload a file to Hugging Face Dataset Repository."""
215
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME and HF_TOKEN:
216
- try:
217
- print(f"Uploading file {filename} to Hugging Face repository {HF_USERNAME}/{DATASET_REPO}")
218
- hf_api.upload_file(
219
- path_or_fileobj=io.BytesIO(file_content),
220
- path_in_repo=f"{IMAGES_PATH}/{filename}",
221
- repo_id=f"{HF_USERNAME}/{DATASET_REPO}",
222
- repo_type="dataset",
223
- token=HF_TOKEN
224
- )
225
- print(f"File {filename} uploaded successfully")
226
- return True
227
- except Exception as e:
228
- print(f"Error uploading to Hugging Face: {e}")
229
- return False
230
- return False
231
-
232
- def delete_from_hf(filename):
233
- """Delete a file from Hugging Face Dataset Repository."""
234
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME and HF_TOKEN:
235
- try:
236
- print(f"Deleting file {filename} from Hugging Face repository {HF_USERNAME}/{DATASET_REPO}")
237
- hf_api.delete_file(
238
- path_in_repo=f"{IMAGES_PATH}/{filename}",
239
- repo_id=f"{HF_USERNAME}/{DATASET_REPO}",
240
- repo_type="dataset",
241
- token=HF_TOKEN
242
- )
243
- print(f"File {filename} deleted successfully")
244
- return True
245
- except Exception as e:
246
- print(f"Error deleting from Hugging Face: {e}")
247
- return False
248
- return False
249
-
250
  def get_hf_image_url(filename):
251
  """Get the URL for an image in the Hugging Face repo."""
252
- if HF_USERNAME:
 
 
 
253
  return f"https://huggingface.co/datasets/{HF_USERNAME}/{DATASET_REPO}/resolve/main/{IMAGES_PATH}/{filename}"
254
  return None
255
 
@@ -309,81 +282,61 @@ async def home(request: Request, search: Optional[str] = None, tag: Optional[str
309
  # Check if user is authenticated
310
  if not authenticate(request):
311
  return RedirectResponse(url="/login", status_code=status.HTTP_302_FOUND)
312
-
313
  # Get all uploaded images and their metadata
314
  uploaded_images = []
315
  metadata = get_image_metadata()
316
-
317
- # In production environment, get images from Hugging Face
318
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME:
319
- hf_images = list_hf_images()
320
-
321
- for file_name in hf_images:
322
- # Get hashtags from metadata if available
323
- hashtags = []
324
- is_new = False
325
- original_filename = file_name
326
-
327
- if file_name in metadata:
328
- hashtags = metadata[file_name].get("hashtags", [])
329
- is_new = metadata[file_name].get("is_new", False)
330
- original_filename = metadata[file_name].get("original_filename", file_name)
331
-
332
- # If searching/filtering, check if this image should be included
333
- if search and search.lower() not in original_filename.lower() and not any(search.lower() in tag.lower() for tag in hashtags):
334
- continue
335
-
336
- if tag and tag not in hashtags:
337
- continue
338
-
339
- image_url = get_hf_image_url(file_name)
340
-
341
- uploaded_images.append({
342
- "name": file_name,
343
- "url": image_url,
344
- "embed_url": image_url,
345
- "hashtags": hashtags,
346
- "is_new": is_new,
347
- "original_filename": original_filename
348
- })
349
- # Local development fallback
350
- elif UPLOAD_DIR.exists():
351
  for file in UPLOAD_DIR.iterdir():
352
  if is_valid_image(get_file_extension(file.name)):
353
- image_url = f"/static/uploads/{file.name}"
354
-
355
  # Get hashtags from metadata if available
356
  hashtags = []
357
  is_new = False
358
  original_filename = file.name
359
-
360
  if file.name in metadata:
361
  hashtags = metadata[file.name].get("hashtags", [])
362
  is_new = metadata[file.name].get("is_new", False)
363
  original_filename = metadata[file.name].get("original_filename", file.name)
364
-
365
  # If searching/filtering, check if this image should be included
366
  if search and search.lower() not in original_filename.lower() and not any(search.lower() in tag.lower() for tag in hashtags):
367
  continue
368
-
369
  if tag and tag not in hashtags:
370
  continue
371
-
372
- uploaded_images.append({
373
- "name": file.name,
374
- "url": image_url,
375
- "embed_url": f"{request.base_url}static/uploads/{file.name}",
376
- "hashtags": hashtags,
377
- "is_new": is_new,
378
- "original_filename": original_filename
379
- })
380
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  # Get all unique hashtags for the filter dropdown
382
  all_hashtags = set()
383
  for img_data in metadata.values():
384
  if "hashtags" in img_data:
385
  all_hashtags.update(img_data["hashtags"])
386
-
387
  return templates.TemplateResponse(
388
  "index.html",
389
  {
@@ -395,6 +348,86 @@ async def home(request: Request, search: Optional[str] = None, tag: Optional[str
395
  }
396
  )
397
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  @app.post("/upload/")
399
  async def upload_image(
400
  request: Request,
@@ -408,31 +441,22 @@ async def upload_image(
408
  status_code=status.HTTP_401_UNAUTHORIZED,
409
  content={"detail": "Not authenticated"}
410
  )
411
-
412
  # Process hashtags into a list
413
  hashtag_list = []
414
  if hashtags:
415
  # Split by spaces or commas and remove empty strings/whitespace
416
  hashtag_list = [tag.strip() for tag in hashtags.replace(',', ' ').split() if tag.strip()]
417
-
418
  results = []
419
  duplicates = []
420
-
421
  # First, check for duplicate filenames
422
  metadata = get_image_metadata()
423
  all_files = {}
424
-
425
- # In production, check for duplicates in Hugging Face repo
426
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME:
427
- hf_images = list_hf_images()
428
- for filename in hf_images:
429
- # Get original filename from metadata if available
430
- original_name = filename
431
- if filename in metadata and "original_filename" in metadata[filename]:
432
- original_name = metadata[filename]["original_filename"]
433
- all_files[original_name.lower()] = filename
434
- # Local development fallback
435
- elif UPLOAD_DIR.exists():
436
  for file in UPLOAD_DIR.iterdir():
437
  if is_valid_image(get_file_extension(file.name)):
438
  # Get original filename from metadata if available
@@ -440,7 +464,7 @@ async def upload_image(
440
  if file.name in metadata and "original_filename" in metadata[file.name]:
441
  original_name = metadata[file.name]["original_filename"]
442
  all_files[original_name.lower()] = file.name
443
-
444
  # Check for duplicates in current upload batch
445
  for file in files:
446
  file_lower = file.filename.lower()
@@ -451,7 +475,7 @@ async def upload_image(
451
  "existing_file": all_files[file_lower],
452
  "original_name": file.filename
453
  })
454
-
455
  # If we found duplicates, return them to the frontend for confirmation
456
  if duplicates:
457
  return {
@@ -460,45 +484,36 @@ async def upload_image(
460
  "message": "Duplicate filenames detected",
461
  "action_required": "confirm_replace"
462
  }
463
-
464
  # No duplicates, proceed with upload
465
  for file in files:
466
  # Check if the file is an image
467
  extension = get_file_extension(file.filename)
468
  if not is_valid_image(extension):
469
  continue # Skip non-image files
470
-
471
  # Preserve original filename in metadata but make it safe for filesystem
472
  original_filename = file.filename
473
-
474
  # Generate a unique filename to prevent overwrites
475
  unique_filename = f"{uuid.uuid4()}{extension}"
476
-
477
  # Read file content for upload
478
  file.file.seek(0)
479
  file_content = await file.read()
480
-
481
- # Save file based on environment
482
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME and HF_TOKEN:
483
- # Upload to Hugging Face
484
- upload_success = upload_to_hf(file_content, unique_filename)
485
- if not upload_success:
486
- # Fallback to local storage if HF upload fails
487
- file_path = UPLOAD_DIR / unique_filename
488
- with file_path.open("wb") as buffer:
489
- buffer.write(file_content)
490
- else:
491
- # Local development storage
492
- file_path = UPLOAD_DIR / unique_filename
493
- with file_path.open("wb") as buffer:
494
- buffer.write(file_content)
495
-
496
  # Save hashtags and original filename
497
  add_hashtags_to_image(unique_filename, hashtag_list, original_filename)
498
-
499
  # For base64 encoding
500
  base64_encoded = base64.b64encode(file_content).decode("utf-8")
501
-
502
  # Determine MIME type
503
  mime_type = {
504
  '.jpg': 'image/jpeg',
@@ -508,15 +523,20 @@ async def upload_image(
508
  '.bmp': 'image/bmp',
509
  '.webp': 'image/webp'
510
  }.get(extension, 'application/octet-stream')
511
-
512
- # Get appropriate image URL based on environment
513
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME:
514
- image_url = get_hf_image_url(unique_filename)
515
- full_url = image_url
 
 
 
 
 
 
516
  else:
517
- image_url = f"/static/uploads/{unique_filename}"
518
  full_url = f"{request.base_url}static/uploads/{unique_filename}"
519
-
520
  results.append({
521
  "success": True,
522
  "file_name": unique_filename,
@@ -528,7 +548,7 @@ async def upload_image(
528
  "base64_embed": f'<img src="data:{mime_type};base64,{base64_encoded}" alt="{original_filename}" />',
529
  "hashtags": hashtag_list
530
  })
531
-
532
  if len(results) == 1:
533
  return results[0]
534
  else:
@@ -548,13 +568,13 @@ async def upload_with_replace(
548
  status_code=status.HTTP_401_UNAUTHORIZED,
549
  content={"detail": "Not authenticated"}
550
  )
551
-
552
  # Process hashtags into a list
553
  hashtag_list = []
554
  if hashtags:
555
  # Split by spaces or commas and remove empty strings/whitespace
556
  hashtag_list = [tag.strip() for tag in hashtags.replace(',', ' ').split() if tag.strip()]
557
-
558
  # Parse the replacement files JSON
559
  files_to_replace = []
560
  if replace_files:
@@ -562,72 +582,53 @@ async def upload_with_replace(
562
  files_to_replace = json.loads(replace_files)
563
  except json.JSONDecodeError:
564
  files_to_replace = []
565
-
566
  # Create a map of original names to replacement decisions
567
  replace_map = {item["original_name"].lower(): item["existing_file"] for item in files_to_replace}
568
-
569
  results = []
570
-
571
  for file in files:
572
  # Check if the file is an image
573
  extension = get_file_extension(file.filename)
574
  if not is_valid_image(extension):
575
  continue # Skip non-image files
576
-
577
  # Preserve original filename in metadata
578
  original_filename = file.filename
579
  file_lower = original_filename.lower()
580
-
581
  # Read file content
582
  file.file.seek(0)
583
  file_content = await file.read()
584
-
585
  # Check if this file should replace an existing one
586
  if file_lower in replace_map:
587
- # Delete the old file
588
  old_filename = replace_map[file_lower]
589
-
590
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME and HF_TOKEN:
591
- # Delete from Hugging Face
592
- delete_success = delete_from_hf(old_filename)
593
- if not delete_success:
594
- raise HTTPException(status_code=404, detail="Image not found or could not be deleted")
595
- else:
596
- # Delete from local storage
597
- old_file = UPLOAD_DIR / old_filename
598
- if old_file.exists():
599
- os.remove(old_file)
600
-
601
  # Remove from metadata
602
  metadata = get_image_metadata()
603
  if old_filename in metadata:
604
  del metadata[old_filename]
605
  save_image_metadata(metadata)
606
-
607
  # Generate a unique filename to prevent overwrites
608
  unique_filename = f"{uuid.uuid4()}{extension}"
609
-
610
- # Save file based on environment
611
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME and HF_TOKEN:
612
- # Upload to Hugging Face
613
- upload_success = upload_to_hf(file_content, unique_filename)
614
- if not upload_success:
615
- # Fallback to local storage if HF upload fails
616
- file_path = UPLOAD_DIR / unique_filename
617
- with file_path.open("wb") as buffer:
618
- buffer.write(file_content)
619
- else:
620
- # Local development storage
621
- file_path = UPLOAD_DIR / unique_filename
622
- with file_path.open("wb") as buffer:
623
- buffer.write(file_content)
624
-
625
  # Save hashtags and original filename
626
  add_hashtags_to_image(unique_filename, hashtag_list, original_filename)
627
-
628
  # For base64 encoding
629
  base64_encoded = base64.b64encode(file_content).decode("utf-8")
630
-
631
  # Determine MIME type
632
  mime_type = {
633
  '.jpg': 'image/jpeg',
@@ -637,15 +638,20 @@ async def upload_with_replace(
637
  '.bmp': 'image/bmp',
638
  '.webp': 'image/webp'
639
  }.get(extension, 'application/octet-stream')
640
-
641
- # Get appropriate image URL based on environment
642
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME:
643
- image_url = get_hf_image_url(unique_filename)
644
- full_url = image_url
 
 
 
 
 
 
645
  else:
646
- image_url = f"/static/uploads/{unique_filename}"
647
  full_url = f"{request.base_url}static/uploads/{unique_filename}"
648
-
649
  results.append({
650
  "success": True,
651
  "file_name": unique_filename,
@@ -657,7 +663,7 @@ async def upload_with_replace(
657
  "base64_embed": f'<img src="data:{mime_type};base64,{base64_encoded}" alt="{original_filename}" />',
658
  "hashtags": hashtag_list
659
  })
660
-
661
  if len(results) == 1:
662
  return results[0]
663
  else:
@@ -669,36 +675,39 @@ async def view_image(request: Request, file_name: str):
669
  # Check if user is authenticated
670
  if not authenticate(request):
671
  return RedirectResponse(url="/login", status_code=status.HTTP_302_FOUND)
672
-
673
  # Mark image as viewed (not new)
674
  mark_image_as_viewed(file_name)
675
-
676
- # Determine image URL based on environment
677
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME:
678
- image_url = get_hf_image_url(file_name)
679
- embed_url = image_url
680
-
681
- # Check if file exists in Hugging Face
682
- if not image_url or file_name not in list_hf_images():
683
- raise HTTPException(status_code=404, detail="Image not found")
 
 
 
 
 
 
 
 
 
684
  else:
685
- # Check local file
686
- file_path = UPLOAD_DIR / file_name
687
- if not file_path.exists():
688
- raise HTTPException(status_code=404, detail="Image not found")
689
-
690
- image_url = f"/static/uploads/{file_name}"
691
  embed_url = f"{request.base_url}static/uploads/{file_name}"
692
-
693
  # Get metadata
694
  metadata = get_image_metadata()
695
  hashtags = []
696
  original_filename = file_name
697
-
698
  if file_name in metadata:
699
  hashtags = metadata[file_name].get("hashtags", [])
700
  original_filename = metadata[file_name].get("original_filename", file_name)
701
-
702
  return templates.TemplateResponse(
703
  "view.html",
704
  {
@@ -750,26 +759,21 @@ async def delete_image(request: Request, file_name: str):
750
  status_code=status.HTTP_401_UNAUTHORIZED,
751
  content={"detail": "Not authenticated"}
752
  )
753
-
754
- # Delete file based on environment
755
- if os.environ.get("ENV", "development") == "production" and HF_USERNAME and HF_TOKEN:
756
- # Delete from Hugging Face
757
- delete_success = delete_from_hf(file_name)
758
- if not delete_success:
759
- raise HTTPException(status_code=404, detail="Image not found or could not be deleted")
760
- else:
761
- # Delete from local storage
762
- file_path = UPLOAD_DIR / file_name
763
- if not file_path.exists():
764
- raise HTTPException(status_code=404, detail="Image not found")
765
- os.remove(file_path)
766
-
767
  # Remove from metadata
768
  metadata = get_image_metadata()
769
  if file_name in metadata:
770
  del metadata[file_name]
771
  save_image_metadata(metadata)
772
-
773
  return {"success": True, "message": f"Image {file_name} has been deleted"}
774
 
775
  # Health check endpoint for Hugging Face Spaces
@@ -777,9 +781,42 @@ async def delete_image(request: Request, file_name: str):
777
  async def health_check():
778
  return {"status": "ok"}
779
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
780
  if __name__ == "__main__":
781
  # For local development
782
  uvicorn.run("app:app", host="127.0.0.1", port=8000, reload=True)
783
 
784
  # For production/Hugging Face (uncomment when deploying)
785
- # uvicorn.run("app:app", host="0.0.0.0", port=7860)
 
63
  DATASET_REPO = os.environ.get("HF_DATASET_REPO", "image-uploader-data")
64
  IMAGES_PATH = "images"
65
  METADATA_PATH = "metadata"
66
+ SPACE_NAME = os.environ.get(
67
+ "HF_SPACE_NAME", ""
68
+ ) # Add this environment variable for the Space name
69
 
70
  # Set HF cache directory to a writable location
71
  # This is necessary for Hugging Face Spaces which has permission issues with the default cache location
 
139
  return True
140
 
141
  def get_image_metadata():
142
+ """Get all image metadata including hashtags from local storage and sync with HF if needed."""
143
+ if METADATA_FILE.exists():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  with open(METADATA_FILE, "r") as f:
145
+ metadata = json.load(f)
146
+
147
+ # In production, push the locally stored metadata to Hugging Face on every read (no check for whether it already exists there)
148
+ if (
149
+ os.environ.get("ENV", "development") == "production"
150
+ and HF_USERNAME
151
+ and HF_TOKEN
152
+ ):
153
+ try:
154
+ # Only upload if there are changes (we'd need to implement a proper change tracking mechanism)
155
+ # For now, we'll upload every time to ensure consistency
156
+ metadata_str = json.dumps(metadata)
157
+ hf_api.upload_file(
158
+ path_or_fileobj=io.BytesIO(metadata_str.encode()),
159
+ path_in_repo=f"{METADATA_PATH}/image_metadata.json",
160
+ repo_id=f"{HF_USERNAME}/{DATASET_REPO}",
161
+ repo_type="dataset",
162
+ token=HF_TOKEN,
163
+ )
164
+ except Exception as e:
165
+ print(f"Error syncing metadata to Hugging Face: {e}")
166
+
167
+ return metadata
168
+
169
+ # If metadata file doesn't exist locally, create it
170
+ with open(METADATA_FILE, "w") as f:
171
+ json.dump({}, f)
172
  return {}
173
 
174
  def save_image_metadata(metadata):
175
+ """Save image metadata to the local JSON file and sync with HF."""
176
+ # Always save locally first
177
+ with open(METADATA_FILE, "w") as f:
178
+ json.dump(metadata, f)
179
+
180
+ # In production, also save to Hugging Face
181
  if os.environ.get("ENV", "development") == "production" and HF_USERNAME and HF_TOKEN:
182
  try:
183
  print(f"Saving metadata to Hugging Face repository {HF_USERNAME}/{DATASET_REPO}")
 
189
  repo_type="dataset",
190
  token=HF_TOKEN
191
  )
192
+ print(f"Metadata saved successfully to Hugging Face")
193
  except Exception as e:
194
  print(f"Error saving metadata to Hugging Face: {e}")
 
 
 
 
 
 
 
195
 
196
  def add_hashtags_to_image(filename, hashtags, original_filename=None):
197
  """Add hashtags to an image."""
 
217
  metadata[filename]["is_new"] = False
218
  save_image_metadata(metadata)
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  def get_hf_image_url(filename):
221
  """Get the URL for an image in the Hugging Face repo."""
222
+ # Return the direct Space URL if available, otherwise use dataset URL
223
+ if SPACE_NAME and HF_USERNAME:
224
+ return f"https://{HF_USERNAME}-{SPACE_NAME}.hf.space/static/uploads/{filename}"
225
+ elif HF_USERNAME:
226
  return f"https://huggingface.co/datasets/{HF_USERNAME}/{DATASET_REPO}/resolve/main/{IMAGES_PATH}/{filename}"
227
  return None
228
 
 
282
  # Check if user is authenticated
283
  if not authenticate(request):
284
  return RedirectResponse(url="/login", status_code=status.HTTP_302_FOUND)
285
+
286
  # Get all uploaded images and their metadata
287
  uploaded_images = []
288
  metadata = get_image_metadata()
289
+
290
+ if UPLOAD_DIR.exists():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  for file in UPLOAD_DIR.iterdir():
292
  if is_valid_image(get_file_extension(file.name)):
 
 
293
  # Get hashtags from metadata if available
294
  hashtags = []
295
  is_new = False
296
  original_filename = file.name
297
+
298
  if file.name in metadata:
299
  hashtags = metadata[file.name].get("hashtags", [])
300
  is_new = metadata[file.name].get("is_new", False)
301
  original_filename = metadata[file.name].get("original_filename", file.name)
302
+
303
  # If searching/filtering, check if this image should be included
304
  if search and search.lower() not in original_filename.lower() and not any(search.lower() in tag.lower() for tag in hashtags):
305
  continue
306
+
307
  if tag and tag not in hashtags:
308
  continue
309
+
310
+ # Local URL for display
311
+ image_url = f"/static/uploads/{file.name}"
312
+
313
+ # Full URL for embedding (use Space URL in production if available)
314
+ if (
315
+ SPACE_NAME
316
+ and HF_USERNAME
317
+ and os.environ.get("ENV", "development") == "production"
318
+ ):
319
+ embed_url = f"https://{HF_USERNAME}-{SPACE_NAME}.hf.space/static/uploads/{file.name}"
320
+ else:
321
+ embed_url = f"{request.base_url}static/uploads/{file.name}"
322
+
323
+ uploaded_images.append(
324
+ {
325
+ "name": file.name,
326
+ "url": image_url,
327
+ "embed_url": embed_url,
328
+ "hashtags": hashtags,
329
+ "is_new": is_new,
330
+ "original_filename": original_filename,
331
+ }
332
+ )
333
+
334
  # Get all unique hashtags for the filter dropdown
335
  all_hashtags = set()
336
  for img_data in metadata.values():
337
  if "hashtags" in img_data:
338
  all_hashtags.update(img_data["hashtags"])
339
+
340
  return templates.TemplateResponse(
341
  "index.html",
342
  {
 
348
  }
349
  )
350
 
351
+
352
+ def upload_to_hf_and_local(file_content, filename):
353
+ """Upload a file to both Hugging Face Dataset Repository and local storage."""
354
+ local_success = False
355
+ hf_success = False
356
+
357
+ # Save locally first
358
+ try:
359
+ file_path = UPLOAD_DIR / filename
360
+ with file_path.open("wb") as buffer:
361
+ buffer.write(file_content)
362
+ local_success = True
363
+ print(f"File {filename} saved locally")
364
+ except Exception as e:
365
+ print(f"Error saving file locally: {e}")
366
+
367
+ # Then upload to Hugging Face if in production
368
+ if (
369
+ os.environ.get("ENV", "development") == "production"
370
+ and HF_USERNAME
371
+ and HF_TOKEN
372
+ ):
373
+ try:
374
+ print(
375
+ f"Uploading file {filename} to Hugging Face repository {HF_USERNAME}/{DATASET_REPO}"
376
+ )
377
+ hf_api.upload_file(
378
+ path_or_fileobj=io.BytesIO(file_content),
379
+ path_in_repo=f"{IMAGES_PATH}/{filename}",
380
+ repo_id=f"{HF_USERNAME}/{DATASET_REPO}",
381
+ repo_type="dataset",
382
+ token=HF_TOKEN,
383
+ )
384
+ print(f"File {filename} uploaded successfully to Hugging Face")
385
+ hf_success = True
386
+ except Exception as e:
387
+ print(f"Error uploading to Hugging Face: {e}")
388
+
389
+ return local_success or hf_success
390
+
391
+
392
+ def delete_from_hf_and_local(filename):
393
+ """Delete a file from both Hugging Face Dataset Repository and local storage."""
394
+ local_success = False
395
+ hf_success = False
396
+
397
+ # Delete locally first
398
+ file_path = UPLOAD_DIR / filename
399
+ if file_path.exists():
400
+ try:
401
+ os.remove(file_path)
402
+ local_success = True
403
+ print(f"File {filename} deleted locally")
404
+ except Exception as e:
405
+ print(f"Error deleting file locally: {e}")
406
+
407
+ # Then delete from Hugging Face if in production
408
+ if (
409
+ os.environ.get("ENV", "development") == "production"
410
+ and HF_USERNAME
411
+ and HF_TOKEN
412
+ ):
413
+ try:
414
+ print(
415
+ f"Deleting file {filename} from Hugging Face repository {HF_USERNAME}/{DATASET_REPO}"
416
+ )
417
+ hf_api.delete_file(
418
+ path_in_repo=f"{IMAGES_PATH}/{filename}",
419
+ repo_id=f"{HF_USERNAME}/{DATASET_REPO}",
420
+ repo_type="dataset",
421
+ token=HF_TOKEN,
422
+ )
423
+ print(f"File {filename} deleted successfully from Hugging Face")
424
+ hf_success = True
425
+ except Exception as e:
426
+ print(f"Error deleting from Hugging Face: {e}")
427
+
428
+ return local_success or hf_success
429
+
430
+
431
  @app.post("/upload/")
432
  async def upload_image(
433
  request: Request,
 
441
  status_code=status.HTTP_401_UNAUTHORIZED,
442
  content={"detail": "Not authenticated"}
443
  )
444
+
445
  # Process hashtags into a list
446
  hashtag_list = []
447
  if hashtags:
448
  # Split by spaces or commas and remove empty strings/whitespace
449
  hashtag_list = [tag.strip() for tag in hashtags.replace(',', ' ').split() if tag.strip()]
450
+
451
  results = []
452
  duplicates = []
453
+
454
  # First, check for duplicate filenames
455
  metadata = get_image_metadata()
456
  all_files = {}
457
+
458
+ # Check for duplicates in local storage first (should include HF images that were downloaded during build)
459
+ if UPLOAD_DIR.exists():
 
 
 
 
 
 
 
 
 
460
  for file in UPLOAD_DIR.iterdir():
461
  if is_valid_image(get_file_extension(file.name)):
462
  # Get original filename from metadata if available
 
464
  if file.name in metadata and "original_filename" in metadata[file.name]:
465
  original_name = metadata[file.name]["original_filename"]
466
  all_files[original_name.lower()] = file.name
467
+
468
  # Check for duplicates in current upload batch
469
  for file in files:
470
  file_lower = file.filename.lower()
 
475
  "existing_file": all_files[file_lower],
476
  "original_name": file.filename
477
  })
478
+
479
  # If we found duplicates, return them to the frontend for confirmation
480
  if duplicates:
481
  return {
 
484
  "message": "Duplicate filenames detected",
485
  "action_required": "confirm_replace"
486
  }
487
+
488
  # No duplicates, proceed with upload
489
  for file in files:
490
  # Check if the file is an image
491
  extension = get_file_extension(file.filename)
492
  if not is_valid_image(extension):
493
  continue # Skip non-image files
494
+
495
  # Preserve original filename in metadata but make it safe for filesystem
496
  original_filename = file.filename
497
+
498
  # Generate a unique filename to prevent overwrites
499
  unique_filename = f"{uuid.uuid4()}{extension}"
500
+
501
  # Read file content for upload
502
  file.file.seek(0)
503
  file_content = await file.read()
504
+
505
+ # Save file to both local storage and Hugging Face
506
+ upload_success = upload_to_hf_and_local(file_content, unique_filename)
507
+
508
+ if not upload_success:
509
+ continue # Skip to next file if upload failed
510
+
 
 
 
 
 
 
 
 
 
511
  # Save hashtags and original filename
512
  add_hashtags_to_image(unique_filename, hashtag_list, original_filename)
513
+
514
  # For base64 encoding
515
  base64_encoded = base64.b64encode(file_content).decode("utf-8")
516
+
517
  # Determine MIME type
518
  mime_type = {
519
  '.jpg': 'image/jpeg',
 
523
  '.bmp': 'image/bmp',
524
  '.webp': 'image/webp'
525
  }.get(extension, 'application/octet-stream')
526
+
527
+ # Relative URL of the image as served from local static storage
528
+ image_url = f"/static/uploads/{unique_filename}" # Local URL
529
+
530
+ # Full URL for embedding
531
+ if (
532
+ SPACE_NAME
533
+ and HF_USERNAME
534
+ and os.environ.get("ENV", "development") == "production"
535
+ ):
536
+ full_url = f"https://{HF_USERNAME}-{SPACE_NAME}.hf.space/static/uploads/{unique_filename}"
537
  else:
 
538
  full_url = f"{request.base_url}static/uploads/{unique_filename}"
539
+
540
  results.append({
541
  "success": True,
542
  "file_name": unique_filename,
 
548
  "base64_embed": f'<img src="data:{mime_type};base64,{base64_encoded}" alt="{original_filename}" />',
549
  "hashtags": hashtag_list
550
  })
551
+
552
  if len(results) == 1:
553
  return results[0]
554
  else:
 
568
  status_code=status.HTTP_401_UNAUTHORIZED,
569
  content={"detail": "Not authenticated"}
570
  )
571
+
572
  # Process hashtags into a list
573
  hashtag_list = []
574
  if hashtags:
575
  # Split by spaces or commas and remove empty strings/whitespace
576
  hashtag_list = [tag.strip() for tag in hashtags.replace(',', ' ').split() if tag.strip()]
577
+
578
  # Parse the replacement files JSON
579
  files_to_replace = []
580
  if replace_files:
 
582
  files_to_replace = json.loads(replace_files)
583
  except json.JSONDecodeError:
584
  files_to_replace = []
585
+
586
  # Create a map of original names to replacement decisions
587
  replace_map = {item["original_name"].lower(): item["existing_file"] for item in files_to_replace}
588
+
589
  results = []
590
+
591
  for file in files:
592
  # Check if the file is an image
593
  extension = get_file_extension(file.filename)
594
  if not is_valid_image(extension):
595
  continue # Skip non-image files
596
+
597
  # Preserve original filename in metadata
598
  original_filename = file.filename
599
  file_lower = original_filename.lower()
600
+
601
  # Read file content
602
  file.file.seek(0)
603
  file_content = await file.read()
604
+
605
  # Check if this file should replace an existing one
606
  if file_lower in replace_map:
607
+ # Delete the old file from both local storage and Hugging Face
608
  old_filename = replace_map[file_lower]
609
+ delete_from_hf_and_local(old_filename)
610
+
 
 
 
 
 
 
 
 
 
 
611
  # Remove from metadata
612
  metadata = get_image_metadata()
613
  if old_filename in metadata:
614
  del metadata[old_filename]
615
  save_image_metadata(metadata)
616
+
617
  # Generate a unique filename to prevent overwrites
618
  unique_filename = f"{uuid.uuid4()}{extension}"
619
+
620
+ # Upload to both local storage and Hugging Face
621
+ upload_success = upload_to_hf_and_local(file_content, unique_filename)
622
+
623
+ if not upload_success:
624
+ continue # Skip to next file if upload failed
625
+
 
 
 
 
 
 
 
 
 
626
  # Save hashtags and original filename
627
  add_hashtags_to_image(unique_filename, hashtag_list, original_filename)
628
+
629
  # For base64 encoding
630
  base64_encoded = base64.b64encode(file_content).decode("utf-8")
631
+
632
  # Determine MIME type
633
  mime_type = {
634
  '.jpg': 'image/jpeg',
 
638
  '.bmp': 'image/bmp',
639
  '.webp': 'image/webp'
640
  }.get(extension, 'application/octet-stream')
641
+
642
+ # Get direct image URL using Space URL if available
643
+ image_url = f"/static/uploads/{unique_filename}" # Local URL
644
+
645
+ # Full URL for embedding
646
+ if (
647
+ SPACE_NAME
648
+ and HF_USERNAME
649
+ and os.environ.get("ENV", "development") == "production"
650
+ ):
651
+ full_url = f"https://{HF_USERNAME}-{SPACE_NAME}.hf.space/static/uploads/{unique_filename}"
652
  else:
 
653
  full_url = f"{request.base_url}static/uploads/{unique_filename}"
654
+
655
  results.append({
656
  "success": True,
657
  "file_name": unique_filename,
 
663
  "base64_embed": f'<img src="data:{mime_type};base64,{base64_encoded}" alt="{original_filename}" />',
664
  "hashtags": hashtag_list
665
  })
666
+
667
  if len(results) == 1:
668
  return results[0]
669
  else:
 
675
  # Check if user is authenticated
676
  if not authenticate(request):
677
  return RedirectResponse(url="/login", status_code=status.HTTP_302_FOUND)
678
+
679
  # Mark image as viewed (not new)
680
  mark_image_as_viewed(file_name)
681
+
682
+ # Check if file exists locally (should always be the case now)
683
+ file_path = UPLOAD_DIR / file_name
684
+ if not file_path.exists():
685
+ raise HTTPException(status_code=404, detail="Image not found")
686
+
687
+ # Local URL for rendering in template
688
+ image_url = f"/static/uploads/{file_name}"
689
+
690
+ # Full URL for embedding
691
+ if (
692
+ SPACE_NAME
693
+ and HF_USERNAME
694
+ and os.environ.get("ENV", "development") == "production"
695
+ ):
696
+ embed_url = (
697
+ f"https://{HF_USERNAME}-{SPACE_NAME}.hf.space/static/uploads/{file_name}"
698
+ )
699
  else:
 
 
 
 
 
 
700
  embed_url = f"{request.base_url}static/uploads/{file_name}"
701
+
702
  # Get metadata
703
  metadata = get_image_metadata()
704
  hashtags = []
705
  original_filename = file_name
706
+
707
  if file_name in metadata:
708
  hashtags = metadata[file_name].get("hashtags", [])
709
  original_filename = metadata[file_name].get("original_filename", file_name)
710
+
711
  return templates.TemplateResponse(
712
  "view.html",
713
  {
 
759
  status_code=status.HTTP_401_UNAUTHORIZED,
760
  content={"detail": "Not authenticated"}
761
  )
762
+
763
+ # Delete from both local storage and Hugging Face
764
+ delete_success = delete_from_hf_and_local(file_name)
765
+
766
+ if not delete_success:
767
+ raise HTTPException(
768
+ status_code=404, detail="Image not found or could not be deleted"
769
+ )
770
+
 
 
 
 
 
771
  # Remove from metadata
772
  metadata = get_image_metadata()
773
  if file_name in metadata:
774
  del metadata[file_name]
775
  save_image_metadata(metadata)
776
+
777
  return {"success": True, "message": f"Image {file_name} has been deleted"}
778
 
779
  # Health check endpoint for Hugging Face Spaces
 
781
  async def health_check():
782
  return {"status": "ok"}
783
 
784
+
785
@app.on_event("startup")
async def startup_event():
    """Resolve the Space name and log the active storage configuration.

    When ``SPACE_NAME`` has not been configured explicitly it is derived from
    the ``owner/space-name`` identifier found in the environment. Hugging Face
    Spaces publishes that identifier as ``SPACE_ID``; ``HF_SPACE_ID`` is still
    honoured so deployments that set it by hand keep working.

    Side effects: may rebind the module-level ``SPACE_NAME`` and prints a
    summary of the configuration to stdout.
    """
    global SPACE_NAME

    # Try to extract the Space name from the environment if not explicitly set
    if not SPACE_NAME:
        for env_var in ("SPACE_ID", "HF_SPACE_ID"):
            parts = os.environ.get(env_var, "").split("/")
            # Identifier is in the format username/space-name; ignore values
            # with no slash or with nothing after it.
            if len(parts) > 1 and parts[1]:
                SPACE_NAME = parts[1]
                print(f"Extracted Space name from {env_var}: {SPACE_NAME}")
                break

    # Log configuration
    if os.environ.get("ENV", "development") != "production":
        print("Running in development mode. Local storage will be used.")
        return

    if not (HF_USERNAME and HF_TOKEN and DATASET_REPO):
        print(
            "Warning: Running in production mode but Hugging Face credentials not fully configured."
        )
        return

    print("Running in production mode with Hugging Face integration:")
    print(f"  - Username: {HF_USERNAME}")
    print(f"  - Dataset: {DATASET_REPO}")
    if SPACE_NAME:
        print(f"  - Space: {SPACE_NAME}")
        print(f"  - Space URL: https://{HF_USERNAME}-{SPACE_NAME}.hf.space")
    else:
        # Without a Space name the handlers build embed URLs from
        # request.base_url, so say that rather than "dataset links".
        print("  - Space name not set. Direct URLs will use the request base URL.")
815
+
816
+
817
if __name__ == "__main__":
    # Choose the bind address from ENV, the same switch the rest of the app
    # uses, so deploying no longer requires editing this file.
    if os.environ.get("ENV", "development") == "production":
        # Production/Hugging Face: listen on all interfaces on port 7860.
        uvicorn.run("app:app", host="0.0.0.0", port=7860)
    else:
        # Local development: loopback only, with auto-reload.
        uvicorn.run("app:app", host="127.0.0.1", port=8000, reload=True)
download_images.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""Populate the local static directories from the Hugging Face dataset repo.

Intended to be run as a script (the Docker build calls
``python download_images.py``). It downloads ``metadata/image_metadata.json``
and every file under ``images/`` from the ``HF_USERNAME/HF_DATASET_REPO``
dataset into ``static/metadata`` and ``static/uploads``.

The download is best effort: failures are printed and the script still exits
normally, leaving at least an empty metadata file behind.
"""
import json
import os
import shutil
import tempfile
from pathlib import Path

# Get environment variables
HF_USERNAME = os.environ.get("HF_USERNAME", "")
HF_TOKEN = os.environ.get("HF_TOKEN", "")
DATASET_REPO = os.environ.get("HF_DATASET_REPO", "image-uploader-data")

# Constants
IMAGES_PATH = "images"
METADATA_PATH = "metadata"
UPLOAD_DIR = Path("static/uploads")
METADATA_DIR = Path("static/metadata")
METADATA_FILE = METADATA_DIR / "image_metadata.json"

# Scratch directory that hf_hub_download materialises files into
DOWNLOAD_DIR = os.path.join(tempfile.gettempdir(), "hf_downloads")


def configure_hf_cache():
    """Point the Hugging Face cache at a writable temp location.

    Must be called before ``huggingface_hub`` is imported: the library
    resolves its cache paths from these variables when it is first imported,
    so setting them afterwards has no effect.
    """
    hf_home = os.path.join(tempfile.gettempdir(), "huggingface")
    hub_cache = os.path.join(hf_home, "hub")
    os.environ["HF_HOME"] = hf_home
    os.environ["HUGGINGFACE_HUB_CACHE"] = hub_cache
    os.makedirs(hf_home, exist_ok=True)
    os.makedirs(hub_cache, exist_ok=True)


def select_image_files(repo_files):
    """Return the entries of *repo_files* that live under ``images/``."""
    prefix = f"{IMAGES_PATH}/"
    return [name for name in repo_files if name.startswith(prefix)]


def ensure_metadata_file(path=None):
    """Create an empty JSON metadata file unless one already exists.

    An existing file is never overwritten, so metadata that was downloaded
    successfully survives a failure in a later step.

    Args:
        path: File to initialise; defaults to ``METADATA_FILE``.

    Returns:
        True if a new empty file was written, False if one was already there.
    """
    target = Path(METADATA_FILE if path is None else path)
    if target.exists():
        return False
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as f:
        json.dump({}, f)
    return True


def download_metadata(api, repo_id):
    """Fetch the metadata JSON from the dataset and save it to METADATA_FILE.

    On any failure the error is printed and an empty metadata file is created
    if none exists yet.
    """
    print(f"Downloading metadata from {repo_id}")
    try:
        metadata_file = api.hf_hub_download(
            repo_id=repo_id,
            filename=f"{METADATA_PATH}/image_metadata.json",
            repo_type="dataset",
            token=HF_TOKEN,
            local_dir=DOWNLOAD_DIR,
        )
        print(f"Metadata downloaded to {metadata_file}")

        # Parse before writing so a corrupt download never replaces the
        # local copy.
        with open(metadata_file, "r", encoding="utf-8") as f:
            metadata = json.load(f)

        # Save metadata locally
        with open(METADATA_FILE, "w", encoding="utf-8") as f:
            json.dump(metadata, f)
    except Exception as e:
        print(f"Error downloading metadata: {e}")
        ensure_metadata_file()


def download_images(api, repo_id):
    """Download every file under ``images/`` in the dataset into UPLOAD_DIR.

    A failure on one image is printed and does not stop the remaining
    downloads. Errors while listing the repository propagate to the caller.
    """
    print("Listing files in the dataset")
    files = api.list_repo_files(
        repo_id=repo_id, repo_type="dataset", token=HF_TOKEN
    )

    image_files = select_image_files(files)
    print(f"Found {len(image_files)} images")

    for image_file in image_files:
        try:
            filename = os.path.basename(image_file)
            print(f"Downloading {filename}")

            download_path = api.hf_hub_download(
                repo_id=repo_id,
                filename=image_file,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=DOWNLOAD_DIR,
            )

            # Copy to uploads directory
            destination = UPLOAD_DIR / filename
            shutil.copyfile(download_path, destination)

            print(f"Saved {filename} to {destination}")
        except Exception as e:
            print(f"Error downloading {image_file}: {e}")


def main():
    """Run the full download: metadata first, then all images."""
    print("Starting image download from Hugging Face dataset")

    configure_hf_cache()
    # Imported only after the cache variables are set (see configure_hf_cache).
    from huggingface_hub import HfApi

    # Create directories if they don't exist
    UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
    METADATA_DIR.mkdir(parents=True, exist_ok=True)

    repo_id = f"{HF_USERNAME}/{DATASET_REPO}"
    hf_api = HfApi(token=HF_TOKEN)

    try:
        # Check if repo exists; raises if it is missing or inaccessible
        print(f"Checking if repository {repo_id} exists")
        hf_api.repo_info(repo_id=repo_id, repo_type="dataset")

        download_metadata(hf_api, repo_id)
        download_images(hf_api, repo_id)

        print("Image download completed")
    except Exception as e:
        # Top-level boundary: keep the script best effort so it still exits
        # normally, but never clobber metadata that already exists.
        print(f"Error: {e}")
        if ensure_metadata_file():
            print("Created empty metadata file")


if __name__ == "__main__":
    main()
static/metadata/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+
static/uploads/.gitkeep ADDED
@@ -0,0 +1 @@
 
 
1
+