Spaces:

jerecom
/

Still_frame

Paused

App Files Files Community

Badal commited on 11 days ago

Commit

fce7147

0 Parent(s):

Upload code

Browse files

Files changed (4) hide show

Dockerfile +20 -0
README.md +86 -0
app.py +167 -0
requirements.txt +7 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+FROM python:3.11-slim
+WORKDIR /app
+# Install Tesseract plus the Hindi and Telugu language packs.
+# --no-install-recommends keeps optional packages out of the image, and the
+# apt lists are removed in the same layer so they never reach the final image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    tesseract-ocr \
+    tesseract-ocr-hin \
+    tesseract-ocr-tel \
+    libgl1 \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first so the dependency layer is reused when only code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,86 @@

+---
+license: mit
+title: 'Still frame '
+sdk: docker
+emoji: 🚀
+colorFrom: green
+colorTo: red
+pinned: false
+thumbnail: >-
+  https://cdn-uploads.huggingface.co/production/uploads/683d3312c1707119d087fc4d/DDo9ikZZQM1L9k5UC1ePR.jpeg
+short_description: 'Screenshot for picture '
+---
+title: TMDB OCR Pro API emoji: 🎬 colorFrom: blue colorTo: indigo sdk: docker app_file: app.py pinned: false
+🎬 TMDB + OCR Pro API (OptiPix Engine)
+Created by: Badal 🚀
+This is a high-performance, parallel-processing API designed to fetch 100% clean, text-free movie screenshots and posters. It bridges the gap between IMDb, TMDb, and the OptiPix Image Compression Engine.
+✨ Key Features
+ * Smart Text-Filter: Uses TMDb's language tag hack (iso_639_1 is null) to instantly filter out fan-made posters and title cards.
+ * Hardcore OCR Scanner: Integrates Tesseract OCR (English, Hindi, Telugu) to aggressively scan and reject any remaining images containing text.
+ * Parallel Optimization: Uses asyncio.gather to send multiple images to the OptiPix compression server simultaneously, resulting in blazing-fast response times.
+ * ISP Bypass (India Ready): Generates optimized URLs via a custom CDN, bypassing Indian ISP blocks on TMDb image servers.
+ * Dual URL Output: Returns both the original TMDb HD URL and the Secured OptiPix Compressed URL.
+📡 API Reference
+Endpoint
+POST /get-media
+Request Format
+Content-Type: multipart/form-data (application/x-www-form-urlencoded is also accepted, as in the cURL example below)
+Parameters
+| Parameter | Type | Required | Default | Description |
+|---|---|---|---|---|
+| title_id | string | Yes | - | The IMDb Title ID of the movie (e.g., tt3801314). |
+| top_shots | integer | No | 3 | Maximum number of clean screenshots you want to fetch. |
+| level | string | No | extreme | Compression level for OptiPix (none, medium, extreme). |
+💻 How to Make a Request
+Example 1: cURL (Terminal)
+curl -X POST "https://YOUR_SPACE_NAME.hf.space/get-media" \
+     -H "accept: application/json" \
+     -H "Content-Type: application/x-www-form-urlencoded" \
+     -d "title_id=tt3801314&top_shots=3&level=extreme"
+Example 2: JavaScript (Frontend)
+const formData = new FormData();
+formData.append("title_id", "tt3801314");
+formData.append("top_shots", 3);
+formData.append("level", "extreme");
+fetch("https://YOUR_SPACE_NAME.hf.space/get-media", {
+  method: "POST",
+  body: formData
+})
+.then(response => response.json())
+.then(data => console.log(data));
+📦 Expected JSON Response
+The API returns a clean JSON object containing both the Poster and an array of Screenshots.
+{
+  "title_id": "tt3801314",
+  "tmdb_id": 293313,
+  "requested_shots": 3,
+  "total_screenshots_scanned": 15,
+  "poster": {
+    "original_url": "https://image.tmdb.org/t/p/original/mxyz123.jpg",
+    "processed_url": "https://bk939448-image-optimizer-api.hf.space/optimized_poster.jpg"
+  },
+  "screenshots": [
+    {
+      "original_url": "https://image.tmdb.org/t/p/original/abc1.jpg",
+      "processed_url": "https://bk939448-image-optimizer-api.hf.space/shot1.jpg"
+    },
+    {
+      "original_url": "https://image.tmdb.org/t/p/original/abc2.jpg",
+      "processed_url": "https://bk939448-image-optimizer-api.hf.space/shot2.jpg"
+    },
+    {
+      "original_url": "https://image.tmdb.org/t/p/original/abc3.jpg",
+      "processed_url": "https://bk939448-image-optimizer-api.hf.space/shot3.jpg"
+    }
+  ]
+}
+Note: If OptiPix fails to compress an image, its processed_url is returned as null. You can always fall back to the original_url.
+⚙️ Deployment Requirements
+If you are hosting this yourself, ensure the following setup:
+ * Dockerfile: Must have tesseract-ocr, tesseract-ocr-hin, and tesseract-ocr-tel installed via apt-get.
+ * Environment Variables: You MUST set your TMDb API Key in the server secrets.
+   * TMDB_API_KEY = your_tmdb_api_key_here

app.py ADDED Viewed

	@@ -0,0 +1,167 @@

+import os
+import httpx
+from fastapi import FastAPI, Form, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from typing import Optional, List
+import asyncio
+import uvicorn
+import pytesseract
+from PIL import Image
+import io
+import re
+# Tesseract का Linux पाथ
+pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
+app = FastAPI(title="TMDB + OCR Pro API | Badal Special")
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+OPTIPIX_API = "https://jerecom-image-optimizer-api-2.hf.space/upload-poster"
+TMDB_API_KEY = os.getenv("TMDB_API_KEY")
+class ImageMedia(BaseModel):
+    original_url: str
+    processed_url: Optional[str]
+class ProcessResponse(BaseModel):
+    title_id: str
+    tmdb_id: int
+    requested_shots: int
+    total_screenshots_scanned: int
+    poster: Optional[ImageMedia]
+    screenshots: List[ImageMedia]
+# --- 1. OCR Scanner Function ---
+def check_text_in_image(image_bytes: bytes) -> bool:
+    try:
+        img = Image.open(io.BytesIO(image_bytes))
+        img.thumbnail((500, 500)) # फ़ास्ट स्कैनिंग के लिए छोटा करना
+        img = img.convert('L') # ब्लैक एंड वाइट
+        # इंग्लिश, हिंदी और तेलुगु स्कैन
+        text = pytesseract.image_to_string(img, lang='eng+hin+tel')
+        # सिर्फ़ शब्द और नंबर रखना
+        clean_text = re.sub(r'[^a-zA-Z0-9\u0900-\u097F\u0C00-\u0C7F]', '', text)
+        # अगर 4 कैरेक्टर से ज़्यादा टेक्स्ट है, तो यह स्क्रीनशॉट नहीं, पोस्टर है (True)
+        return len(clean_text) > 4
+    except Exception as e:
+        print(f"OCR Parsing Error: {e}")
+        return True # रिस्क नहीं लेने का, रिजेक्ट कर दो!
+# --- 2. Parallel OptiPix Function ---
+async def optimize_image(client: httpx.AsyncClient, raw_url: str, level: str):
+    form_data = {"level": level, "url": raw_url}
+    result = {"original_url": raw_url, "processed_url": None}
+    try:
+        res = await client.post(OPTIPIX_API, data=form_data, timeout=30.0)
+        data = res.json()
+        if data.get("success"):
+            result["processed_url"] = data.get("url")
+    except Exception as e:
+        print(f"OptiPix failed for {raw_url} - Error: {e}")
+    return result
+@app.post("/get-media", response_model=ProcessResponse)
+async def get_media(
+    title_id: str = Form(..., description="IMDb Title ID (e.g., tt3801314)"),
+    top_shots: int = Form(3, description="Number of screenshots required"),
+    level: str = Form("extreme", description="Compression level")
+):
+    if not TMDB_API_KEY:
+        raise HTTPException(status_code=500, detail="TMDB_API_KEY is missing!")
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        # --- STEP 1: TMDb ID ढूँढना ---
+        find_url = f"https://api.themoviedb.org/3/find/{title_id}?external_source=imdb_id&api_key={TMDB_API_KEY}"
+        find_res = await client.get(find_url)
+        find_data = find_res.json()
+        movie_results = find_data.get("movie_results", [])
+        if not movie_results:
+            return {"error": "TMDb पर इस IMDb ID की कोई मूवी नहीं मिली!"}
+        tmdb_id = movie_results[0]["id"]
+        # --- STEP 2: TMDb से इमेजेज लाना ---
+        images_url = f"https://api.themoviedb.org/3/movie/{tmdb_id}/images?api_key={TMDB_API_KEY}"
+        img_res = await client.get(images_url)
+        img_data = img_res.json()
+        raw_backdrops = img_data.get("backdrops", [])
+        raw_posters = img_data.get("posters", [])
+        # 🔥 SMART HACK: सिर्फ़ वो बैकड्रॉप्स लो जिनमें लैंग्वेज 'null' हो
+        clean_backdrops = [shot for shot in raw_backdrops if shot.get("iso_639_1") is None]
+        clean_backdrops.sort(key=lambda x: x["width"], reverse=True)
+        # --- STEP 3: Poster निकालना ---
+        best_poster_url = None
+        if raw_posters:
+            raw_posters.sort(key=lambda x: x["width"], reverse=True)
+            best_poster_url = f"https://image.tmdb.org/t/p/original{raw_posters[0]['file_path']}"
+        # --- STEP 4: HARDCORE OCR SCANNING ---
+        verified_screenshots_urls = []
+        for shot in clean_backdrops:
+            if len(verified_screenshots_urls) >= top_shots:
+                break # ज़रूरत पूरी हो गई, रुक जाओ
+            shot_url = f"https://image.tmdb.org/t/p/original{shot['file_path']}"
+            try:
+                # इमेज डाउनलोड करके OCR को दो
+                img_res_dl = await client.get(shot_url, timeout=10.0)
+                if img_res_dl.status_code == 200:
+                    # Async में OCR चलाओ ताकि सर्वर हैंग न हो
+                    has_text = await asyncio.to_thread(check_text_in_image, img_res_dl.content)
+                    if not has_text: # अगर टेक्स्ट नहीं है, तो पास!
+                        verified_screenshots_urls.append(shot_url)
+                        print(f"Clean Screenshot Passed OCR: {shot_url}")
+                    else:
+                        print(f"Rejected by OCR (Text Found): {shot_url}")
+            except Exception as e:
+                print(f"Image download error for OCR: {e}")
+                continue
+        # --- STEP 5: पैरेलल ऑप्टिमाइज़ेशन (OptiPix) ---
+        tasks = []
+        if best_poster_url:
+            tasks.append(optimize_image(client, best_poster_url, level))
+        for url in verified_screenshots_urls:
+            tasks.append(optimize_image(client, url, level))
+        results = await asyncio.gather(*tasks)
+        final_poster = None
+        final_screenshots = []
+        if best_poster_url and results:
+            final_poster = results[0]
+            final_screenshots = results[1:]
+        else:
+            final_screenshots = results
+    return ProcessResponse(
+        title_id=title_id,
+        tmdb_id=tmdb_id,
+        requested_shots=top_shots,
+        total_screenshots_scanned=len(clean_backdrops),
+        poster=final_poster,
+        screenshots=final_screenshots
+    )
+if __name__ == "__main__":
+    uvicorn.run("app:app", host="0.0.0.0", port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi
+uvicorn
+httpx
+pydantic
+python-multipart
+Pillow
+pytesseract