Spaces:

harshvisualz
/

wcrawler

Sleeping

App Files Files Community

harsh-dev committed on Nov 20, 2025

Commit

83fe205

1 Parent(s): fd70500

initial push

Browse files

Files changed (7) hide show

Dockerfile +17 -0
main.py +107 -0
requirements.txt +28 -0
utils/__init__.py +3 -0
utils/__pycache__/__init__.cpython-313.pyc +0 -0
utils/__pycache__/main.cpython-313.pyc +0 -0
utils/main.py +117 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+# Use HuggingFace Playwright-enabled image
+FROM ghcr.io/huggingface/playwright-python:latest
+# Set working directory
+WORKDIR /app
+# Install dependencies first so this layer stays cached until
+# requirements.txt itself changes (source edits no longer force a reinstall)
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the project files
+COPY . /app
+# Hugging Face Spaces expects the app on port 7860 by default
+EXPOSE 7860
+# Start FastAPI
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py ADDED Viewed

	@@ -0,0 +1,107 @@

+from fastapi import FastAPI
+from fastapi.responses import FileResponse, StreamingResponse
+from ddgs import DDGS
+import json
+from utils import findImages, findVideos, findNews, findBooks, findSearchResults
+from fastapi.middleware.cors import CORSMiddleware
+import os
+from io import BytesIO
+from playwright.sync_api import sync_playwright
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allow all origins
+    allow_credentials=True,
+    allow_methods=["*"],  # Allow all HTTP methods (GET, POST, etc.)
+    allow_headers=["*"],  # Allow all headers
+)
+@app.get("/")
+def read_root():
+    return {"message": "Hello, World!"}
+@app.get("/search")
+def search(
+    query: str,
+    region: str = "us-en",
+    safesearch: str = "moderate",
+    timelimit: str | None = None,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto"
+):
+    results = findSearchResults(query, region, safesearch, timelimit, max_results, page, backend)
+    return results
+@app.get("/images")
+def images(
+    query: str,
+    region: str = "us-en",
+    safesearch: str = "moderate",
+    timelimit: str | None = None,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto",
+    size: str | None = None,
+    color: str | None = None,
+    type_image: str | None = None,
+    layout: str | None = None,
+    license_image: str | None = None,
+):
+    results = findImages(query, region, safesearch, timelimit, max_results, page, backend, size, color, type_image, layout, license_image)
+    return results
+@app.get("/videos")
+def videos(
+    query: str,
+    region: str = "us-en",
+    safesearch: str = "moderate",
+    timelimit: str | None = None,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto",
+    resolution: str | None = None,
+    duration: str | None = None,
+    license_videos: str | None = None,
+):
+    results = findVideos(query, region, safesearch, timelimit, max_results, page, backend, resolution, duration, license_videos)
+    return results
+@app.get("/news")
+def news(
+    query: str,
+    region: str = "us-en",
+    safesearch: str = "moderate",
+    timelimit: str | None = None,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto",
+):
+    results = findNews(query, region, safesearch, timelimit, max_results, page, backend)
+    return results
+@app.get("/books")
+def books(
+    query: str,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto",
+):
+    results = findBooks(query, max_results, page, backend)
+    return results
+@app.get("/screenshot")
+def screenshot(url: str):
+    with sync_playwright() as p:
+        browser = p.chromium.launch(channel="chromium", headless=True)  # uses system Chrome
+        page = browser.new_page()
+        page.goto(url)
+        img_bytes = page.screenshot()
+        browser.close()
+    img_data = BytesIO(img_bytes)
+    img_data.seek(0)
+    return StreamingResponse(img_data, media_type="image/png", headers={"Content-Disposition": "inline; filename=screenshot.png"})

requirements.txt ADDED Viewed

	@@ -0,0 +1,28 @@

+annotated-types==0.7.0
+anyio==4.11.0
+Brotli==1.1.0
+certifi==2025.8.3
+click==8.3.0
+ddgs==9.6.0
+exceptiongroup==1.3.0
+fastapi==0.118.0
+greenlet==3.2.4
+h11==0.16.0
+h2==4.3.0
+hpack==4.1.0
+httpcore==1.0.9
+httpx==0.28.1
+hyperframe==6.1.0
+idna==3.10
+lxml==6.0.2
+playwright==1.55.0
+primp==0.15.0
+pydantic==2.11.9
+pydantic_core==2.33.2
+pyee==13.0.0
+sniffio==1.3.1
+socksio==1.0.0
+starlette==0.48.0
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+uvicorn==0.37.0

utils/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from .main import findBooks, findImages, findNews, findVideos, findSearchResults
2	+
3	+ __all__ = ["findBooks", "findImages", "findNews", "findVideos", "findSearchResults"]

utils/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (313 Bytes). View file

utils/__pycache__/main.cpython-313.pyc ADDED Viewed

Binary file (4.2 kB). View file

utils/main.py ADDED Viewed

	@@ -0,0 +1,117 @@

+from ddgs import DDGS
+def findSearchResults(
+    query: str,
+    region: str = "us-en",
+    safesearch: str = "moderate",
+    timelimit: str | None = None,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto"
+) -> list[dict[str, str]]:
+    # results = DDGS(query, max_results=max_results)
+    results = DDGS().text(query, max_results=max_results, region=region, safesearch=safesearch, timelimit=timelimit, page=page, backend=backend)
+    for i, result in enumerate(results):
+        print(f"{i+1}: {result['title']} - {result['href']}")
+    return results
+def findImages(
+    query: str,
+    region: str = "us-en",
+    safesearch: str = "moderate",
+    timelimit: str | None = None,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto",
+    size: str | None = None,
+    color: str | None = None,
+    type_image: str | None = None,
+    layout: str | None = None,
+    license_image: str | None = None,
+) -> list[dict[str, str]]:
+    results = DDGS().images(
+        query,
+        region=region,
+        safesearch=safesearch,
+        timelimit=timelimit,
+        max_results=max_results,
+        page=page,
+        backend=backend,
+        size=size,
+        color=color,
+        type_image=type_image,
+        layout=layout,
+        license_image=license_image,
+    )
+    print(safesearch)
+    for i, result in enumerate(results):
+        print(f"{i+1}: {result['title']} - {result['image']}")
+    return results
+def findVideos(
+    query: str,
+    region: str = "us-en",
+    safesearch: str = "moderate",
+    timelimit: str | None = None,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto",
+    resolution: str | None = None,
+    duration: str | None = None,
+    license_videos: str | None = None,
+) -> list[dict[str, str]]:
+    results = DDGS().videos(
+        query,
+        region=region,
+        safesearch=safesearch,
+        timelimit=timelimit,
+        max_results=max_results,
+        page=page,
+        backend=backend,
+        resolution=resolution,
+        duration=duration,
+        license_videos=license_videos,
+    )
+    for i, result in enumerate(results):
+        print(f"{i+1}: {result['title']} - {result['embed_url']}")
+    return results
+def findNews(
+    query: str,
+    region: str = "us-en",
+    safesearch: str = "moderate",
+    timelimit: str | None = None,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto",
+) -> list[dict[str, str]]:
+    results = DDGS().news(
+        query,
+        region=region,
+        safesearch=safesearch,
+        timelimit=timelimit,
+        max_results=max_results,
+        page=page,
+        backend=backend,
+    )
+    for i, result in enumerate(results):
+        print(f"{i+1}: {result['title']} - {result['url']}")
+    return results
+def findBooks(
+    query: str,
+    max_results: int | None = 10,
+    page: int = 1,
+    backend: str = "auto",
+) -> list[dict[str, str]]:
+    results = DDGS().books(
+        query,
+        max_results=max_results,
+        page=page,
+        backend=backend,
+    )
+    for i, result in enumerate(results):
+        print(f"{i+1}: {result['title']} - {result['url']}")
+    return results