Spaces:

3emibrahim
/

debugging-testing1

Sleeping

App Files Files Community

3emibrahim committed on Apr 28

Commit

fa8cfe4

verified ·

1 Parent(s): a1345ee

Update backend/main.py

Browse files

Files changed (1) hide show

backend/main.py +188 -59

backend/main.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from fastapi import FastAPI, UploadFile, File, Form
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse, JSONResponse
 import cloudinary
@@ -6,12 +6,18 @@ import cloudinary.uploader
 import requests
 import io
 import os
 import pandas as pd
 from PyPDF2 import PdfReader
-from pptx import Presentation
 import textract
-from google import genai
-from google.genai import types
 # Cloudinary Config
 cloudinary.config(
@@ -23,79 +29,202 @@ cloudinary.config(
 # Google Gemini Client
 genai_client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
-def upload_file(file_bytes: bytes) -> str:
     try:
-        result = cloudinary.uploader.upload(io.BytesIO(file_bytes))
         return result.get("secure_url")
     except Exception as e:
         print(f"Cloudinary upload error: {e}")
         return None
-def convert_to_text(file: UploadFile) -> str:
-    # Convert PDF to text
-    if file.content_type == "application/pdf":
-        pdf_reader = PdfReader(io.BytesIO(file.file.read()))
-        text = ""
-        for page in pdf_reader.pages:
-            text += page.extract_text()
-        return text
-    # Convert Excel to text
-    elif file.content_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" or file.content_type == "application/vnd.ms-excel":
-        df = pd.read_excel(io.BytesIO(file.file.read()))
-        return df.to_string()
-    # Convert PowerPoint to text
-    elif file.content_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
-        prs = Presentation(io.BytesIO(file.file.read()))
-        text = ""
-        for slide in prs.slides:
-            for shape in slide.shapes:
-                if hasattr(shape, "text"):
-                    text += shape.text
-        return text
-    # Handle TXT files
-    elif file.content_type == "text/plain":
-        return file.file.read().decode("utf-8")
-    # Unsupported file type
-    else:
         return None
-@app.post("/process/")
-async def process_file(file: UploadFile = File(...), user_prompt: str = Form(...)):
-    # Convert file to text
-    file_text = convert_to_text(file)
-    if not file_text:
-        return JSONResponse(
-            status_code=400,
-            content={"error": "Unsupported file type or conversion failed."}
-        )
-    # Convert the text to a temporary file for uploading
-    file_bytes = file_text.encode("utf-8")
-    file_url = upload_file(file_bytes)
-    if not file_url:
-        return JSONResponse(
-            status_code=500,
-            content={"error": "File upload failed."}
         )
-    # Generate a response using the uploaded file (if needed)
-    result = generate_response(user_prompt, file_url)
-    return {"response": result}
-def generate_response(user_prompt: str, file_url: str) -> str:
     try:
-        # Send the URL to Gemini for processing (or other relevant actions)
         response = genai_client.models.generate_content(
-            model="gemini-2.0-flash-exp",
-            contents=[user_prompt, types.Part.from_text(file_url)],
         )
         return response.text
     except Exception as e:
         return f"Google Gemini API error: {e}"

+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse, JSONResponse
 import cloudinary
 import requests
 import io
 import os
+import mimetypes
+import tempfile
+from pathlib import Path
+from google import genai
+from google.genai import types
+# Import required libraries for file conversion
 import pandas as pd
 from PyPDF2 import PdfReader
+import pptx
 import textract
 # Cloudinary Config
 cloudinary.config(
 # Google Gemini Client
 genai_client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
def upload_file_to_cloudinary(file_bytes: bytes, file_type: str) -> str:
    """Upload raw bytes to Cloudinary and return the hosted URL.

    Args:
        file_bytes: Content to upload.
        file_type: ``"text"`` uploads with ``resource_type="raw"``; any other
            value uploads with ``resource_type="auto"``.

    Returns:
        The ``secure_url`` entry of the upload result. ``None`` is returned
        (despite the ``str`` annotation) when the upload raises; the error is
        printed rather than propagated.
    """
    secure_url = None
    try:
        if file_type == "text":
            resource_kind = "raw"
        else:
            resource_kind = "auto"
        stream = io.BytesIO(file_bytes)
        upload_result = cloudinary.uploader.upload(
            stream,
            resource_type=resource_kind,
        )
        secure_url = upload_result.get("secure_url")
    except Exception as err:
        print(f"Cloudinary upload error: {err}")
    return secure_url
def upload_image(file_bytes: bytes) -> str:
    """Upload image bytes to Cloudinary with the uploader's default options.

    Returns:
        The ``secure_url`` entry of the upload result. ``None`` is returned
        (despite the ``str`` annotation) when the upload raises; the error is
        printed rather than propagated.
    """
    secure_url = None
    try:
        stream = io.BytesIO(file_bytes)
        upload_result = cloudinary.uploader.upload(stream)
        secure_url = upload_result.get("secure_url")
    except Exception as err:
        print(f"Cloudinary upload error: {err}")
    return secure_url
def convert_to_text(file_bytes: bytes, file_type: str, filename: str) -> str:
    """Convert an uploaded document to plain text.

    The bytes are written to a temporary file (keeping the original
    extension) because the readers used here are given a filesystem path.
    The temporary file is removed whether or not the conversion succeeds.

    Args:
        file_bytes: Raw content of the uploaded file.
        file_type: MIME type selecting the extraction strategy. PDF, Excel,
            PowerPoint and ``text/plain`` have dedicated branches; any other
            value is handed to ``textract``.
        filename: Original file name; only its suffix is used. May be empty
            or ``None``.

    Returns:
        The extracted text.

    Raises:
        HTTPException: With status 500 when any step of the conversion fails.
    """
    temp_path = None
    try:
        suffix = Path(filename or "").suffix
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            # Record the path before writing so a failed write is still
            # cleaned up by the ``finally`` clause below.
            temp_path = temp_file.name
            temp_file.write(file_bytes)

        if file_type == "application/pdf":
            # ``or ""`` guards against a reader that yields None for a page
            # without extractable text.
            pages = PdfReader(temp_path).pages
            return "".join((page.extract_text() or "") + "\n" for page in pages)

        if file_type in (
            "application/vnd.ms-excel",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ):
            return pd.read_excel(temp_path).to_string()

        if file_type in (
            "application/vnd.ms-powerpoint",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ):
            parts = []
            for slide in pptx.Presentation(temp_path).slides:
                parts.extend(
                    shape.text + "\n"
                    for shape in slide.shapes
                    if hasattr(shape, "text")
                )
                # Visual separator between slides.
                parts.append("\n---\n")
            return "".join(parts)

        if file_type == "text/plain":
            # Undecodable bytes are dropped rather than failing the request.
            with open(temp_path, 'r', encoding='utf-8', errors='ignore') as f:
                return f.read()

        # Fall back to textract for any other type.
        return textract.process(temp_path).decode('utf-8', errors='ignore')
    except Exception as e:
        print(f"Conversion error: {e}")
        raise HTTPException(
            status_code=500, detail=f"File conversion failed: {str(e)}"
        ) from e
    finally:
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                # Cleanup is best effort; it must not mask the real outcome.
                print(f"Temporary file cleanup error: {cleanup_error}")
def generate_response(user_prompt: str, image_url: str) -> str:
    """Ask Gemini to answer ``user_prompt`` about the image at ``image_url``.

    Args:
        user_prompt: Instruction or question sent alongside the image.
        image_url: URL the image bytes are downloaded from.

    Returns:
        The model's text response, or a string starting with
        ``"Google Gemini API error:"`` when the download or the model call
        fails. Failures are reported in the return value, not raised.
    """
    try:
        # A timeout keeps a stalled download from hanging the request, and
        # raise_for_status() stops an HTTP error page from being forwarded
        # to the model as if it were image data.
        download = requests.get(image_url, timeout=30)
        download.raise_for_status()

        # Use the MIME type reported by the server when it is an image type;
        # otherwise keep the previous hard-coded default.
        reported_type = (
            download.headers.get("Content-Type", "").split(";")[0].strip().lower()
        )
        if reported_type.startswith("image/"):
            mime_type = reported_type
        else:
            mime_type = "image/jpeg"

        image_part = types.Part.from_bytes(
            data=download.content, mime_type=mime_type
        )
        response = genai_client.models.generate_content(
            model="gemini-2.0-flash-exp",
            contents=[user_prompt, image_part],
        )
        return response.text
    except Exception as e:
        return f"Google Gemini API error: {e}"
def generate_response_for_document(user_prompt: str, doc_url: str, mime_type: str) -> str:
    """Ask Gemini to answer ``user_prompt`` about the document at ``doc_url``.

    Args:
        user_prompt: Instruction or question sent after the document.
        doc_url: URL the document bytes are downloaded from.
        mime_type: MIME type declared to the model for the downloaded bytes.

    Returns:
        The model's text response, or a string starting with
        ``"Google Gemini API error:"`` when the download or the model call
        fails. Failures are reported in the return value, not raised.
    """
    try:
        # Bound the download time and reject HTTP error responses so an
        # error page is never forwarded to the model as document content.
        download = requests.get(doc_url, timeout=30)
        download.raise_for_status()

        document_part = types.Part.from_bytes(
            data=download.content,
            mime_type=mime_type,
        )
        response = genai_client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[document_part, user_prompt],
        )
        return response.text
    except Exception as e:
        return f"Google Gemini API error: {e}"
app = FastAPI()

# Everything under frontend/ is exposed at /static.
app.mount(
    "/static",
    StaticFiles(directory="frontend"),
    name="static",
)


# Landing page.
@app.get("/")
async def home():
    landing_page = FileResponse("frontend/index.html")
    return landing_page


# Text generator page.
@app.get("/text-generator")
async def text_generator():
    generator_page = FileResponse("frontend/text-generator.html")
    return generator_page


# About page.
@app.get("/about")
async def about():
    about_page = FileResponse("frontend/about.html")
    return about_page


# Features page.
@app.get("/features")
async def features():
    features_page = FileResponse("frontend/features.html")
    return features_page
# MIME types that are converted to plain text before being sent to Gemini.
_DOCUMENT_MIME_TYPES = frozenset({
    "application/pdf",
    "application/vnd.ms-excel",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.ms-powerpoint",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "text/plain",
})


@app.post("/process/")
async def process_file(file: UploadFile = File(...), user_prompt: str = Form(...)):
    """Run ``user_prompt`` against an uploaded image or document.

    Images are uploaded to Cloudinary and passed to ``generate_response``.
    Supported documents are converted to text, uploaded as a raw text file
    and passed to ``generate_response_for_document``.

    Returns:
        ``{"response": ...}`` for images; for documents additionally a
        ``text_content`` preview (first 500 characters) and the ``text_url``
        of the uploaded text. A JSON error with status 400 is returned for
        unsupported types and status 500 for upload or processing failures.
    """
    # NOTE(review): the helpers called below use synchronous network calls
    # (requests / Cloudinary uploader) inside this async handler.
    file_bytes = await file.read()
    filename = file.filename or ""

    # The declared type may carry parameters ("text/plain; charset=utf-8");
    # only the bare type is compared. A missing or generic declaration falls
    # back to guessing from the file name.
    declared_type = (file.content_type or "").split(";")[0].strip().lower()
    if declared_type in ("", "application/octet-stream"):
        content_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
    else:
        content_type = declared_type

    # Handle image files
    if content_type.startswith("image/"):
        image_url = upload_image(file_bytes)
        if not image_url:
            return JSONResponse(
                status_code=500,
                content={"error": "Image upload failed."}
            )
        result = generate_response(user_prompt, image_url)
        return {"response": result}

    if content_type not in _DOCUMENT_MIME_TYPES:
        return JSONResponse(
            status_code=400,
            content={"error": "Unsupported file type. Please upload an image, PDF, Excel, PowerPoint, or text file."}
        )

    # Handle document files (Excel, PDF, TXT, PowerPoint)
    try:
        text_content = convert_to_text(file_bytes, content_type, filename)

        # Upload the extracted text directly; no temporary file is needed.
        text_url = upload_file_to_cloudinary(text_content.encode("utf-8"), "text")
        if not text_url:
            return JSONResponse(
                status_code=500,
                content={"error": "Text file upload failed."}
            )

        result = generate_response_for_document(user_prompt, text_url, "text/plain")

        if len(text_content) > 500:
            preview = text_content[:500] + "..."
        else:
            preview = text_content
        return {
            "response": result,
            "text_content": preview,
            "text_url": text_url
        }
    except Exception as e:
        # Endpoint boundary: report any failure, including the HTTPException
        # raised by convert_to_text, as a JSON 500.
        return JSONResponse(
            status_code=500,
            content={"error": f"Processing failed: {str(e)}"}
        )