3emibrahim committed on
Commit
fa8cfe4
·
verified ·
1 Parent(s): a1345ee

Update backend/main.py

Browse files
Files changed (1) hide show
  1. backend/main.py +188 -59
backend/main.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI, UploadFile, File, Form
2
  from fastapi.staticfiles import StaticFiles
3
  from fastapi.responses import FileResponse, JSONResponse
4
  import cloudinary
@@ -6,12 +6,18 @@ import cloudinary.uploader
6
  import requests
7
  import io
8
  import os
 
 
 
 
 
 
 
 
9
  import pandas as pd
10
  from PyPDF2 import PdfReader
11
- from pptx import Presentation
12
  import textract
13
- from google import genai
14
- from google.genai import types
15
 
16
  # Cloudinary Config
17
  cloudinary.config(
@@ -23,79 +29,202 @@ cloudinary.config(
23
  # Google Gemini Client
24
  genai_client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
25
 
26
def upload_file(file_bytes: bytes) -> "str | None":
    """Upload raw bytes to Cloudinary and return the hosted HTTPS URL.

    Args:
        file_bytes: Content to upload.

    Returns:
        The ``secure_url`` field of Cloudinary's upload result, or ``None``
        when the upload fails for any reason. Failures are printed rather
        than raised, so callers must check the return value.
    """
    try:
        result = cloudinary.uploader.upload(io.BytesIO(file_bytes))
        return result.get("secure_url")
    except Exception as e:
        # Deliberately best-effort: the caller turns ``None`` into an
        # HTTP error response.
        print(f"Cloudinary upload error: {e}")
        return None
33
 
34
def convert_to_text(file: "UploadFile") -> "str | None":
    """Extract plain text from an uploaded PDF, Excel, PowerPoint or TXT file.

    Args:
        file: The upload. Only ``file.content_type`` and ``file.file.read()``
            are used.

    Returns:
        The extracted text, or ``None`` when the content type is not one of
        the supported document types.
    """
    # Drop MIME parameters such as "; charset=utf-8" so they do not defeat
    # the exact-match comparisons below.
    content_type = (file.content_type or "").split(";", 1)[0].strip().lower()

    if content_type == "application/pdf":
        pdf_reader = PdfReader(io.BytesIO(file.file.read()))
        # A page can yield no text; guard against a non-string result and
        # keep pages separated so words do not fuse across page breaks.
        return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

    if content_type in (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-excel",
    ):
        df = pd.read_excel(io.BytesIO(file.file.read()))
        return df.to_string()

    if content_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation":
        prs = Presentation(io.BytesIO(file.file.read()))
        # One line per text-bearing shape, in slide order.
        return "\n".join(
            shape.text
            for slide in prs.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )

    if content_type == "text/plain":
        # Replace undecodable bytes instead of raising on a bad upload.
        return file.file.read().decode("utf-8", errors="replace")

    # Unsupported file type.
    return None
65
 
66
@app.post("/process/")
async def process_file(file: UploadFile = File(...), user_prompt: str = Form(...)):
    """Convert an uploaded document to text, host it, and ask Gemini about it.

    Args:
        file: The uploaded document.
        user_prompt: The question or instruction to send with the document.

    Returns:
        ``{"response": <model text>}`` on success; a 400 JSON error when the
        file type is unsupported or conversion fails; a 500 JSON error when
        the Cloudinary upload fails.
    """
    try:
        file_text = convert_to_text(file)
    except Exception as e:
        # A corrupt or unreadable document is a conversion failure, which the
        # 400 message below already covers; do not let it escape as an
        # unhandled server error.
        print(f"Conversion error: {e}")
        file_text = None

    if not file_text:
        return JSONResponse(
            status_code=400,
            content={"error": "Unsupported file type or conversion failed."}
        )

    # Upload the extracted text so it can be referenced by URL.
    file_bytes = file_text.encode("utf-8")
    file_url = upload_file(file_bytes)

    if not file_url:
        return JSONResponse(
            status_code=500,
            content={"error": "File upload failed."}
        )

    result = generate_response(user_prompt, file_url)
    return {"response": result}
 
 
 
 
 
 
90
 
91
def generate_response(user_prompt: str, file_url: str) -> str:
    """Send the prompt and the uploaded file's URL to Gemini.

    Args:
        user_prompt: The user's question or instruction.
        file_url: URL of the uploaded text file.

    Returns:
        The model's text reply, or a string starting with
        ``"Google Gemini API error:"`` when the call fails.
    """
    try:
        # NOTE(review): only the URL string is sent as a text part, not the
        # file's contents - confirm that is intended.
        response = genai_client.models.generate_content(
            model="gemini-2.0-flash-exp",
            # ``text`` is passed by keyword: newer google-genai releases make
            # it keyword-only, and the keyword form is accepted either way.
            contents=[user_prompt, types.Part.from_text(text=file_url)],
        )

        return response.text
    except Exception as e:
        return f"Google Gemini API error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
2
  from fastapi.staticfiles import StaticFiles
3
  from fastapi.responses import FileResponse, JSONResponse
4
  import cloudinary
 
6
  import requests
7
  import io
8
  import os
9
+ import mimetypes
10
+ import tempfile
11
+ from pathlib import Path
12
+
13
+ from google import genai
14
+ from google.genai import types
15
+
16
+ # Import required libraries for file conversion
17
  import pandas as pd
18
  from PyPDF2 import PdfReader
19
+ import pptx
20
  import textract
 
 
21
 
22
  # Cloudinary Config
23
  cloudinary.config(
 
29
  # Google Gemini Client
30
  genai_client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
31
 
32
def upload_file_to_cloudinary(file_bytes: bytes, file_type: str) -> "str | None":
    """Upload bytes to Cloudinary and return the hosted HTTPS URL.

    Args:
        file_bytes: Content to upload.
        file_type: ``"text"`` uploads with ``resource_type="raw"``; any other
            value uploads with ``resource_type="auto"``.

    Returns:
        The ``secure_url`` field of Cloudinary's upload result, or ``None``
        when the upload fails for any reason. Failures are printed rather
        than raised, so callers must check the return value.
    """
    resource_type = "raw" if file_type == "text" else "auto"
    try:
        result = cloudinary.uploader.upload(
            io.BytesIO(file_bytes),
            resource_type=resource_type,
        )
        return result.get("secure_url")
    except Exception as e:
        # Deliberately best-effort: the caller turns ``None`` into an
        # HTTP error response.
        print(f"Cloudinary upload error: {e}")
        return None
42
 
43
def upload_image(file_bytes: bytes) -> "str | None":
    """Upload image bytes to Cloudinary and return the hosted HTTPS URL.

    Args:
        file_bytes: Encoded image content.

    Returns:
        The ``secure_url`` field of Cloudinary's upload result, or ``None``
        when the upload fails for any reason. Failures are printed rather
        than raised, so callers must check the return value.
    """
    try:
        result = cloudinary.uploader.upload(io.BytesIO(file_bytes))
        return result.get("secure_url")
    except Exception as e:
        # Deliberately best-effort: the caller turns ``None`` into an
        # HTTP error response.
        print(f"Cloudinary upload error: {e}")
        return None
50
 
51
def convert_to_text(file_bytes: bytes, file_type: str, filename: str) -> str:
    """Convert various file types to plain text.

    The bytes are written to a temporary file (keeping the original
    extension) because the converters used here work from a path. The
    temporary file is always removed, including when conversion fails.

    Args:
        file_bytes: Raw content of the uploaded file.
        file_type: MIME type used to pick the converter. PDF, Excel,
            PowerPoint and plain text are handled explicitly; any other type
            is handed to ``textract``.
        filename: Original file name; only its suffix is used. May be empty
            or ``None``.

    Returns:
        The extracted text.

    Raises:
        HTTPException: With status 500 when any step of the conversion fails.
    """
    # An upload may arrive without a file name; fall back to no suffix.
    suffix = Path(filename).suffix if filename else ""
    temp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(file_bytes)
            temp_path = temp_file.name

        if file_type == "application/pdf":
            pdf = PdfReader(temp_path)
            # Guard against a page yielding no text (non-string result).
            return "".join(f"{page.extract_text() or ''}\n" for page in pdf.pages)

        if file_type in ["application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]:
            df = pd.read_excel(temp_path)
            return df.to_string()

        if file_type in ["application/vnd.ms-powerpoint", "application/vnd.openxmlformats-officedocument.presentationml.presentation"]:
            ppt = pptx.Presentation(temp_path)
            pieces = []
            for slide in ppt.slides:
                pieces.extend(
                    f"{shape.text}\n" for shape in slide.shapes if hasattr(shape, "text")
                )
                # Visual separator between slides.
                pieces.append("\n---\n")
            return "".join(pieces)

        if file_type == "text/plain":
            # Already text; undecodable bytes are dropped.
            with open(temp_path, 'r', encoding='utf-8', errors='ignore') as f:
                return f.read()

        # Try using textract for other types.
        return textract.process(temp_path).decode('utf-8', errors='ignore')

    except Exception as e:
        print(f"Conversion error: {e}")
        raise HTTPException(status_code=500, detail=f"File conversion failed: {str(e)}") from e
    finally:
        # Runs on success and failure alike so the temp file never leaks.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Cleanup is best-effort; it must not mask the real result.
                pass
 
 
98
 
99
def generate_response(user_prompt: str, image_url: str) -> str:
    """Download an image and ask Gemini about it.

    Args:
        user_prompt: The user's question or instruction.
        image_url: URL the image is downloaded from.

    Returns:
        The model's text reply, or a string starting with
        ``"Google Gemini API error:"`` when the download or the model call
        fails.
    """
    try:
        # Bound the wait and reject HTTP error pages instead of forwarding
        # their bodies to the model as if they were image data.
        download = requests.get(image_url, timeout=30)
        download.raise_for_status()

        # Use the real image type rather than assuming JPEG: prefer the
        # server's Content-Type, then the URL's extension, then JPEG.
        mime_type = download.headers.get("Content-Type", "").split(";", 1)[0].strip().lower()
        if not mime_type.startswith("image/"):
            guessed = mimetypes.guess_type(image_url)[0] or ""
            mime_type = guessed if guessed.startswith("image/") else "image/jpeg"

        image_part = types.Part.from_bytes(
            data=download.content, mime_type=mime_type
        )

        response = genai_client.models.generate_content(
            model="gemini-2.0-flash-exp",
            contents=[user_prompt, image_part],
        )

        return response.text
    except Exception as e:
        return f"Google Gemini API error: {e}"
118
 
119
def generate_response_for_document(user_prompt: str, doc_url: str, mime_type: str) -> str:
    """Download a document and ask Gemini about it.

    Args:
        user_prompt: The user's question or instruction.
        doc_url: URL the document is downloaded from.
        mime_type: MIME type attached to the document bytes sent to the model.

    Returns:
        The model's text reply, or a string starting with
        ``"Google Gemini API error:"`` when the download or the model call
        fails.
    """
    try:
        # Bound the wait and reject HTTP error pages instead of forwarding
        # their bodies to the model as if they were the document.
        download = requests.get(doc_url, timeout=30)
        download.raise_for_status()

        document_part = types.Part.from_bytes(
            data=download.content,
            mime_type=mime_type,
        )

        response = genai_client.models.generate_content(
            model="gemini-2.0-flash",
            contents=[document_part, user_prompt],
        )

        return response.text
    except Exception as e:
        return f"Google Gemini API error: {e}"
139
+
140
# Application instance; the route handlers are registered on it.
app = FastAPI()

# Serve the files in the "frontend" directory under the /static URL prefix.
static_assets = StaticFiles(directory="frontend")
app.mount("/static", static_assets, name="static")
144
+
145
@app.get("/")
async def home():
    """Serve the landing page."""
    landing_page = "frontend/index.html"
    return FileResponse(landing_page)
148
+
149
@app.get("/text-generator")
async def text_generator():
    """Serve the text-generator page."""
    generator_page = "frontend/text-generator.html"
    return FileResponse(generator_page)
152
+
153
@app.get("/about")
async def about():
    """Serve the about page."""
    about_page = "frontend/about.html"
    return FileResponse(about_page)
156
+
157
@app.get("/features")
async def features():
    """Serve the features page."""
    features_page = "frontend/features.html"
    return FileResponse(features_page)
160
+
161
@app.post("/process/")
async def process_file(file: UploadFile = File(...), user_prompt: str = Form(...)):
    """Answer a prompt about an uploaded image or document using Gemini.

    Images are uploaded to Cloudinary and sent to the model as image data.
    Documents (PDF, Excel, PowerPoint, plain text) are first converted to
    text; the text is uploaded to Cloudinary and then sent to the model.

    Args:
        file: The uploaded file.
        user_prompt: The question or instruction to send with the file.

    Returns:
        For images, ``{"response": ...}``. For documents, a dict with
        ``response``, ``text_content`` (a preview of at most 500 characters
        followed by ``"..."`` when truncated) and ``text_url``. On failure, a
        JSON error: 400 for an unsupported type, 422 when no text could be
        extracted, 500 for upload or processing errors.
    """
    # NOTE(review): the helpers called below look synchronous (HTTP requests,
    # Cloudinary upload), so they would block the event loop inside this
    # coroutine - confirm whether that matters for this deployment.
    document_types = (
        "application/pdf",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "text/plain",
    )

    file_bytes = await file.read()

    # The upload may carry no file name; avoid passing None to path helpers.
    filename = file.filename or ""
    declared_type = file.content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream"
    # Drop MIME parameters such as "; charset=utf-8" so exact matching works.
    content_type = declared_type.split(";", 1)[0].strip().lower()

    # Handle image files.
    if content_type.startswith("image/"):
        image_url = upload_image(file_bytes)
        if not image_url:
            return JSONResponse(
                status_code=500,
                content={"error": "Image upload failed."}
            )
        return {"response": generate_response(user_prompt, image_url)}

    if content_type not in document_types:
        return JSONResponse(
            status_code=400,
            content={"error": "Unsupported file type. Please upload an image, PDF, Excel, PowerPoint, or text file."}
        )

    # Handle document files (Excel, PDF, TXT, PowerPoint).
    try:
        text_content = convert_to_text(file_bytes, content_type, filename)

        if not text_content.strip():
            # Nothing to analyse (e.g. a scanned PDF); say so directly rather
            # than surfacing a confusing upload failure.
            return JSONResponse(
                status_code=422,
                content={"error": "No text could be extracted from the file."}
            )

        # Upload the UTF-8 text directly; no intermediate file is needed.
        text_url = upload_file_to_cloudinary(text_content.encode("utf-8"), "text")
        if not text_url:
            return JSONResponse(
                status_code=500,
                content={"error": "Text file upload failed."}
            )

        result = generate_response_for_document(user_prompt, text_url, "text/plain")

        preview = text_content[:500] + "..." if len(text_content) > 500 else text_content
        return {
            "response": result,
            "text_content": preview,
            "text_url": text_url
        }

    except Exception as e:
        return JSONResponse(
            status_code=500,
            content={"error": f"Processing failed: {str(e)}"}
        )