Arafath10 committed on
Commit
3b59cf8
·
verified ·
1 Parent(s): 9875546

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +45 -59
main.py CHANGED
@@ -15,16 +15,26 @@ from io import BytesIO
15
  from PIL import Image
16
  import io
17
  import requests
 
 
 
18
 
19
  from dotenv import load_dotenv
20
  # Load the environment variables from the .env file
21
  load_dotenv()
22
 
23
- secret = os.environ["GEMINI"]
24
- genai.configure(api_key=secret)
25
- model_vision = genai.GenerativeModel('gemini-pro-vision')
 
 
26
  model_text = genai.GenerativeModel('gemini-pro')
27
 
 
 
 
 
 
28
  app = FastAPI()
29
 
30
  app.add_middleware(
@@ -37,56 +47,35 @@ app.add_middleware(
37
 
38
 
39
 
40
- def encode_image(image):
41
- # Convert image to BytesIO object (in memory)
42
- buffered = BytesIO()
43
- image.save(buffered, format=image.format) # Use the original image format (e.g., PNG, JPEG)
44
- img_bytes = buffered.getvalue()
45
-
46
- # Encode image to base64
47
- base64_image = base64.b64encode(img_bytes).decode('utf-8')
48
- return base64_image
49
 
50
 
51
-
52
- def vision(image):
53
- # OpenAI API Key
54
- api_key = os.environ["OPEN_AI"]
55
-
56
-
57
- # Getting the base64 string
58
- base64_image = encode_image(image)
59
-
60
- headers = {
61
- "Content-Type": "application/json",
62
- "Authorization": f"Bearer {api_key}"
63
- }
64
-
65
- payload = {
66
- "model": "gpt-4o-mini",
67
- "messages": [
68
- {
69
- "role": "user",
70
- "content": [
71
- {
72
- "type": "text",
73
- "text": "extract all data from this image"
74
- },
75
- {
76
- "type": "image_url",
77
- "image_url": {
78
- "url": f"data:image/jpeg;base64,{base64_image}"
79
- }
80
- }
81
- ]
82
- }
83
- ],
84
- "max_tokens": 300
85
- }
86
 
87
- response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
88
 
89
- return response.json()['choices'][0]['message']['content']
 
 
90
 
91
 
92
  @app.post("/get_ocr_data/")
@@ -99,16 +88,13 @@ async def get_data(input_file: UploadFile = File(...)):
99
  text = ""
100
 
101
  if file_type == "application/pdf":
102
- # Read PDF file using PyPDF2
103
- pdf_reader = PdfReader(io.BytesIO(file_content))
104
- for page in pdf_reader.pages:
105
- text += page.extract_text()
106
-
107
- elif file_type in ["image/jpeg", "image/png", "image/jpg"]:
108
- # Read Image file using PIL and pytesseract
109
- image = Image.open(io.BytesIO(file_content))
110
- text = vision(image)
111
-
112
  else:
113
  raise HTTPException(status_code=400, detail="Unsupported file type")
114
 
 
15
  from PIL import Image
16
  import io
17
  import requests
18
+ import fitz # PyMuPDF
19
+ import os
20
+
21
 
22
  from dotenv import load_dotenv
23
  # Load the environment variables from the .env file
24
  load_dotenv()
25
 
26
# Configure Gemini API.
# The key is read from the GEMINI environment variable (populated from the
# .env file by load_dotenv() above) so that no credential lives in source
# control. NOTE: the key that was previously hard-coded on this line has been
# published in the commit history and should be revoked and rotated.
secret = os.environ["GEMINI"]
genai.configure(api_key=secret)

# Multimodal model used for OCR on rendered PDF pages.
model_vision = genai.GenerativeModel('gemini-1.5-flash')
# Text-only model.
model_text = genai.GenerativeModel('gemini-pro')
32
 
33
+
34
+
35
+
36
+
37
+
38
  app = FastAPI()
39
 
40
  app.add_middleware(
 
47
 
48
 
49
 
 
 
 
 
 
 
 
 
 
50
 
51
 
52
def vision(file_content):
    """Extract the text of a PDF by rendering its pages and asking Gemini.

    Each page is rasterised to a PNG, wrapped in a PIL image, and sent to
    ``model_vision`` together with a single text instruction.

    Args:
        file_content: Raw bytes of the PDF document.

    Returns:
        The ``.text`` of the model's response.
    """
    gemini_input = ["extract the whole text"]

    # Open the PDF from memory. The original passed an undefined name `pdf`
    # here, which raised NameError on every call; the bytes to open are the
    # `file_content` argument. The context manager closes the document once
    # all pages are rendered.
    with fitz.open(stream=file_content, filetype="pdf") as pdf_document:
        for page in pdf_document:
            # Render the page to a pixmap and re-encode it as PNG bytes, so
            # the resulting PIL image no longer depends on the open document.
            pix = page.get_pixmap()
            img_bytes = pix.tobytes("png")
            gemini_input.append(Image.open(io.BytesIO(img_bytes)))

    print("PDF pages converted to images successfully!")

    # The prompt followed by one image per page goes to the model in one request.
    response = model_vision.generate_content(gemini_input).text
    return response
79
 
80
 
81
  @app.post("/get_ocr_data/")
 
88
  text = ""
89
 
90
  if file_type == "application/pdf":
91
+ if text=="":
92
+ text = vision(file_content)
93
+ # else:
94
+ # # Read PDF file using PyPDF2
95
+ # pdf_reader = PdfReader(io.BytesIO(file_content))
96
+ # for page in pdf_reader.pages:
97
+ # text += page.extract_text()
 
 
 
98
  else:
99
  raise HTTPException(status_code=400, detail="Unsupported file type")
100