Spaces:
Runtime error
Runtime error
Update services/ocr_service.py
Browse files- services/ocr_service.py +28 -1
services/ocr_service.py
CHANGED
@@ -8,7 +8,8 @@ from pdf2image import convert_from_path
|
|
8 |
|
9 |
|
10 |
class OCRService:
|
11 |
-
def __init__(self):
|
|
|
12 |
return
|
13 |
|
14 |
def extract_ocrless_pdf(self, filepath):
|
@@ -85,3 +86,29 @@ class OCRService:
|
|
85 |
del chunks[id]
|
86 |
|
87 |
return chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
10 |
class OCRService:
|
11 |
+
def __init__(self, LLAMAPARSE_API_KEY):
|
12 |
+
self.llama_parse_key = LLAMAPARSE_API_KEY
|
13 |
return
|
14 |
|
15 |
def extract_ocrless_pdf(self, filepath):
|
|
|
86 |
del chunks[id]
|
87 |
|
88 |
return chunks
|
89 |
+
|
90 |
+
def llama_parse_ocr(self, file_path, *, poll_interval=1.0, max_wait=2000.0,
                    request_timeout=60.0):
    """Upload a PDF to LlamaParse and return its parsed markdown.

    The file is uploaded to the LlamaParse upload endpoint, which answers
    with a job id. The job's markdown result endpoint is then polled until
    it responds with HTTP 200.

    Args:
        file_path: Path of the PDF to upload. It is also sent as the
            multipart filename.
        poll_interval: Seconds to sleep between result polls.
        max_wait: Maximum number of seconds to keep polling before
            giving up.
        request_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        The value of the ``'markdown'`` key of the result JSON.

    Raises:
        requests.HTTPError: If the upload request is rejected.
        TimeoutError: If no HTTP 200 result arrives within ``max_wait``.
        OSError: If ``file_path`` cannot be opened.
    """
    import time  # local import so the block does not rely on file-level imports

    llamaparse_url = 'https://api.cloud.llamaindex.ai/api/parsing/upload'
    headers = {
        'accept': 'application/json',
        'Authorization': f'Bearer {self.llama_parse_key}',
    }

    # Keep the handle open only for the duration of the upload so it is
    # always closed, even when the request raises.
    with open(file_path, 'rb') as pdf_file:
        files = {'file': (file_path, pdf_file, 'application/pdf')}
        upload_response = requests.post(
            llamaparse_url,
            headers=headers,
            files=files,
            timeout=request_timeout,
        )

    # Fail loudly on a rejected upload instead of hitting a KeyError on "id".
    upload_response.raise_for_status()
    job_id = upload_response.json()["id"]
    result_type = "markdown"

    llamaparse_result_url = f"https://api.cloud.llamaindex.ai/api/parsing/job/{job_id}/result/{result_type}"

    # Check for the result until it's ready, sleeping between attempts and
    # giving up after max_wait so a job that never succeeds cannot hang us.
    deadline = time.monotonic() + max_wait
    while True:
        response = requests.get(
            llamaparse_result_url,
            headers=headers,
            timeout=request_timeout,
        )
        if response.status_code == 200:
            return response.json()['markdown']
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"LlamaParse job {job_id} not ready after {max_wait} seconds "
                f"(last HTTP status {response.status_code})"
            )
        time.sleep(poll_interval)
|