Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
from flask import Flask, render_template, request, redirect, url_for, flash, session, send_from_directory
|
| 3 |
import os
|
| 4 |
import logging
|
| 5 |
-
from utility.utils import extract_text_from_images,
|
| 6 |
from backup.backup import NER_Model
|
| 7 |
from paddleocr import PaddleOCR
|
| 8 |
|
|
@@ -116,7 +116,36 @@ def reset_upload():
|
|
| 116 |
logging.warning("File not found for removal")
|
| 117 |
return redirect(url_for('index'))
|
| 118 |
|
| 119 |
-
@app.route('/process', methods=['GET','POST'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
def process_file():
|
| 121 |
uploaded_files = session.get('uploaded_files', [])
|
| 122 |
if not uploaded_files:
|
|
@@ -127,35 +156,47 @@ def process_file():
|
|
| 127 |
file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
|
| 128 |
logging.info(f"Processing files: {file_paths}")
|
| 129 |
|
| 130 |
-
extracted_text = {}
|
| 131 |
-
processed_Img = {}
|
| 132 |
-
|
| 133 |
try:
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
| 136 |
logging.info(f"Processed images: {processed_Img}")
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
logging.info(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
except Exception as e:
|
| 146 |
-
logging.
|
| 147 |
-
|
| 148 |
|
| 149 |
LLMdata = {}
|
| 150 |
extracted_text, processed_Img = extract_text_from_images(file_paths)
|
| 151 |
logging.info(f"Extracted text(Backup): {extracted_text}")
|
| 152 |
logging.info(f"Processed images(Backup): {processed_Img}")
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
cont_data = process_extracted_text(extracted_text)
|
| 161 |
logging.info(f"Contextual data: {cont_data}")
|
|
|
|
| 2 |
from flask import Flask, render_template, request, redirect, url_for, flash, session, send_from_directory
|
| 3 |
import os
|
| 4 |
import logging
|
| 5 |
+
from utility.utils import extract_text_from_images, process_extracted_text, process_resume_data
|
| 6 |
from backup.backup import NER_Model
|
| 7 |
from paddleocr import PaddleOCR
|
| 8 |
|
|
|
|
| 116 |
logging.warning("File not found for removal")
|
| 117 |
return redirect(url_for('index'))
|
| 118 |
|
| 119 |
+
# @app.route('/process', methods=['GET','POST'])
|
| 120 |
+
# def process_file():
|
| 121 |
+
# uploaded_files = session.get('uploaded_files', [])
|
| 122 |
+
# if not uploaded_files:
|
| 123 |
+
# flash('No files selected for processing')
|
| 124 |
+
# logging.warning("No files selected for processing")
|
| 125 |
+
# return redirect(url_for('index'))
|
| 126 |
+
|
| 127 |
+
# file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
|
| 128 |
+
# logging.info(f"Processing files: {file_paths}")
|
| 129 |
+
|
| 130 |
+
# extracted_text = {}
|
| 131 |
+
# processed_Img = {}
|
| 132 |
+
|
| 133 |
+
# try:
|
| 134 |
+
# extracted_text, processed_Img = extract_text_from_images(file_paths)
|
| 135 |
+
# logging.info(f"Extracted text: {extracted_text}")
|
| 136 |
+
# logging.info(f"Processed images: {processed_Img}")
|
| 137 |
+
|
| 138 |
+
# llmText = json_to_llm_str(extracted_text)
|
| 139 |
+
# logging.info(f"LLM text: {llmText}")
|
| 140 |
+
|
| 141 |
+
# LLMdata = Data_Extractor(llmText)
|
| 142 |
+
# print("llm data--------->",llmText)
|
| 143 |
+
# logging.info(f"LLM data: {LLMdata}")
|
| 144 |
+
|
| 145 |
+
# except Exception as e:
|
| 146 |
+
# logging.error(f"Error during LLM processing: {e}")
|
| 147 |
+
# logging.info("Running backup model...")
|
| 148 |
+
@app.route('/process', methods=['GET', 'POST'])
|
| 149 |
def process_file():
|
| 150 |
uploaded_files = session.get('uploaded_files', [])
|
| 151 |
if not uploaded_files:
|
|
|
|
| 156 |
file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
|
| 157 |
logging.info(f"Processing files: {file_paths}")
|
| 158 |
|
|
|
|
|
|
|
|
|
|
| 159 |
try:
|
| 160 |
+
# Single Groq VLM pass on each image
|
| 161 |
+
LLMdata, extracted_text, processed_Img = extract_text_from_images(file_paths)
|
| 162 |
+
|
| 163 |
+
logging.info(f"Groq VLM structured data: {LLMdata}")
|
| 164 |
+
logging.info(f"Extracted text blobs: {extracted_text}")
|
| 165 |
logging.info(f"Processed images: {processed_Img}")
|
| 166 |
|
| 167 |
+
# Regex fallback / augmentation from model text
|
| 168 |
+
cont_data = process_extracted_text(extracted_text)
|
| 169 |
+
logging.info(f"Contextual data: {cont_data}")
|
| 170 |
+
|
| 171 |
+
processed_data = process_resume_data(LLMdata, cont_data, extracted_text)
|
| 172 |
+
logging.info(f"Processed data: {processed_data}")
|
| 173 |
+
|
| 174 |
+
session['processed_data'] = processed_data
|
| 175 |
+
session['processed_Img'] = processed_Img
|
| 176 |
+
|
| 177 |
+
flash('Data processed and analyzed successfully')
|
| 178 |
+
return redirect(url_for('result'))
|
| 179 |
|
| 180 |
except Exception as e:
|
| 181 |
+
logging.exception(f"Error during processing: {e}")
|
| 182 |
+
flash('Processing failed')
|
| 183 |
|
| 184 |
LLMdata = {}
|
| 185 |
extracted_text, processed_Img = extract_text_from_images(file_paths)
|
| 186 |
logging.info(f"Extracted text(Backup): {extracted_text}")
|
| 187 |
logging.info(f"Processed images(Backup): {processed_Img}")
|
| 188 |
+
try:
|
| 189 |
+
if extracted_text:
|
| 190 |
+
|
| 191 |
+
text = json_to_llm_str(extracted_text)
|
| 192 |
+
LLMdata = NER_Model(text)
|
| 193 |
+
logging.info(f"NER model data: {LLMdata}")
|
| 194 |
+
else:
|
| 195 |
+
logging.warning("No extracted text available for backup model")
|
| 196 |
+
except Exception as e:
|
| 197 |
+
logging.exception(f"Error during processing: {e}")
|
| 198 |
+
flash('Processing failed')
|
| 199 |
+
return redirect(url_for('index'))
|
| 200 |
|
| 201 |
cont_data = process_extracted_text(extracted_text)
|
| 202 |
logging.info(f"Contextual data: {cont_data}")
|