WebashalarForML committed on
Commit
f74e1b2
·
verified ·
1 Parent(s): 00227b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -21
app.py CHANGED
@@ -2,7 +2,7 @@
2
  from flask import Flask, render_template, request, redirect, url_for, flash, session, send_from_directory
3
  import os
4
  import logging
5
- from utility.utils import extract_text_from_images, Data_Extractor, json_to_llm_str, process_extracted_text, process_resume_data
6
  from backup.backup import NER_Model
7
  from paddleocr import PaddleOCR
8
 
@@ -116,7 +116,36 @@ def reset_upload():
116
  logging.warning("File not found for removal")
117
  return redirect(url_for('index'))
118
 
119
- @app.route('/process', methods=['GET','POST'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  def process_file():
121
  uploaded_files = session.get('uploaded_files', [])
122
  if not uploaded_files:
@@ -127,35 +156,47 @@ def process_file():
127
  file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
128
  logging.info(f"Processing files: {file_paths}")
129
 
130
- extracted_text = {}
131
- processed_Img = {}
132
-
133
  try:
134
- extracted_text, processed_Img = extract_text_from_images(file_paths)
135
- logging.info(f"Extracted text: {extracted_text}")
 
 
 
136
  logging.info(f"Processed images: {processed_Img}")
137
 
138
- llmText = json_to_llm_str(extracted_text)
139
- logging.info(f"LLM text: {llmText}")
140
-
141
- LLMdata = Data_Extractor(llmText)
142
- print("llm data--------->",llmText)
143
- logging.info(f"LLM data: {LLMdata}")
 
 
 
 
 
 
144
 
145
  except Exception as e:
146
- logging.error(f"Error during LLM processing: {e}")
147
- logging.info("Running backup model...")
148
 
149
  LLMdata = {}
150
  extracted_text, processed_Img = extract_text_from_images(file_paths)
151
  logging.info(f"Extracted text(Backup): {extracted_text}")
152
  logging.info(f"Processed images(Backup): {processed_Img}")
153
- if extracted_text:
154
- text = json_to_llm_str(extracted_text)
155
- LLMdata = NER_Model(text)
156
- logging.info(f"NER model data: {LLMdata}")
157
- else:
158
- logging.warning("No extracted text available for backup model")
 
 
 
 
 
 
159
 
160
  cont_data = process_extracted_text(extracted_text)
161
  logging.info(f"Contextual data: {cont_data}")
 
2
  from flask import Flask, render_template, request, redirect, url_for, flash, session, send_from_directory
3
  import os
4
  import logging
5
+ from utility.utils import extract_text_from_images, process_extracted_text, process_resume_data
6
  from backup.backup import NER_Model
7
  from paddleocr import PaddleOCR
8
 
 
116
  logging.warning("File not found for removal")
117
  return redirect(url_for('index'))
118
 
119
+ # @app.route('/process', methods=['GET','POST'])
120
+ # def process_file():
121
+ # uploaded_files = session.get('uploaded_files', [])
122
+ # if not uploaded_files:
123
+ # flash('No files selected for processing')
124
+ # logging.warning("No files selected for processing")
125
+ # return redirect(url_for('index'))
126
+
127
+ # file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
128
+ # logging.info(f"Processing files: {file_paths}")
129
+
130
+ # extracted_text = {}
131
+ # processed_Img = {}
132
+
133
+ # try:
134
+ # extracted_text, processed_Img = extract_text_from_images(file_paths)
135
+ # logging.info(f"Extracted text: {extracted_text}")
136
+ # logging.info(f"Processed images: {processed_Img}")
137
+
138
+ # llmText = json_to_llm_str(extracted_text)
139
+ # logging.info(f"LLM text: {llmText}")
140
+
141
+ # LLMdata = Data_Extractor(llmText)
142
+ # print("llm data--------->",llmText)
143
+ # logging.info(f"LLM data: {LLMdata}")
144
+
145
+ # except Exception as e:
146
+ # logging.error(f"Error during LLM processing: {e}")
147
+ # logging.info("Running backup model...")
148
+ @app.route('/process', methods=['GET', 'POST'])
149
  def process_file():
150
  uploaded_files = session.get('uploaded_files', [])
151
  if not uploaded_files:
 
156
  file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
157
  logging.info(f"Processing files: {file_paths}")
158
 
 
 
 
159
  try:
160
+ # Single Groq VLM pass on each image
161
+ LLMdata, extracted_text, processed_Img = extract_text_from_images(file_paths)
162
+
163
+ logging.info(f"Groq VLM structured data: {LLMdata}")
164
+ logging.info(f"Extracted text blobs: {extracted_text}")
165
  logging.info(f"Processed images: {processed_Img}")
166
 
167
+ # Regex fallback / augmentation from model text
168
+ cont_data = process_extracted_text(extracted_text)
169
+ logging.info(f"Contextual data: {cont_data}")
170
+
171
+ processed_data = process_resume_data(LLMdata, cont_data, extracted_text)
172
+ logging.info(f"Processed data: {processed_data}")
173
+
174
+ session['processed_data'] = processed_data
175
+ session['processed_Img'] = processed_Img
176
+
177
+ flash('Data processed and analyzed successfully')
178
+ return redirect(url_for('result'))
179
 
180
  except Exception as e:
181
+ logging.exception(f"Error during processing: {e}")
182
+ flash('Processing failed')
183
 
184
  LLMdata = {}
185
  extracted_text, processed_Img = extract_text_from_images(file_paths)
186
  logging.info(f"Extracted text(Backup): {extracted_text}")
187
  logging.info(f"Processed images(Backup): {processed_Img}")
188
+ try:
189
+ if extracted_text:
190
+
191
+ text = json_to_llm_str(extracted_text)
192
+ LLMdata = NER_Model(text)
193
+ logging.info(f"NER model data: {LLMdata}")
194
+ else:
195
+ logging.warning("No extracted text available for backup model")
196
+ except Exception as e:
197
+ logging.exception(f"Error during processing: {e}")
198
+ flash('Processing failed')
199
+ return redirect(url_for('index'))
200
 
201
  cont_data = process_extracted_text(extracted_text)
202
  logging.info(f"Contextual data: {cont_data}")