Gangadhar123 committed on
Commit
f701cf1
·
verified ·
1 Parent(s): d781f94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -15
app.py CHANGED
@@ -7,15 +7,6 @@ import gradio as gr
7
  from transformers import pipeline
8
  from extraction_service import ExtractionService
9
 
10
- # Initialize the HF pipeline with smaller faster model
11
- model_name = "declare-lab/flan-alpaca-small"
12
- pipe = pipeline(
13
- "text2text-generation",
14
- model=model_name,
15
- tokenizer=model_name,
16
- device=-1 # CPU
17
- )
18
-
19
  # Load field extraction config
20
  extractor = ExtractionService("fields_config.json")
21
 
@@ -36,6 +27,15 @@ def extract_text_from_pdf(pdf_stream: io.BytesIO) -> str:
36
  except Exception as e:
37
  return f"Error processing PDF: {str(e)}"
38
 
 
 
 
 
 
 
 
 
 
39
  # Store extracted content
40
  extracted_data_store = {"raw_text": "", "fields": {}}
41
 
@@ -69,18 +69,17 @@ def ask_question(question):
69
  return "Upload and extract a PDF first."
70
 
71
  context = json.dumps(extracted_data_store["fields"], indent=2)
72
- prompt = f"Extracted data:\n{context}\n\nQuestion: {question}\nAnswer:"
73
-
74
  try:
75
- # Using HF pipeline generate method for text generation
76
  result = pipe(prompt, max_length=256, do_sample=False)
77
- answer = result[0]['generated_text']
78
  return answer.strip()
79
  except Exception as e:
80
- return f"Error generating answer: {str(e)}"
81
 
82
  with gr.Blocks() as demo:
83
- gr.Markdown("## πŸ›‘οΈ Insurance PDF Extractor & Q&A (using declare-lab/flan-alpaca-small)")
84
  with gr.Row():
85
  pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
86
  extract_btn = gr.Button("Extract")
 
7
  from transformers import pipeline
8
  from extraction_service import ExtractionService
9
 
 
 
 
 
 
 
 
 
 
10
  # Load field extraction config
11
  extractor = ExtractionService("fields_config.json")
12
 
 
27
  except Exception as e:
28
  return f"Error processing PDF: {str(e)}"
29
 
30
+ # Initialize Hugging Face pipeline with a small public model
31
+ model_name = "google/flan-t5-small"
32
+ pipe = pipeline(
33
+ "text2text-generation",
34
+ model=model_name,
35
+ tokenizer=model_name,
36
+ device=-1 # Use CPU; change to 0 if you want GPU
37
+ )
38
+
39
  # Store extracted content
40
  extracted_data_store = {"raw_text": "", "fields": {}}
41
 
 
69
  return "Upload and extract a PDF first."
70
 
71
  context = json.dumps(extracted_data_store["fields"], indent=2)
72
+ prompt = f"Context: {context}\nQuestion: {question}"
73
+
74
  try:
 
75
  result = pipe(prompt, max_length=256, do_sample=False)
76
+ answer = result[0]["generated_text"]
77
  return answer.strip()
78
  except Exception as e:
79
+ return f"Model inference error: {str(e)}"
80
 
81
  with gr.Blocks() as demo:
82
+ gr.Markdown("## πŸ›‘οΈ Insurance PDF Extractor & Q&A (Using google/flan-t5-small)")
83
  with gr.Row():
84
  pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
85
  extract_btn = gr.Button("Extract")