Gangadhar123 committed · Commit fa12524 · verified · 1 Parent(s): 88ace73

Update app.py

Files changed (1): app.py (+14, -9)
app.py CHANGED
@@ -4,17 +4,18 @@ import fitz  # PyMuPDF
 import pytesseract
 from PIL import Image
 import gradio as gr
-from transformers import pipeline
+import os
 from extraction_service import ExtractionService
+from transformers import pipeline
 
 # Load field extraction config
 extractor = ExtractionService("fields_config.json")
 
-# Load the TinyLlama chat pipeline (text-generation with instruction-tuning)
+# Initialize Hugging Face text-generation pipeline with Dolly v2 3B
 chat_pipeline = pipeline(
     "text-generation",
-    model="TinyLlama/TinyLlama-1.1B-Chat",
-    device=-1,  # CPU; change to 0 if you have a GPU
+    model="databricks/dolly-v2-3b",
+    device=-1,  # CPU; set device=0 for GPU if available
     max_length=512,
     do_sample=True,
     temperature=0.7,
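For context, the new initialization can be exercised on its own. The sketch below mirrors the commit's arguments; the smoke-test prompt is made up, and note that the dolly-v2-3b model card additionally suggests torch_dtype=torch.bfloat16 and trust_remote_code=True, which this commit does not use.

# Standalone sketch of the new pipeline setup, mirroring the commit's arguments.
from transformers import pipeline

chat_pipeline = pipeline(
    "text-generation",
    model="databricks/dolly-v2-3b",  # ~3B parameters; first run downloads the weights
    device=-1,                       # CPU; device=0 selects the first CUDA GPU
    max_length=512,                  # caps prompt plus generated tokens
    do_sample=True,
    temperature=0.7,
)

# Hypothetical smoke test, not part of the commit:
print(chat_pipeline("Question: What is a deductible?\nAnswer:")[0]["generated_text"])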
@@ -71,17 +72,21 @@ def ask_question(question):
         return "Upload and extract a PDF first."
 
     context = json.dumps(extracted_data_store["fields"], indent=2)
-    prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"
-
+    prompt = f"You are an assistant for insurance data.\nExtracted data:\n{context}\n\nQuestion: {question}\nAnswer:"
+
     try:
-        results = chat_pipeline(prompt)
-        answer = results[0]["generated_text"].split("Answer:")[-1].strip()
+        responses = chat_pipeline(prompt, max_length=512, do_sample=True, temperature=0.7, top_p=0.9)
+        answer = responses[0]["generated_text"]
+
+        # Extract answer text after the question (remove prompt)
+        if prompt in answer:
+            answer = answer.split(prompt, 1)[1].strip()
         return answer
     except Exception as e:
         return f"Model inference error: {str(e)}"
 
 with gr.Blocks() as demo:
-    gr.Markdown("## 🛡️ Insurance PDF Extractor & Q&A (TinyLlama 1.1B Chat)")
+    gr.Markdown("## 🛡️ Insurance PDF Extractor & Q&A using Dolly-v2-3B")
     with gr.Row():
         pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
         extract_btn = gr.Button("Extract")
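The new echo-stripping step relies on the pipeline returning the prompt verbatim at the start of generated_text, which is the transformers default unless return_full_text=False is passed. A minimal sketch of that logic, with a hypothetical helper name and made-up sample strings:

def strip_prompt_echo(prompt: str, generated: str) -> str:
    """Return only the model's continuation when the pipeline echoes the prompt."""
    # Text-generation pipelines prepend the prompt to generated_text by default,
    # so slice it off when present; otherwise return the text unchanged.
    if generated.startswith(prompt):
        return generated[len(prompt):].strip()
    return generated.strip()

# Hypothetical usage:
prompt = "Question: What is the policy number?\nAnswer:"
generated = prompt + " The policy number is POL-12345."
assert strip_prompt_echo(prompt, generated) == "The policy number is POL-12345."

Passing return_full_text=False in the chat_pipeline call would avoid the echo entirely and make the stripping step unnecessary.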
 
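The hunk cuts off at the upload row. For orientation only, a hedged sketch of how the rest of the Blocks UI might wire these widgets to ask_question; the extract_pdf handler, the output components, and their labels are assumptions and do not appear in this diff.

import gradio as gr

def extract_pdf(pdf_file):
    # Stub standing in for the app's real extraction handler (assumed name).
    return {"status": "stub"}

def ask_question(question):
    # Stub standing in for the function patched in this commit.
    return f"(stub answer for: {question})"

with gr.Blocks() as demo:
    gr.Markdown("## 🛡️ Insurance PDF Extractor & Q&A using Dolly-v2-3B")
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        extract_btn = gr.Button("Extract")
    fields_output = gr.JSON(label="Extracted fields")       # assumed component
    question_box = gr.Textbox(label="Ask about the document")  # assumed component
    answer_box = gr.Textbox(label="Answer")                  # assumed component

    extract_btn.click(fn=extract_pdf, inputs=pdf_input, outputs=fields_output)
    question_box.submit(fn=ask_question, inputs=question_box, outputs=answer_box)

demo.launch()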