Bonnie422 committed on
Commit
7bf655d
·
verified ·
1 Parent(s): 4dc4df2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import fitz # PyMuPDF for PDF handling
4
+
5
+ # Function to extract text from PDF
6
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Filesystem path of the PDF, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output joined
        together with no separator. An empty document yields ``""``.
    """
    # The document keeps the underlying file open; the context manager makes
    # sure it is closed even if extracting a page raises.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)
12
+
13
+ # Function to handle file upload and text input
14
_MODEL_NAME = "Alibaba-NLP/gte-Qwen1.5-7B-instruct"
# Upper bound on generated tokens. Without it generate() falls back to a tiny
# default total length that any real document already exceeds.
_MAX_NEW_TOKENS = 512
_UNSUPPORTED_FORMAT_MESSAGE = (
    "Unsupported file format. Please upload a PDF or TXT file."
)
# model name -> (tokenizer, model); filled lazily on the first request.
_model_cache = {}


def _load_model_and_tokenizer(model_name=_MODEL_NAME):
    """Return ``(tokenizer, model)`` for *model_name*, loading them only once."""
    if model_name not in _model_cache:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name)
        _model_cache[model_name] = (tokenizer, model)
    return _model_cache[model_name]


def _read_document_text(path):
    """Return the text of the PDF/TXT at *path*, or ``None`` if unsupported."""
    lowered = path.lower()  # accept ".PDF" / ".TXT" as well
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(path)
    if lowered.endswith(".txt"):
        # Read from the path rather than the upload object: the object is not
        # guaranteed to expose read().
        with open(path, "rb") as handle:
            return handle.read().decode("utf-8")
    return None


def analyze_document(file, prompt):
    """Run the language model over an uploaded document and a user prompt.

    Args:
        file: The uploaded document, either a filesystem path string or an
            object whose ``name`` attribute is that path. ``None`` (nothing
            uploaded) is treated as an unsupported upload.
        prompt: Free-form instructions appended after the document content.

    Returns:
        The decoded model output, or a fixed error message when the upload is
        missing or is neither a ``.pdf`` nor a ``.txt`` file.

    Raises:
        UnicodeDecodeError: If a ``.txt`` upload is not valid UTF-8.
    """
    if file is None:
        return _UNSUPPORTED_FORMAT_MESSAGE

    path = file if isinstance(file, str) else file.name
    text = _read_document_text(path)
    if text is None:
        return _UNSUPPORTED_FORMAT_MESSAGE

    # Loading a multi-gigabyte model per request is prohibitively slow, so the
    # tokenizer and model are cached after the first call.
    tokenizer, model = _load_model_and_tokenizer()

    input_text = f"Document content:\n{text}\n\nPrompt:\n{prompt}"
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=_MAX_NEW_TOKENS)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
35
+
36
+ # Define Gradio interface
37
# Web UI: one file upload plus one prompt box in, plain text out.
# Components come from the top-level gradio namespace; the older `gr.inputs`
# namespace is deprecated and absent from newer Gradio releases.
iface = gr.Interface(
    fn=analyze_document,
    inputs=[
        gr.File(label="Upload TXT or PDF Document"),
        gr.Textbox(label="Prompt", placeholder="Enter your structured prompt here"),
    ],
    outputs="text",
    title="Document Analysis with GPT Model",
    description="Upload a TXT or PDF document and enter a prompt to get an analysis.",
)

# Launch the interface
iface.launch()