nonacnov committed on
Commit
868fea5
1 Parent(s): 8b156bf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from groq import Groq
3
+ import gradio as gr
4
+ from PyPDF2 import PdfReader
5
+ import re
6
+ import nltk
7
+ from nltk.corpus import stopwords
8
+
9
# SECURITY: the API key must never be committed to source control. Supply it
# through the GROQ_API_KEY environment variable (for example, as a secret in
# the hosting platform's settings). The key that was previously hard-coded
# here has been exposed publicly and should be revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
11
+
12
def summarize(file, prompt):
    """Summarize the opening and closing pages of a PDF with a Groq chat model.

    Text is extracted from the first three and the last three pages of the
    document (each page at most once), appended to ``prompt`` and sent as a
    single user message to the chat-completions endpoint.

    Args:
        file: The uploaded PDF as delivered by the Gradio file input; it is
            handed straight to ``PdfReader``.
        prompt: Instruction text placed in front of the extracted PDF text.

    Returns:
        The message content of the first choice in the model's response.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file is None:
        # Give the user an actionable message instead of an opaque reader error.
        raise gr.Error("Please upload a PDF file before sending.")

    reader = PdfReader(file)
    num_pages = len(reader.pages)

    # With fewer than six pages the "first three" and "last three" ranges
    # overlap; a set keeps every page index once, and sorting restores
    # document order.
    first_pages = range(min(3, num_pages))
    last_pages = range(max(0, num_pages - 3), num_pages)
    page_numbers = sorted(set(first_pages) | set(last_pages))

    # `or ''` keeps the join safe should a page yield no text.
    full_text = ' '.join(
        reader.pages[page_number].extract_text() or ''
        for page_number in page_numbers
    )

    # The model receives the prompt followed by the raw extracted text.
    input_text = prompt + " " + full_text

    chat_completion = client.chat.completions.create(
        messages=[{
            "role": "user",
            "content": input_text}],
        model="Mixtral-8x7b-32768")

    # Named `summary` so the local does not shadow this function's own name.
    summary = chat_completion.choices[0].message.content
    return summary
60
+
61
# The prompt box is created before the layout so it can be handed to
# gr.Examples and then rendered underneath the examples.
prompt = gr.Textbox(placeholder="Pick one of the examples or type your prompt...", label = "Prompt Input", lines=8)

with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as iface:
    # Upload widget. File extensions must carry a leading dot (".pdf") for
    # the picker to filter on them. As the original author noted, a user can
    # still upload a non-PDF file, which would just send back an error.
    file = gr.File(label="Upload PDF", file_types=[".pdf"])

    # Read-only box that receives the generated summary.
    sum_box = gr.Textbox(placeholder="Your summary will appear here...", label = "Summary Output", lines=8, interactive= False)

    # Clickable example prompts; selecting one fills in the prompt textbox.
    examples = gr.Examples(examples=[
        "Write a two-paragraph summary of this PDF document, emphasizing the key points and conclusions"
        , "Write a one-paragraph summary of the key findings or arguments presented in this PDF"
        , "Provide a bullet-point outline of the key insights from this PDF"
        , "Write a summary tweet (280 characters) based on the main points of this PDF"], inputs=[prompt])

    # Place the pre-built prompt textbox here, followed by the submit button.
    prompt.render()
    send = gr.Button("Send")

    # Footer row: author credit and GitHub link (right-aligning the link is
    # still unresolved).
    with gr.Row():
        gr.Markdown("Made by Olivia VonCanon")
        link = "[View on Github](https://github.com/Liv6)"
        gr.Markdown(link)

    # Run summarize on the uploaded file and prompt when Send is clicked,
    # writing the result into the summary box.
    send.click(fn=summarize, inputs=[file, prompt], outputs=sum_box)

iface.launch()