capstone / app.py
nonacnov's picture
Update app.py
874bc90 verified
import os
from groq import Groq
import gradio as gr
from PyPDF2 import PdfReader
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
# SECURITY: the Groq API key must never be committed to source control.
# Provide it through the GROQ_API_KEY environment variable (for example as a
# Hugging Face Space secret). Any key that was previously hard-coded here
# should be treated as leaked and revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def summarize(file, prompt):
    """Summarize a PDF with the Groq chat model, guided by a user prompt.

    Only the first three and the last three pages of the document are read.
    Each page is included at most once, so documents with fewer than six
    pages are not sent to the model with duplicated pages.

    Args:
        file: The uploaded PDF as accepted by ``PdfReader``, or ``None`` when
            nothing has been uploaded.
        prompt: Instruction text that is prepended to the extracted PDF text.

    Returns:
        The text of the model's reply, or a short message asking for a PDF
        when ``file`` is ``None``.
    """
    # Nothing uploaded yet: answer in the output box instead of crashing
    # inside PdfReader.
    if file is None:
        return "Please upload a PDF file first."

    reader = PdfReader(file)
    num_pages = len(reader.pages)

    # First three pages plus last three pages. The tail starts no earlier
    # than where the head ended so short documents are not read twice.
    head_end = min(3, num_pages)
    tail_start = max(head_end, num_pages - 3)
    page_numbers = [*range(head_end), *range(tail_start, num_pages)]

    # Guard against a page yielding no text so that join() cannot fail.
    page_texts = [reader.pages[n].extract_text() or "" for n in page_numbers]
    full_text = " ".join(page_texts)

    # The raw extracted text is sent as-is; no token filtering is applied.
    input_text = prompt + " " + full_text

    # Ask the Groq-hosted model for the summary.
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": input_text}],
        model="Mixtral-8x7b-32768",
    )
    return chat_completion.choices[0].message.content
# The prompt box is created before the layout so that gr.Examples can target
# it; it is rendered later so it appears underneath the examples.
prompt = gr.Textbox(
    placeholder="Pick one of the examples or type your prompt...",
    label="Prompt Input",
    lines=8,
)

with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as iface:
    # Upload widget restricted to PDFs. Extensions must be written with a
    # leading dot (".pdf"); a bare "pdf" is not a recognised file type.
    file = gr.File(label="Upload PDF", file_types=[".pdf"])

    # Read-only box that receives the model's summary.
    sum_box = gr.Textbox(
        placeholder="Your summary will appear here...",
        label="Summary Output",
        lines=8,
        interactive=False,
    )

    # Clicking an example copies it into the prompt textbox.
    examples = gr.Examples(
        examples=[
            "Write a two-paragraph summary of this PDF document, emphasizing the key points and conclusions",
            "Write a one-paragraph summary of the key findings or arguments presented in this PDF",
            "Provide a bullet-point outline of the key insights from this PDF",
            "Write a summary tweet (280 characters) based on the main points of this PDF",
        ],
        inputs=[prompt],
    )

    # Show the prompt textbox here, followed by the submit button.
    prompt.render()
    send = gr.Button("Send")

    # TODO: right-align the GitHub link within this row.
    with gr.Row():
        gr.Markdown("Made by Olivia VonCanon")
        link = "[View on Github](https://github.com/Liv6)"
        gr.Markdown(link)

    # Run the summarizer when the user presses Send.
    send.click(fn=summarize, inputs=[file, prompt], outputs=sum_box)

iface.launch()