capstone / app.py
nonacnov's picture
Update app.py
874bc90 verified
raw
history blame
3.32 kB
import os
from groq import Groq
import gradio as gr
from PyPDF2 import PdfReader
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
os.environ['GROQ_API_KEY'] = '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'
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def summarize(file, prompt):
#preprocess PDF file
#extracting text from all pages
reader = PdfReader(file)
num_pages = len(reader.pages)
all_text = []
#extract text from the first three pages
for page_number in range(min(3, num_pages)):
page = reader.pages[page_number]
text = page.extract_text()
all_text.append(text)
#extract text from the last three pages
for page_number in range(max(0, num_pages - 3), num_pages):
page = reader.pages[page_number]
text = page.extract_text()
all_text.append(text)
full_text = ' '.join(all_text)
#tokenization
tokens = full_text.split()
#removing punctuation after tokenization
cleaned_tokens = [re.sub(r'[^\w\s]', '', token) for token in tokens]
#removing empty tokens
cleaned_tokens = [token for token in cleaned_tokens if token]
#removing stopwords
stop_words = set(stopwords.words('english'))
cleaned_tokens = [token for token in cleaned_tokens if token.lower() not in stop_words]
#concatenate prompt and text
input_text = prompt + " " + full_text
#summarize using groq model
chat_completion = client.chat.completions.create(
messages=[{
"role": "user",
"content": input_text}],
model="Mixtral-8x7b-32768")
summarize = chat_completion.choices[0].message.content
return summarize
#have to define prompt box first to have it under the examples
prompt = gr.Textbox(placeholder="Pick one of the examples or type your prompt...", label = "Prompt Input", lines=8)
with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as iface:
#place to upload files; I have it to only show pdfs files, but someone could still upload non-PDF files it would just send back an error
file = gr.File(label="Upload PDF", file_types=["pdf"])
#box for the output
sum_box = gr.Textbox(placeholder="Your summary will appear here...", label = "Summary Output", lines=8, interactive= False)
#examples for users to use, when clicked it fills in the prompt textbox
examples = gr.Examples(examples=[
"Write a two-paragraph summary of this PDF document, emphasizing the key points and conclusions"
, "Write a one-paragraph summary of the key findings or arguments presented in this PDF"
, "Provide a bullet-point outline of the key insights from this PDF"
, "Write a summary tweet (280 characters) based on the main points of this PDF"], inputs=[prompt])
#showing prompt textbox and making button to submit
prompt.render()
send = gr.Button("Send")
#I could not figure out how to align the github link to the right and it bugs me
with gr.Row():
gr.Markdown("Made by Olivia VonCanon")
link = "[View on Github](https://github.com/Liv6)"
gr.Markdown(link)
#calling the function if user pushes send button
send.click(fn=summarize, inputs=[file, prompt], outputs=sum_box)
iface.launch()