|
import os |
|
from groq import Groq |
|
import gradio as gr |
|
from PyPDF2 import PdfReader |
|
import re |
|
import nltk |
|
nltk.download('stopwords') |
|
from nltk.corpus import stopwords |
|
|
|
# The Groq API key is read from the GROQ_API_KEY environment variable (set it
# as a deployment secret or in the shell); credentials must never be committed
# to source control.
# NOTE(security): a literal key was once committed at this spot — treat that
# key as leaked and revoke/rotate it.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
|
|
def summarize(file, prompt):
    """Summarize a PDF by sending its opening and closing pages to the chat model.

    Only the first three and the last three pages are read. When the document
    has fewer than six pages those two ranges would overlap, so each page is
    taken exactly once and in document order.

    Args:
        file: The uploaded PDF as delivered by the Gradio file input; it is
            handed directly to ``PdfReader``.
        prompt: Instruction text placed in front of the extracted page text.

    Returns:
        The message content of the first choice in the chat completion.

    Raises:
        gr.Error: If no file was supplied.
    """
    if file is None:
        # Surface a readable message in the UI instead of an opaque failure
        # from PdfReader.
        raise gr.Error("Please upload a PDF before sending a prompt.")

    edge_pages = 3  # number of pages sampled from each end of the document

    pages = PdfReader(file).pages
    num_pages = len(pages)

    head = range(min(edge_pages, num_pages))
    # Start the tail after the head so short documents do not repeat pages.
    tail = range(max(len(head), num_pages - edge_pages), num_pages)

    # `or ""` keeps the join safe should a page produce no text.
    page_texts = [pages[index].extract_text() or "" for index in (*head, *tail)]
    full_text = " ".join(page_texts)

    input_text = prompt + " " + full_text

    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": input_text}],
        model="Mixtral-8x7b-32768",
    )

    # Use a distinct local name so the function's own name is not shadowed.
    summary = chat_completion.choices[0].message.content
    return summary
|
|
|
|
|
# The prompt box is created outside the Blocks context so gr.Examples can
# reference it before it is placed in the layout; prompt.render() below
# positions it on the page.
prompt = gr.Textbox(
    placeholder="Pick one of the examples or type your prompt...",
    label="Prompt Input",
    lines=8,
)

with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple")) as iface:
    # File extensions in file_types are written with a leading dot.
    file = gr.File(label="Upload PDF", file_types=[".pdf"])

    # Read-only box that receives the model's answer.
    sum_box = gr.Textbox(
        placeholder="Your summary will appear here...",
        label="Summary Output",
        lines=8,
        interactive=False,
    )

    # Clicking an example fills the prompt box with that text.
    examples = gr.Examples(
        examples=[
            "Write a two-paragraph summary of this PDF document, emphasizing the key points and conclusions",
            "Write a one-paragraph summary of the key findings or arguments presented in this PDF",
            "Provide a bullet-point outline of the key insights from this PDF",
            "Write a summary tweet (280 characters) based on the main points of this PDF",
        ],
        inputs=[prompt],
    )

    prompt.render()
    send = gr.Button("Send")

    # Footer row with attribution and a link to the author's GitHub profile.
    with gr.Row():
        gr.Markdown("Made by Olivia VonCanon")
        link = "[View on Github](https://github.com/Liv6)"
        gr.Markdown(link)

    # Event listeners have to be registered while the Blocks context is open.
    send.click(fn=summarize, inputs=[file, prompt], outputs=sum_box)

iface.launch()