File size: 3,693 Bytes
76d5790
 
6c6a41c
933b444
 
 
6c6a41c
933b444
 
 
6c6a41c
933b444
 
 
 
 
 
 
6c6a41c
933b444
 
6c6a41c
 
 
933b444
 
 
 
 
6c6a41c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
933b444
 
 
 
 
 
6c6a41c
933b444
 
6c6a41c
933b444
 
6c6a41c
933b444
 
 
 
6c6a41c
933b444
 
 
 
 
 
6c6a41c
933b444
 
 
 
 
 
6c6a41c
933b444
dda7485
6a0159b
6c6a41c
 
 
 
933b444
 
6c6a41c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# https://elrmnd-vocal-pdf-summarizer.hf.space

# Import libraries

import gradio as gr
import PyPDF2
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from gtts import gTTS
from io import BytesIO

# Function to extract text from PDF
# Defines a function to extract raw text from the first page of a PDF file
def extract_text(pdf_file):
    """Return the text of the first page of a PDF.

    Only page index 0 is read; later pages are ignored.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source
            (presumably a path or a binary file object -- confirm against
            what the Gradio file input passes in).

    Returns:
        The extracted text of the first page, or an empty string when the
        document has no pages or the page yields no text.
    """
    reader = PyPDF2.PdfReader(pdf_file)

    # A PDF with zero pages would make ``pages[0]`` raise IndexError.
    if len(reader.pages) == 0:
        return ""

    # Normalize a missing extraction result to "" so callers can always
    # treat the return value as a string.
    return reader.pages[0].extract_text() or ""

# Function to summarize text
# Defines a function to summarize the extracted abstract using pszemraj/led-base-book-summary
# Hugging Face checkpoint used for summarization (an LED model, not BART).
_SUMMARY_MODEL_NAME = "pszemraj/led-base-book-summary"

# Message returned when no usable abstract text can be located.
_NO_ABSTRACT_MESSAGE = "Abstract not found in the document."

# Number of sentences kept after the sentence that mentions "Abstract".
_ABSTRACT_SENTENCE_COUNT = 7

# Filled on first use so the checkpoint is loaded once per process
# instead of once per request.
_summarizer_cache = {}


def _load_summarizer():
    """Return the (tokenizer, model) pair, loading it only on first use."""
    if "pair" not in _summarizer_cache:
        tokenizer = AutoTokenizer.from_pretrained(_SUMMARY_MODEL_NAME)
        model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARY_MODEL_NAME)
        _summarizer_cache["pair"] = (tokenizer, model)
    return _summarizer_cache["pair"]


def _extract_abstract(text):
    """Return the sentences that follow the first mention of "Abstract".

    The sentence containing the word itself is skipped. Returns "" when the
    word does not occur or nothing follows it.
    """
    sentences = text.split(". ")
    for position, sentence in enumerate(sentences):
        if "Abstract" in sentence:
            first = position + 1
            return ". ".join(sentences[first:first + _ABSTRACT_SENTENCE_COUNT])
    return ""


def summarize_text(text):
    """Summarize the abstract found in ``text``.

    The text is split on ". "; the first sentence containing "Abstract"
    (case-sensitive) marks the heading, and the following seven sentences
    are fed to the summarization model. The generated summary is cut after
    its last period so that it does not end mid-sentence.

    Args:
        text: Raw document text. ``None`` or an empty string is treated as
            a document without an abstract.

    Returns:
        The generated summary, or "Abstract not found in the document."
        when no abstract text could be located. Generation runs with
        ``do_sample=True``, so the summary may differ between calls.
    """
    if not text:
        return _NO_ABSTRACT_MESSAGE

    abstract = _extract_abstract(text)

    # Covers both "no heading at all" and "heading was the last sentence";
    # in either case there is nothing meaningful to send to the model.
    if not abstract.strip():
        return _NO_ABSTRACT_MESSAGE

    tokenizer, model = _load_summarizer()

    # Tokenize abstract
    inputs = tokenizer(abstract,
                       max_length=1024,
                       return_tensors="pt",
                       truncation=True)

    # Generate summary
    summary_ids = model.generate(inputs['input_ids'],
                                 max_length=50,
                                 min_length=30,
                                 no_repeat_ngram_size=3,
                                 encoder_no_repeat_ngram_size=3,
                                 repetition_penalty=3.5,
                                 num_beams=4,
                                 do_sample=True,
                                 early_stopping=False)

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Drop any trailing fragment after the last full stop.
    last_period = summary.rfind('.')
    if last_period != -1:
        summary = summary[:last_period + 1]

    return summary

# Function to convert text to audio
# Defines a function to convert text to an audio file using Google Text-to-Speech
def text_to_audio(text):
    """Synthesize English speech for ``text`` and return it as raw bytes.

    The audio is written by gTTS into an in-memory buffer, so no file is
    created on disk.

    Args:
        text: The text to speak.

    Returns:
        The complete contents of the audio buffer as ``bytes``.
    """
    with BytesIO() as audio_buffer:
        gTTS(text, lang='en').write_to_fp(audio_buffer)
        # getvalue() returns everything written, regardless of the
        # current stream position.
        return audio_buffer.getvalue()

### Main function
### The main function that ties everything together:
### extracts text, summarizes, and converts to audio.
def audio_pdf(pdf_file):
    """Run the full pipeline for one uploaded PDF.

    Extracts the text, summarizes it, and converts the summary to audio.

    Args:
        pdf_file: The PDF source, passed straight to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple: the summary text and the value
        returned by ``text_to_audio`` for that summary.
    """
    summary = summarize_text(extract_text(pdf_file))
    return summary, text_to_audio(summary)

# Define Gradio interface
# Gradio web interface with a file input, a text output to display the summary,
# and an audio output to play the audio file. The interface is launched at the end.
# Components: one uploaded file in, summary text and audio out.
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

# Wire the components to the audio_pdf pipeline; the example PDFs are
# referenced by file name.
iface = gr.Interface(
    title="The Vocal PDF Summarizer",
    description=(
        "I will summarize PDFs that have an abstract and transform them into audio. "
        "If an abstract is not present in the document, a message will be displayed."
    ),
    fn=audio_pdf,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    examples=[
        "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
        "Article 6 BloombergGPT_ A Large Language Model for Finance.pdf",
        "Article 5 A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks.pdf",
        "Article 8 Llama 2_ Open Foundation and Fine-Tuned Chat Models.pdf",
    ],
)

iface.launch()  # Launch the interface