File size: 3,572 Bytes
854fcbb
 
 
 
 
 
 
 
 
53da45d
854fcbb
 
 
 
 
 
53da45d
854fcbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ca041b
 
854fcbb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fe9087
 
 
854fcbb
7ca041b
854fcbb
 
 
 
 
53da45d
854fcbb
 
 
 
 
167402c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# https://huggingface.co/spaces/yilmazmusa_ml/abstract_summarizer

# Here are the imports
import warnings
import pdfplumber
import torch
from transformers import pipeline, AutoProcessor, AutoModel
import numpy as np
import gradio as gr
from io import BytesIO
from scipy.io.wavfile import write as write_wav
warnings.filterwarnings("ignore")


# Here is the code
def extract_abstract(uploaded_file):
    """Return the abstract section of a PDF supplied as raw bytes.

    Pages are scanned in order and the first page whose text contains the
    word "abstract" (case-insensitive) is used. The returned text starts at
    that keyword and stops just before the first "introduction" that follows
    it, or at the end of the page when no such heading exists.

    Args:
        uploaded_file: The PDF content as a bytes-like object.

    Returns:
        The extracted abstract text, or an empty string when no page
        contains the word "abstract".
    """
    abstract = ""
    with pdfplumber.open(BytesIO(uploaded_file)) as pdf:
        for page in pdf.pages:
            # Tolerances of 1 keep the spaces between words and lines.
            # Fall back to "" so a page without extractable text cannot
            # crash the keyword search below.
            text = page.extract_text(x_tolerance=1, y_tolerance=1) or ""
            lowered = text.lower()

            start_index = lowered.find("abstract")
            if start_index == -1:
                continue

            # Only look for "introduction" after the abstract heading; an
            # earlier occurrence would otherwise produce an empty slice.
            end_index = lowered.find("introduction", start_index)
            if end_index == -1:
                # No introduction heading on this page: keep the rest of the
                # page instead of slicing with -1 and losing the last char.
                end_index = len(text)

            abstract = text[start_index:end_index]
            break
    print(abstract)
    return abstract

def process_summary(summary):
    """Reduce a summarization result to its first sentence.

    Args:
        summary: A sequence whose first item is a mapping with a
            "summary_text" entry holding the generated summary string.

    Returns:
        The text before the first period, stripped of surrounding
        whitespace, terminated with a single period, and with every "-"
        character removed.
    """
    full_text = summary[0]["summary_text"]
    # Everything before the first "." (the whole text if there is none).
    first_sentence, _, _ = full_text.partition(".")
    terminated = f"{first_sentence.strip()}."
    # Drop hyphens from the final sentence.
    return terminated.replace("-", "")

# Function for summarization and audio conversion
def summarize_and_convert_to_audio(pdf_file):
    """Summarize a PDF's abstract in one sentence and synthesize it as speech.

    Args:
        pdf_file: Raw bytes of the uploaded PDF.

    Returns:
        A ``(sampling_rate, samples)`` tuple, where ``samples`` is a NumPy
        array of 16-bit integer PCM values. This is the in-memory format the
        ``gr.Audio`` output component accepts.

    Raises:
        gr.Error: If no abstract could be extracted from the PDF. Gradio
            shows the message to the user instead of trying to interpret a
            returned string as an audio file path.
    """
    # Extract the abstract first so a PDF without one is rejected before
    # any model is loaded.
    abstract_text = extract_abstract(pdf_file)
    if not abstract_text:
        raise gr.Error("No 'abstract' section found in the uploaded PDF. Please upload a different PDF.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    # Summarize the abstract
    summarization_pipeline = pipeline(task='summarization', model='knkarthick/MEETING_SUMMARY', min_length=15, max_length=30)
    summarized_text = summarization_pipeline(abstract_text)
    one_sentence_summary = process_summary(summarized_text)

    print(one_sentence_summary)

    # Load the text-to-speech model and move it to the selected device
    processor = AutoProcessor.from_pretrained("suno/bark")
    model = AutoModel.from_pretrained("suno/bark").to(device)

    # Text-to-audio conversion
    inputs = processor(
        text=[one_sentence_summary],
        return_tensors="pt",
    )
    inputs = inputs.to(device)

    speech_values = model.generate(**inputs, do_sample=True)
    sampling_rate = model.generation_config.sample_rate

    # Convert speech values to a 1-D array of samples
    audio_data = speech_values.cpu().numpy().squeeze()

    # Assumes the generated samples are floats nominally in [-1, 1]
    # (TODO confirm). Clip and scale by 32767 so a full-scale +1.0 sample
    # does not wrap around to -32768 when cast to int16.
    pcm_samples = (np.clip(audio_data, -1.0, 1.0) * 32767).astype(np.int16)

    return sampling_rate, pcm_samples


# Create a Gradio interface
# Wire the summarizer into a simple upload-in / audio-out web UI.
iface = gr.Interface(
    fn=summarize_and_convert_to_audio,
    # type="binary" hands the function the file content as bytes; extension
    # filters need a leading dot (".pdf") to restrict the picker to PDFs.
    inputs=gr.UploadButton(label="Upload PDF", type="binary", file_types=[".pdf"]),
    outputs=gr.Audio(label="Audio"),
    title="PDF Abstract Summarizer",
    description="Upload a PDF with an abstract to generate a summarized audio."
)

iface.launch(share=True)