lincolnlegal / app.py
Ari
Update app.py
a8d33c8 verified
raw
history blame
1.35 kB
import gradio as gr
from docx import Document # For .docx handling
from fpdf import FPDF # Python-native PDF generation
from gtts import gTTS
# Function to extract text from a .docx file and convert to PDF
def docx_to_pdf(docx_file):
    """Convert an uploaded .docx file into plain text, a PDF, and spoken audio.

    Args:
        docx_file: The uploaded document, either a filesystem path (``str``)
            or a file-like object whose ``.name`` attribute holds the path.
            ``None`` (nothing uploaded) is reported as an error.

    Returns:
        A ``(audio_path, extracted_text, pdf_path)`` tuple. On failure both
        paths are ``None`` and the middle element is a message starting with
        ``"An error occurred: "``; no exception is propagated to the caller.
    """
    import os
    import tempfile

    if docx_file is None:
        return None, "An error occurred: no file was uploaded.", None

    try:
        # Accept both a plain path and a file wrapper exposing the path as .name.
        docx_path = getattr(docx_file, "name", docx_file)

        # Extract the text of every paragraph, one paragraph per line.
        doc = Document(docx_path)
        extracted_text = "\n".join(para.text for para in doc.paragraphs)

        # gTTS refuses empty input with a terse assertion; report it clearly.
        if not extracted_text.strip():
            return None, "An error occurred: the document contains no text.", None

        # A fresh directory per call keeps concurrent requests from
        # overwriting each other's output files.
        output_dir = tempfile.mkdtemp(prefix="docx_to_pdf_")

        # The built-in "Arial" core font only covers Latin-1, so characters
        # outside that range are replaced with "?" instead of aborting the
        # whole conversion. The returned text and the audio keep the original.
        pdf_text = extracted_text.encode("latin-1", "replace").decode("latin-1")

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # Text is passed positionally: the keyword is named differently
        # across fpdf releases (txt vs. text).
        pdf.multi_cell(190, 10, pdf_text)
        pdf_output_path = os.path.join(output_dir, "document_output.pdf")
        pdf.output(pdf_output_path)

        # gTTS produces MP3 data, so the file must carry an .mp3 extension.
        tts = gTTS(text=extracted_text, lang='en', slow=False)
        audio_output_path = os.path.join(output_dir, "document_audio.mp3")
        tts.save(audio_output_path)

        return audio_output_path, extracted_text, pdf_output_path
    except Exception as e:
        # UI boundary: surface the failure in the text output instead of
        # raising into the web framework.
        return None, f"An error occurred: {str(e)}", None
# Gradio UI wiring: a single file upload feeds docx_to_pdf, whose three
# return values map, in order, onto the audio, text and file outputs.
iface = gr.Interface(
    fn=docx_to_pdf,
    inputs=gr.File(label="Upload .docx File"),
    outputs=[
        gr.Audio(label="Generated Audio"),
        gr.Textbox(label="Extracted Text"),
        gr.File(label="Generated PDF"),
    ],
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()