Spaces:

arithescientist
/

lincolnlegal

Running

File size: 1,809 Bytes

17e34a5
f0068ab
3813c2d
e249de1
cffceba
 
 
 
 
 
 
c763d6c
f0068ab
 
7f2b3e5
f0068ab
 
 
 
 
 
7f2b3e5
e249de1
 
 
 
 
 
 
 
cffceba
f0068ab
cffceba
f0068ab
 
 
 
7f2b3e5
f0068ab
 
 
7f2b3e5
 
 
 
 
f0068ab
 
 
7f2b3e5
0751294
342a4a2
7f2b3e5

import html
import os
import tempfile

import gradio as gr
import pdfkit
from docx import Document  # For .docx handling
from gtts import gTTS

# Path to the wkhtmltopdf executable. The default is the path that was
# previously hard-coded here; set the WKHTMLTOPDF_PATH environment variable to
# point at a different location without editing this file.
WKHTMLTOPDF_PATH = os.environ.get("WKHTMLTOPDF_PATH", "/usr/local/bin/wkhtmltopdf")

# pdfkit configuration that uses the executable selected above
config = pdfkit.configuration(wkhtmltopdf=WKHTMLTOPDF_PATH)

# Extract the text of a .docx file and render it as a PDF and as spoken audio
def docx_to_pdf(docx_file):
    """Convert an uploaded .docx file into speech audio, plain text and a PDF.

    Args:
        docx_file: The uploaded file. Either an object whose ``name``
            attribute is the path of the file on disk, or the path itself.
            ``None`` (nothing uploaded) is reported as an error.

    Returns:
        A ``(audio_path, text, pdf_path)`` tuple. On success ``audio_path``
        and ``pdf_path`` are the generated files and ``text`` is the text of
        the document's paragraphs joined with newlines. On any failure both
        paths are ``None`` and ``text`` holds an ``"An error occurred: ..."``
        message; the function itself never raises.
    """
    if docx_file is None:
        return None, "An error occurred: no file was uploaded.", None

    try:
        # Accept both a file-like wrapper exposing `.name` and a bare path.
        docx_path = getattr(docx_file, "name", docx_file)

        # Extract text from the .docx file
        doc = Document(docx_path)
        extracted_text = '\n'.join(para.text for para in doc.paragraphs)

        # Nothing to render or speak: stop before producing any output files.
        if not extracted_text.strip():
            return None, "An error occurred: the document contains no text.", None

        # Escape the text so that characters such as '<' and '&' are shown
        # literally instead of being interpreted as markup by wkhtmltopdf.
        html_content = f"""
        <html>
        <head><meta charset="UTF-8"></head>
        <body><pre>{html.escape(extracted_text)}</pre></body>
        </html>
        """

        # Write each request's outputs into its own directory so overlapping
        # requests cannot overwrite one another's files. The directory is not
        # removed here because the returned paths must outlive this call.
        output_dir = tempfile.mkdtemp(prefix="docx_to_pdf_")

        # Generate the PDF using pdfkit with the custom wkhtmltopdf path
        pdf_output_path = os.path.join(output_dir, "document_output.pdf")
        pdfkit.from_string(html_content, pdf_output_path, configuration=config)

        # Convert the text to audio using gTTS. gTTS writes MP3 data, so the
        # file extension has to say so.
        tts = gTTS(text=extracted_text, lang='en', slow=False)
        audio_output_path = os.path.join(output_dir, "document_audio.mp3")
        tts.save(audio_output_path)

        return audio_output_path, extracted_text, pdf_output_path

    except Exception as e:
        # UI boundary: surface the failure in the text output instead of raising.
        return None, f"An error occurred: {str(e)}", None

# Gradio interface: one file upload is passed to docx_to_pdf, and its three
# return values are bound, in order, to an Audio, a Textbox and a File component.
iface = gr.Interface(
    fn=docx_to_pdf,
    inputs=gr.File(label="Upload .docx File"),
    outputs=[
        gr.Audio(label="Generated Audio"),
        gr.Textbox(label="Extracted Text"),
        gr.File(label="Generated PDF"),
    ],
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()