Spaces:
Sleeping
Sleeping
import html
import os
import tempfile

import gradio as gr
import pdfkit
from docx import Document  # For .docx handling
from gtts import gTTS
# Location of the wkhtmltopdf executable. The default matches the path the
# Docker image is expected to install it to; set the WKHTMLTOPDF_PATH
# environment variable to override it without editing this file.
WKHTMLTOPDF_PATH = os.environ.get("WKHTMLTOPDF_PATH") or "/usr/local/bin/wkhtmltopdf"

# pdfkit configuration bound to that executable; passed to every pdfkit call
# in this module.
config = pdfkit.configuration(wkhtmltopdf=WKHTMLTOPDF_PATH)
def _resolve_upload_path(docx_file):
    """Return the filesystem path of a Gradio upload, or None if nothing was uploaded."""
    if docx_file is None:
        return None
    # Depending on the Gradio version/configuration the upload arrives either
    # as a file-like wrapper exposing `.name` or as a plain path string.
    return getattr(docx_file, "name", docx_file)


def docx_to_pdf(docx_file):
    """Extract the text of a .docx upload and render it as a PDF and as speech.

    Args:
        docx_file: The value Gradio passes for the uploaded file: an object
            with a ``name`` attribute holding the path, a path string, or
            ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple ``(audio_path, text, pdf_path)``. On success ``audio_path``
        is the generated MP3 file, ``text`` is the extracted document text and
        ``pdf_path`` is the generated PDF. On failure the tuple is
        ``(None, "An error occurred: ...", None)`` so the message is shown in
        the text output instead of raising.
    """
    source_path = _resolve_upload_path(docx_file)
    if source_path is None:
        return None, "An error occurred: no file was uploaded.", None

    try:
        # Only paragraph text is extracted, one line per paragraph.
        doc = Document(source_path)
        extracted_text = "\n".join(para.text for para in doc.paragraphs)

        # Nothing to render or speak; report it before doing any work.
        if not extracted_text.strip():
            return None, "An error occurred: the document contains no text.", None

        # Escape the text so characters such as '<' and '&' are rendered
        # literally instead of being interpreted as markup by wkhtmltopdf.
        html_content = f"""
        <html>
        <head><meta charset="UTF-8"></head>
        <body><pre>{html.escape(extracted_text, quote=False)}</pre></body>
        </html>
        """

        # Write outputs to a fresh directory per call so simultaneous
        # requests cannot overwrite each other's files.
        output_dir = tempfile.mkdtemp(prefix="docx_to_pdf_")

        pdf_output_path = os.path.join(output_dir, "document_output.pdf")
        pdfkit.from_string(html_content, pdf_output_path, configuration=config)

        # gTTS produces MP3 data, so the file extension must be .mp3.
        tts = gTTS(text=extracted_text, lang="en", slow=False)
        audio_output_path = os.path.join(output_dir, "document_audio.mp3")
        tts.save(audio_output_path)

        return audio_output_path, extracted_text, pdf_output_path
    except Exception as e:
        # UI boundary: surface any failure as a message in the text output
        # rather than crashing the Gradio handler.
        return None, f"An error occurred: {str(e)}", None
# Gradio UI: a single .docx upload goes in; the spoken audio, the extracted
# text and the rendered PDF come out, in the order docx_to_pdf returns them.
_upload_component = gr.File(label="Upload .docx File")
_result_components = [
    gr.Audio(label="Generated Audio"),
    gr.Textbox(label="Extracted Text"),
    gr.File(label="Generated PDF"),
]

iface = gr.Interface(
    fn=docx_to_pdf,
    inputs=_upload_component,
    outputs=_result_components,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()