# Hugging Face file-view header captured with the source (not part of the program):
# euler314 · "Update app.py" · commit 0c835d8 (verified) · 7.23 kB
"""
Streamlit Universal File-Format Converter
----------------------------------------
A Streamlit app for Hugging Face Spaces that **actually converts** file
contents across a wide array of formats, leveraging local libraries
(no API keys needed):
• **Images** via Pillow (JPEG, PNG, GIF, BMP, TIFF, ICO, WebP)
• **Text & markup** via pypandoc (MD, HTML, LaTeX, DOCX, PDF, etc.)
• **Office docs** via unoconv + LibreOffice headless (PDF, DOCX, PPTX, XLSX)
• **Audio/video** via ffmpeg-python (MP3, WAV, MP4, AVI, MKV, MOV, etc.)
• **MIME detection** via python-magic
Disallowed uploads: `.exe`, `.bin`
All outputs are collected into a single in-memory ZIP archive for download.
Created 2025-05-22 • v3
"""
from __future__ import annotations
# Point Streamlit (and HOME) at a writable location. This has to run before
# `streamlit` is imported further down. Values already present in the
# environment are left untouched because `setdefault` only fills in gaps.
import os
import pathlib

_streamlit_home = os.environ.setdefault("STREAMLIT_HOME", "/tmp/.streamlit")
os.environ.setdefault("HOME", "/tmp")
# Create the directory (and any missing parents); no error if it exists.
pathlib.Path(_streamlit_home).mkdir(parents=True, exist_ok=True)
import io
import zipfile
import tempfile
import subprocess
from datetime import datetime
from pathlib import Path
import streamlit as st
from PIL import Image
import pypandoc
import ffmpeg
import magic # python-magic for mime detection
# -----------------------------------------------------------------------------
# Supported extensions
# -----------------------------------------------------------------------------
# Lower-case, dot-prefixed extensions grouped by file family.
IMAGE_EXTS = {
    ".jpg", ".jpeg", ".png", ".gif",
    ".bmp", ".tiff", ".ico", ".webp",
}
TEXT_EXTS = {
    ".txt", ".md", ".csv", ".json",
    ".xml", ".html", ".css", ".js",
}
MEDIA_EXTS = {
    ".mp3", ".wav", ".mp4",
    ".avi", ".mkv", ".mov",
}
DOC_EXTS = {
    ".pdf", ".doc", ".docx", ".ppt", ".pptx",
    ".xls", ".xlsx", ".odt", ".ods",
}
# Alphabetically sorted list of every extension from the four families above.
ALLOWED_TARGET_EXTS = sorted(
    set().union(IMAGE_EXTS, TEXT_EXTS, MEDIA_EXTS, DOC_EXTS)
)
# Source extensions that are rejected outright.
DISALLOWED_SOURCE_EXTS = {".exe", ".bin"}
# -----------------------------------------------------------------------------
# UI elements
# -----------------------------------------------------------------------------
def sidebar_target_extension() -> str:
    """Draw the sidebar settings and return the selected target extension.

    A free-text filter narrows ``ALLOWED_TARGET_EXTS`` by substring match
    (the filter text is lower-cased first). When nothing matches, an error
    is shown in the sidebar and the full list is offered instead. ``.pdf``
    is pre-selected whenever it is among the offered choices; otherwise the
    first choice is.
    """
    sidebar = st.sidebar
    sidebar.header("Settings")
    needle = sidebar.text_input("Filter extensions… (optional)").lower()

    matches = [ext for ext in ALLOWED_TARGET_EXTS if needle in ext]
    if not matches:
        sidebar.error("No extension matches that filter.")
        matches = ALLOWED_TARGET_EXTS

    try:
        default_index = matches.index(".pdf")
    except ValueError:
        default_index = 0

    return sidebar.selectbox(
        "Target extension for **all** files", matches, index=default_index
    )
def uploader():
    """Render the multi-file upload widget and return whatever it yields.

    No extension filter is applied at the widget level (``type=None``).
    """
    widget_options = {"type": None, "accept_multiple_files": True}
    return st.file_uploader("Upload files to convert", **widget_options)
# -----------------------------------------------------------------------------
# Conversion functions
# -----------------------------------------------------------------------------
def convert_image(data: bytes, target_ext: str) -> bytes:
    """Re-encode image bytes into the format implied by ``target_ext``.

    Args:
        data: Raw bytes of the source image (any format Pillow can open).
        target_ext: Dot-prefixed, lower-case target extension; must be one
            of the keys in the format table below.

    Returns:
        The encoded image as bytes.

    Raises:
        KeyError: If ``target_ext`` is not a supported image extension.
        OSError: If Pillow cannot decode the input or encode the output.
    """
    pil_formats = {
        ".jpg": "JPEG", ".jpeg": "JPEG", ".png": "PNG", ".gif": "GIF",
        ".bmp": "BMP", ".tiff": "TIFF", ".ico": "ICO", ".webp": "WEBP",
    }
    img = Image.open(io.BytesIO(data))
    fmt = pil_formats[target_ext]

    # JPEG has no alpha channel or palette support: saving e.g. an RGBA or
    # P-mode image raises "cannot write mode ... as JPEG". Normalise first.
    if fmt == "JPEG" and img.mode not in ("RGB", "L", "CMYK"):
        has_alpha = img.mode in ("RGBA", "LA", "PA") or "transparency" in img.info
        if has_alpha:
            # Composite onto white so transparent areas do not expose
            # whatever colour data sits underneath the alpha channel.
            rgba = img.convert("RGBA")
            canvas = Image.new("RGB", rgba.size, (255, 255, 255))
            canvas.paste(rgba, mask=rgba.split()[-1])
            img = canvas
        else:
            img = img.convert("RGB")

    buf = io.BytesIO()
    img.save(buf, format=fmt)
    return buf.getvalue()
def convert_text_markup(data: bytes, orig_ext: str, target_ext: str) -> bytes:
    """Convert text/markup bytes between formats using pypandoc.

    The source and target pandoc format names are taken from the extensions
    with the leading dot removed.

    Args:
        data: Raw source bytes; decoded as UTF-8, undecodable bytes dropped.
        orig_ext: Dot-prefixed source extension (e.g. ``".md"``).
        target_ext: Dot-prefixed target extension (e.g. ``".html"``).

    Returns:
        The converted document as bytes. Text outputs are UTF-8 encoded;
        binary outputs (PDF, DOCX, ...) are the bytes pandoc wrote to disk.

    Raises:
        RuntimeError / OSError: Propagated from pypandoc when the format is
            unknown or pandoc (or its PDF engine) is unavailable.
    """
    text = data.decode("utf-8", errors="ignore")
    source_format = orig_ext.lstrip('.')
    target_format = target_ext.lstrip('.')

    # pypandoc refuses to return these formats as a string and raises unless
    # an output file is supplied, so write to a temp file and read it back.
    file_only_formats = {"pdf", "docx", "odt", "epub", "epub3", "pptx"}
    if target_format in file_only_formats:
        with tempfile.TemporaryDirectory() as tmp:
            out_path = Path(tmp) / f"output.{target_format}"
            pypandoc.convert_text(
                text,
                to=target_format,
                format=source_format,
                outputfile=str(out_path),
            )
            return out_path.read_bytes()

    converted = pypandoc.convert_text(text, to=target_format, format=source_format)
    return converted.encode('utf-8')
def convert_office(temp_dir: str, data: bytes, orig_ext: str, target_ext: str) -> bytes:
    """Convert a document by shelling out to ``unoconv``.

    The payload is written to ``input<orig_ext>`` inside ``temp_dir``,
    ``unoconv`` is asked to write ``output<target_ext>`` next to it, and the
    resulting file's bytes are returned. The caller owns ``temp_dir``.

    Raises:
        subprocess.CalledProcessError: If ``unoconv`` exits non-zero.
    """
    workdir = Path(temp_dir)
    source_file = workdir / ("input" + orig_ext)
    result_file = workdir / ("output" + target_ext)

    source_file.write_bytes(data)

    # Argument list (no shell), with the target format given without its dot.
    command = [
        "unoconv",
        "-f", target_ext.lstrip('.'),
        "-o", str(result_file),
        str(source_file),
    ]
    subprocess.run(command, check=True)

    return result_file.read_bytes()
def convert_media(data: bytes, target_ext: str) -> bytes:
    """Transcode audio/video bytes to the container implied by ``target_ext``.

    The conversion goes through temporary files rather than pipes:
    containers such as MP4/MOV need a seekable output, and letting ffmpeg
    pick the muxer from the output file name avoids passing extensions that
    are not muxer names (e.g. ``mkv``).

    Args:
        data: Raw bytes of the source media.
        target_ext: Dot-prefixed target extension (e.g. ``".mp3"``).

    Returns:
        The bytes of the converted file.

    Raises:
        RuntimeError: If ffmpeg exits with an error; the message carries the
            last line of ffmpeg's stderr.
    """
    with tempfile.TemporaryDirectory() as tmp:
        in_path = Path(tmp) / "input"
        out_path = Path(tmp) / f"output{target_ext}"
        in_path.write_bytes(data)

        try:
            (
                ffmpeg.input(str(in_path))
                .output(str(out_path))
                .run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
            )
        except ffmpeg.Error as exc:
            # Surface ffmpeg's own diagnostic instead of returning empty or
            # truncated output as if the conversion had succeeded.
            stderr_text = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
            reason = stderr_text.splitlines()[-1] if stderr_text else "unknown error"
            raise RuntimeError(f"ffmpeg failed: {reason}") from exc

        return out_path.read_bytes()
def convert_file(file: st.runtime.uploaded_file_manager.UploadedFile, target_ext: str) -> tuple[bytes, str]:
    """Convert one uploaded file to ``target_ext`` and describe what happened.

    Converters are selected from the source extension, the detected MIME
    type and the target extension, in this priority order: image, text/markup,
    office/doc, media. Every applicable converter is tried in turn; the
    first one that succeeds wins. If none applies, or all of them fail, the
    original bytes are returned unchanged (the caller renames the file).

    Args:
        file: The uploaded file; ``file.name`` and ``file.read()`` are used.
        target_ext: Dot-prefixed, lower-case target extension.

    Returns:
        ``(data, note)`` where ``note`` is a short human-readable status such
        as ``"image converted"`` or ``"renamed only"``.

    Raises:
        ValueError: If the source extension is in ``DISALLOWED_SOURCE_EXTS``.
    """
    name = Path(file.name)
    orig_ext = name.suffix.lower()
    # Reject before reading so a disallowed payload is never loaded.
    if orig_ext in DISALLOWED_SOURCE_EXTS:
        raise ValueError(f"Disallowed: {orig_ext}")

    raw = file.read()
    mime = magic.from_buffer(raw, mime=True) or ''
    differs = orig_ext != target_ext

    def _run_office() -> bytes:
        with tempfile.TemporaryDirectory() as tmp:
            return convert_office(tmp, raw, orig_ext, target_ext)

    # (status note, zero-argument converter) pairs in priority order.
    candidates = []
    if orig_ext in IMAGE_EXTS and target_ext in IMAGE_EXTS:
        candidates.append(("image converted", lambda: convert_image(raw, target_ext)))
    if differs and (mime.startswith('text/') or orig_ext in TEXT_EXTS):
        candidates.append(
            ("text/markup converted", lambda: convert_text_markup(raw, orig_ext, target_ext))
        )
    if orig_ext in DOC_EXTS or target_ext in DOC_EXTS:
        candidates.append(("office/doc converted", _run_office))
    if differs and (mime.startswith(('audio/', 'video/')) or orig_ext in MEDIA_EXTS):
        candidates.append(("media converted", lambda: convert_media(raw, target_ext)))

    failures = []
    for note, run in candidates:
        try:
            return run(), note
        except Exception as e:
            # Deliberately broad: any converter failure should fall through
            # to the next applicable converter instead of aborting, e.g.
            # pandoc cannot write .xlsx but unoconv can.
            failures.append(str(e))

    if failures:
        reason = "; ".join(failures)
        st.warning(f"⚠️ Conversion failed for {file.name}: {reason}. Falling back to rename.")
    # Fallback: no conversion, just rename
    return raw, "renamed only"
# -----------------------------------------------------------------------------
# ZIP packaging
# -----------------------------------------------------------------------------
def _unique_archive_name(candidate: str, taken: set) -> str:
    """Return ``candidate``, or a ``stem_N.ext`` variant not yet in ``taken``.

    Names are compared case-insensitively and the chosen name is recorded in
    ``taken`` (case-folded) before returning.
    """
    path = Path(candidate)
    unique = candidate
    counter = 1
    while unique.casefold() in taken:
        unique = f"{path.stem}_{counter}{path.suffix}"
        counter += 1
    taken.add(unique.casefold())
    return unique


def package_zip(files: list[st.runtime.uploaded_file_manager.UploadedFile], target_ext: str) -> io.BytesIO:
    """Convert every uploaded file and bundle the results into one ZIP.

    Files whose extension is in ``DISALLOWED_SOURCE_EXTS`` are skipped with
    a warning. Each remaining file is passed to ``convert_file`` and stored
    under its original base name with ``target_ext`` as the suffix. When two
    uploads would map to the same archive name (``a.png`` and ``a.jpg`` both
    becoming ``a.pdf``), later ones get a numeric ``_N`` suffix so nothing
    is overwritten on extraction.

    Args:
        files: Uploaded files to convert.
        target_ext: Dot-prefixed target extension applied to all files.

    Returns:
        An in-memory buffer holding the ZIP, rewound to position 0.
    """
    buf = io.BytesIO()
    used_names = set()
    with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as zf:
        for file in files:
            name = Path(file.name)
            if name.suffix.lower() in DISALLOWED_SOURCE_EXTS:
                st.warning(f"Skipping disallowed file: {name.name}")
                continue
            data, note = convert_file(file, target_ext)
            out_name = _unique_archive_name(name.with_suffix(target_ext).name, used_names)
            zf.writestr(out_name, data)
            # Separate the source and output names so the message is readable.
            st.success(f"{note}: {name.name} → {out_name}")
    # Rewind so the caller can read the archive from the start.
    buf.seek(0)
    return buf
# -----------------------------------------------------------------------------
# Main
# -----------------------------------------------------------------------------
def main():
    """Render the page: pick a target extension, upload, convert, download.

    After files are uploaded and the convert button is pressed, all files are
    converted via ``package_zip`` and a download button for the resulting
    archive is shown. The archive name carries a UTC timestamp.
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timezone

    st.set_page_config("Universal Converter", page_icon="🔄", layout="centered")
    st.title("🔄 Universal File-Format Converter")
    st.write("Upload files of any format; choose a new extension; download a ZIP of converted files.")
    target_ext = sidebar_target_extension()
    files = uploader()
    if files and st.button("Convert & Download 🚀"):
        zip_buf = package_zip(files, target_ext)
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted string is identical.
        ts = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
        st.download_button("⬇️ Download ZIP", zip_buf,
                           file_name=f"converted_{ts}.zip",
                           mime='application/zip')
    st.caption("© 2025 Universal Converter • Streamlit • Hugging Face Spaces")


if __name__ == '__main__':
    main()