File size: 3,643 Bytes
f6f97c8
f2415f1
f6f97c8
f2415f1
 
b12c624
f6f97c8
 
f2415f1
 
 
 
 
f6f97c8
 
 
 
 
 
63cb281
 
 
 
f6f97c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f2415f1
338bb7c
f2415f1
 
f6f97c8
 
 
f2415f1
f6f97c8
 
 
 
 
f2415f1
f6f97c8
 
 
 
 
f2415f1
f6f97c8
 
 
 
 
 
 
 
63cb281
f6f97c8
 
 
 
 
 
 
 
 
f2415f1
63cb281
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Response
from fastapi.responses import FileResponse
from pydantic import BaseModel
from pdf2docx import Converter
import os
import shutil
import pdfkit
import uuid

# Router on which the conversion endpoints in this module are registered.
router = APIRouter()

# Directory where intermediate PDF/DOCX files are written during a conversion.
TEMP_DIR = "/.tempfiles"

class HTMLRequest(BaseModel):
    """Request body carrying the HTML markup to be converted."""

    # Raw HTML string handed to html_to_pdf by the HTML conversion endpoints.
    html_content: str

def ensure_temp_dir():
    """Create the temporary working directory if it is not already there.

    An already existing directory is left as it is and does not raise.
    """
    os.makedirs(name=TEMP_DIR, exist_ok=True)

def remove_file(path: str) -> None:
    """Delete the file at ``path``, doing nothing if it is already gone.

    The unlink is attempted directly instead of checking for existence
    first, so a file that disappears between a check and the delete
    (for example, removed by another cleanup task) cannot raise.

    Args:
        path: Filesystem path of the file to delete.

    Raises:
        OSError: For failures other than the file being absent, such as
            insufficient permissions or ``path`` being a directory.
    """
    try:
        os.unlink(path)
    except FileNotFoundError:
        # Already removed: cleanup is best-effort and idempotent.
        pass

def generate_temp_filepath(extension: str) -> str:
    """Build a unique file path inside the temporary directory.

    Args:
        extension: File extension without the leading dot (e.g. ``"pdf"``).

    Returns:
        A path of the form ``<TEMP_DIR>/temp_<uuid4>.<extension>``. The file
        itself is not created.
    """
    unique_name = f"temp_{uuid.uuid4()}.{extension}"
    return os.path.join(TEMP_DIR, unique_name)

def html_to_pdf(html_content: str, output_path: str) -> None:
    """Render an HTML string to a PDF file via pdfkit.

    The page is A4 with a 0.75 inch margin on every side and UTF-8 encoding.

    Args:
        html_content: HTML markup to render.
        output_path: Path the resulting PDF is written to.
    """
    uniform_margin = '0.75in'
    render_options = {
        'page-size': 'A4',
        'margin-top': uniform_margin,
        'margin-right': uniform_margin,
        'margin-bottom': uniform_margin,
        'margin-left': uniform_margin,
        'encoding': "UTF-8",
    }
    pdfkit.from_string(html_content, output_path, options=render_options)

def pdf_to_docx(pdf_path: str, docx_path: str) -> None:
    """Convert the PDF at ``pdf_path`` into a DOCX file at ``docx_path``.

    Args:
        pdf_path: Path of the source PDF.
        docx_path: Path the converted DOCX is written to.

    Raises:
        Exception: Whatever the pdf2docx converter raises is propagated
            unchanged, after the converter has been closed.
    """
    cv = Converter(pdf_path)
    try:
        cv.convert(docx_path)
    finally:
        # Close the converter even when conversion fails, so the opened PDF
        # is not left open while callers try to delete the temp file.
        cv.close()

def handle_conversion(convert_func, input_path: str, output_path: str, background_tasks: BackgroundTasks):
    """Run a file conversion and return the result as a DOCX download.

    Args:
        convert_func: Callable invoked as ``convert_func(input_path, output_path)``.
        input_path: Path of the temporary source file.
        output_path: Path the converted file is expected at afterwards.
        background_tasks: Task queue on which deletion of both temporary
            files is scheduled once the conversion has succeeded.

    Returns:
        A ``FileResponse`` serving ``output_path`` with the DOCX media type
        and a randomly suffixed download filename.

    Raises:
        HTTPException: Status 500 if the conversion raises or produces no
            output file; both temporary files are removed first.
    """
    docx_media_type = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    try:
        convert_func(input_path, output_path)

        output_missing = not os.path.exists(output_path)
        if output_missing:
            raise FileNotFoundError(f"Converted file not found: {output_path}")

        # Defer cleanup to background tasks; the response still needs the file.
        for temp_path in (input_path, output_path):
            background_tasks.add_task(remove_file, temp_path)

        download_name = f"converted_document_{uuid.uuid4()}.docx"
        return FileResponse(
            output_path,
            media_type=docx_media_type,
            filename=download_name,
        )
    except Exception as exc:
        # Nothing will be served, so delete the temporary files right away.
        for temp_path in (input_path, output_path):
            remove_file(temp_path)
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(exc)}")

@router.post("/convert/pdf_to_docx")
async def convert_pdf_to_docx(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Convert an uploaded PDF into a DOCX download.

    Args:
        background_tasks: Task queue passed to ``handle_conversion`` for
            deleting the temporary files.
        file: Uploaded file; its name must end in ``.pdf`` (any letter case).

    Returns:
        The response produced by ``handle_conversion``.

    Raises:
        HTTPException: Status 400 if the upload has no filename or the
            filename does not end in ``.pdf``; status 500 (raised by
            ``handle_conversion``) if the conversion fails.
    """
    # The filename can be missing; treat that as "not a PDF" rather than
    # failing with an AttributeError. The check ignores letter case so that
    # names such as "REPORT.PDF" are accepted.
    upload_name = file.filename or ""
    if not upload_name.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="File must be a PDF")

    ensure_temp_dir()
    pdf_temp_path = generate_temp_filepath("pdf")
    # Swap only the trailing extension instead of every ".pdf" substring.
    docx_temp_path = os.path.splitext(pdf_temp_path)[0] + '.docx'

    try:
        with open(pdf_temp_path, "wb") as pdf_file:
            shutil.copyfileobj(file.file, pdf_file)
    except Exception:
        # Do not leave a partially written upload behind.
        remove_file(pdf_temp_path)
        raise

    return handle_conversion(pdf_to_docx, pdf_temp_path, docx_temp_path, background_tasks)

@router.post("/convert/html_to_pdf")
async def convert_html_to_pdf(request: HTMLRequest):
    """Render the posted HTML to a PDF and return it in the response body.

    The PDF is written to a temporary file, read back into memory, and the
    temporary file is deleted before the response is returned.

    Args:
        request: Request body holding the HTML to render.

    Returns:
        A ``Response`` with media type ``application/pdf`` containing the
        rendered bytes.

    Raises:
        HTTPException: Status 500 carrying the error text if any step fails;
            the temporary file is removed first.
    """
    # NOTE(review): html_to_pdf is a synchronous call inside this async
    # handler; confirm that blocking here is acceptable.
    ensure_temp_dir()
    rendered_path = generate_temp_filepath("pdf")

    try:
        html_to_pdf(request.html_content, rendered_path)
        with open(rendered_path, "rb") as rendered:
            payload = rendered.read()
        remove_file(rendered_path)
        return Response(content=payload, media_type="application/pdf")
    except Exception as exc:
        remove_file(rendered_path)
        raise HTTPException(status_code=500, detail=str(exc))

@router.post("/convert/html_to_docx")
async def convert_html_to_docx(background_tasks: BackgroundTasks, request: HTMLRequest):
    """Convert the posted HTML to DOCX by way of an intermediate PDF.

    Args:
        background_tasks: Task queue passed to ``handle_conversion`` for
            deleting the temporary files.
        request: Request body holding the HTML to convert.

    Returns:
        The response produced by ``handle_conversion``.

    Raises:
        HTTPException: Status 500 with a ``Conversion failed: ...`` detail if
            either the HTML-to-PDF or the PDF-to-DOCX step fails.
    """
    ensure_temp_dir()
    pdf_temp_path = generate_temp_filepath("pdf")
    # Swap only the trailing extension instead of every ".pdf" substring.
    docx_temp_path = os.path.splitext(pdf_temp_path)[0] + '.docx'

    # Only the HTML -> PDF step is guarded here. handle_conversion already
    # cleans up and raises its own HTTPException, which must not be caught
    # and wrapped a second time.
    try:
        html_to_pdf(request.html_content, pdf_temp_path)
    except Exception as e:
        # The DOCX has not been produced yet, so only the PDF can exist.
        remove_file(pdf_temp_path)
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e

    return handle_conversion(pdf_to_docx, pdf_temp_path, docx_temp_path, background_tasks)