forest / main.py
smf2010's picture
Update main.py
6c109a3 verified
from flask import Flask, request, render_template
from docx import Document
from io import StringIO
from lxml import etree
# Flask application object; the route decorators below register their views on it.
app = Flask(__name__)
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/convert', methods=['POST'])
def convert():
    """Convert the Word file uploaded in the ``file`` form field to HTML.

    The generated HTML string is returned directly as the response body.
    """
    uploaded = request.files['file']
    return document_to_html(Document(uploaded))
def document_to_html(document):
    """
    Convert a Word document to an HTML string.

    The document is first serialized to XML, the XML is parsed into an lxml
    tree, and the ``docx2html.xslt`` stylesheet is applied to that tree.

    :param document: Word document object
    :return: HTML string
    """
    # Serialize the Word document to XML.
    xml = document_to_xml(document)
    # An XSLT transform has to be applied to a parsed tree, not to raw text.
    # Parse from bytes: lxml refuses str input that carries an encoding
    # declaration, while bytes input is always accepted.
    if isinstance(xml, str):
        xml = xml.encode('utf-8')
    source_tree = etree.fromstring(xml)
    # Use lxml to transform the XML into HTML. The stylesheet path is
    # relative, so it is resolved against the process working directory.
    stylesheet = etree.parse('docx2html.xslt')
    transform = etree.XSLT(stylesheet)
    return str(transform(source_tree))
def document_to_xml(document):
    """
    Serialize a Word document's main body to an XML string.

    :param document: Word document object exposing its root XML element as
        ``document.element`` (as python-docx ``Document`` objects do)
    :return: XML string of the document's root element
    """
    # Serialize the root element directly. ``document.save()`` would emit the
    # whole .docx package (a binary ZIP archive) rather than XML text, and
    # ``open()`` cannot be used on an in-memory buffer.
    return document.element.xml
if __name__ == '__main__':
    # When run as a script, start the server on all interfaces, port 7860.
    app.run(host='0.0.0.0', port=7860)