Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import
|
2 |
import fitz
|
3 |
from transformers import pipeline, MBart50TokenizerFast, MBartForConditionalGeneration
|
4 |
from multiprocessing import Pool, cpu_count
|
@@ -63,20 +63,16 @@ def translate_summary(summary, lang):
|
|
63 |
|
64 |
return " ".join(translated_chunks)
|
65 |
|
66 |
-
|
67 |
-
|
68 |
# Function to read PDF and summarize and translate chunk by chunk
|
69 |
-
def summarize_and_translate_pdf(
|
70 |
-
# Save
|
71 |
-
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
|
72 |
-
temp_file.write(
|
73 |
-
temp_file_path = temp_file.name
|
74 |
|
75 |
try:
|
76 |
-
doc = fitz.open(
|
77 |
except FileNotFoundError:
|
78 |
-
|
79 |
-
return []
|
80 |
|
81 |
total_chunks = len(doc)
|
82 |
chunks = []
|
@@ -91,42 +87,26 @@ def summarize_and_translate_pdf(uploaded_file, lang):
|
|
91 |
translated_chunks = pool.starmap(summarize_and_translate_chunk, [(chunk, lang) for chunk in chunks])
|
92 |
|
93 |
# Delete temporary file
|
94 |
-
|
95 |
|
96 |
return translated_chunks
|
97 |
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
"Malayalam": "ml_IN", "Mongolian": "mn_MN", "Marathi": "mr_IN", "Polish": "pl_PL", "Pashto": "ps_AF",
|
118 |
-
"Portuguese": "pt_XX", "Swedish": "sv_SE", "Swahili": "sw_KE", "Tamil": "ta_IN", "Telugu": "te_IN",
|
119 |
-
"Thai": "th_TH", "Tagalog": "tl_XX", "Ukrainian": "uk_UA", "Urdu": "ur_PK", "Xhosa": "xh_ZA",
|
120 |
-
"Galician": "gl_ES", "Slovene": "sl_SI"
|
121 |
-
}
|
122 |
-
|
123 |
-
lang = st.selectbox("Select language for translation", list(languages.keys()))
|
124 |
-
|
125 |
-
# Translate PDF
|
126 |
-
if st.button("Summarize and Translate"):
|
127 |
-
translated_chunks = summarize_and_translate_pdf(uploaded_file, languages[lang])
|
128 |
-
|
129 |
-
# Display translated text
|
130 |
-
st.header("Translated Summary")
|
131 |
-
for chunk in translated_chunks:
|
132 |
-
st.write(chunk)
|
|
|
1 |
+
import gradio as gr
|
2 |
import fitz
|
3 |
from transformers import pipeline, MBart50TokenizerFast, MBartForConditionalGeneration
|
4 |
from multiprocessing import Pool, cpu_count
|
|
|
63 |
|
64 |
return " ".join(translated_chunks)
|
65 |
|
|
|
|
|
66 |
# Function to read PDF and summarize and translate chunk by chunk
|
67 |
+
def summarize_and_translate_pdf(pdf_content, lang):
|
68 |
+
# Save PDF content to a temporary file
|
69 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
70 |
+
temp_file.write(pdf_content)
|
|
|
71 |
|
72 |
try:
|
73 |
+
doc = fitz.open(temp_file.name)
|
74 |
except FileNotFoundError:
|
75 |
+
return "File not found. Please make sure the file path is correct."
|
|
|
76 |
|
77 |
total_chunks = len(doc)
|
78 |
chunks = []
|
|
|
87 |
translated_chunks = pool.starmap(summarize_and_translate_chunk, [(chunk, lang) for chunk in chunks])
|
88 |
|
89 |
# Close the temporary file handle (it was created with delete=False, so the file itself is not removed here)
|
90 |
+
temp_file.close()
|
91 |
|
92 |
return translated_chunks
|
93 |
|
94 |
+
# Gradio Interface
|
95 |
+
def summarize_and_translate_interface(pdf_content, lang):
    """Gradio callback: summarize and translate an uploaded PDF into one string.

    Args:
        pdf_content: The uploaded PDF. May be raw bytes, a filesystem path, or
            a file-like object exposing ``.name`` (the File input in this file
            is declared with ``type="file"``, which presumably hands the
            callback a temporary-file object rather than bytes -- confirm
            against the installed Gradio version).
        lang: Target language chosen in the dropdown; forwarded unchanged to
            ``summarize_and_translate_pdf``.
            NOTE(review): the dropdown supplies display names such as
            "Arabic"; confirm the downstream translation accepts these.

    Returns:
        The per-chunk translated summaries joined with newlines, or the
        message string unchanged when ``summarize_and_translate_pdf`` reports
        an error by returning a string instead of a list.
    """
    if pdf_content is None:
        # Nothing was uploaded; report it instead of failing on attribute access.
        return "Please upload a PDF file."

    # summarize_and_translate_pdf writes its argument to a binary temp file,
    # so anything that is not already bytes must be read from disk first.
    if not isinstance(pdf_content, (bytes, bytearray)):
        pdf_path = pdf_content if isinstance(pdf_content, str) else pdf_content.name
        with open(pdf_path, "rb") as pdf_file:
            pdf_content = pdf_file.read()

    result = summarize_and_translate_pdf(pdf_content, lang)

    # The error path returns a plain string; joining a string would insert a
    # newline between every character, so pass it through as-is.
    if isinstance(result, str):
        return result
    return "\n".join(result)
|
98 |
+
|
99 |
+
# Gradio UI
|
100 |
+
# Display names offered in the translation-language dropdown.
_LANGUAGE_CHOICES = [
    "Arabic", "Czech", "German", "English", "Spanish", "Estonian", "Finnish",
    "French", "Gujarati", "Hindi", "Italian", "Japanese", "Kazakh", "Korean",
    "Lithuanian", "Latvian", "Burmese", "Nepali", "Dutch", "Romanian", "Russian",
    "Sinhala", "Turkish", "Vietnamese", "Chinese", "Afrikaans", "Azerbaijani",
    "Bengali", "Persian", "Hebrew", "Croatian", "Indonesian", "Georgian", "Khmer",
    "Macedonian", "Malayalam", "Mongolian", "Marathi", "Polish", "Pashto",
    "Portuguese", "Swedish", "Swahili", "Tamil", "Telugu", "Thai", "Tagalog",
    "Ukrainian", "Urdu", "Xhosa", "Galician", "Slovene",
]

# Input widgets: the PDF upload and the target-language selector.
input_pdf = gr.inputs.File(type="file", label="Upload a PDF file")
language = gr.inputs.Dropdown(
    choices=_LANGUAGE_CHOICES,
    label="Select language for translation",
)

# Output widget showing the joined translated summaries.
output_text = gr.outputs.Textbox(label="Translated Summary")

# Wire the callback to the widgets and start the app.
demo = gr.Interface(
    fn=summarize_and_translate_interface,
    inputs=[input_pdf, language],
    outputs=output_text,
)
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|