Rajut committed on
Commit
14e2683
·
verified ·
1 Parent(s): 3d28167

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -47
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import streamlit as st
2
  import fitz
3
  from transformers import pipeline, MBart50TokenizerFast, MBartForConditionalGeneration
4
  from multiprocessing import Pool, cpu_count
@@ -63,20 +63,16 @@ def translate_summary(summary, lang):
63
 
64
  return " ".join(translated_chunks)
65
 
66
-
67
-
68
  # Function to read PDF and summarize and translate chunk by chunk
69
- def summarize_and_translate_pdf(uploaded_file, lang):
70
- # Save uploaded PDF to a temporary file
71
- with tempfile.NamedTemporaryFile(delete=False) as temp_file:
72
- temp_file.write(uploaded_file.read())
73
- temp_file_path = temp_file.name
74
 
75
  try:
76
- doc = fitz.open(temp_file_path)
77
  except FileNotFoundError:
78
- st.error("File not found. Please make sure the file path is correct.")
79
- return []
80
 
81
  total_chunks = len(doc)
82
  chunks = []
@@ -91,42 +87,26 @@ def summarize_and_translate_pdf(uploaded_file, lang):
91
  translated_chunks = pool.starmap(summarize_and_translate_chunk, [(chunk, lang) for chunk in chunks])
92
 
93
  # Delete temporary file
94
- os.unlink(temp_file_path)
95
 
96
  return translated_chunks
97
 
98
-
99
- # Streamlit UI
100
- st.title("PDF Summarization and Translation")
101
-
102
- # File upload
103
- uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
104
- if uploaded_file:
105
- # Display uploaded file
106
- st.write("Uploaded PDF file:", uploaded_file.name)
107
-
108
- # Language selection
109
- languages = {
110
- "Arabic": "ar_AR", "Czech": "cs_CZ", "German": "de_DE", "English": "en_XX", "Spanish": "es_XX",
111
- "Estonian": "et_EE", "Finnish": "fi_FI", "French": "fr_XX", "Gujarati": "gu_IN", "Hindi": "hi_IN",
112
- "Italian": "it_IT", "Japanese": "ja_XX", "Kazakh": "kk_KZ", "Korean": "ko_KR", "Lithuanian": "lt_LT",
113
- "Latvian": "lv_LV", "Burmese": "my_MM", "Nepali": "ne_NP", "Dutch": "nl_XX", "Romanian": "ro_RO",
114
- "Russian": "ru_RU", "Sinhala": "si_LK", "Turkish": "tr_TR", "Vietnamese": "vi_VN", "Chinese": "zh_CN",
115
- "Afrikaans": "af_ZA", "Azerbaijani": "az_AZ", "Bengali": "bn_IN", "Persian": "fa_IR", "Hebrew": "he_IL",
116
- "Croatian": "hr_HR", "Indonesian": "id_ID", "Georgian": "ka_GE", "Khmer": "km_KH", "Macedonian": "mk_MK",
117
- "Malayalam": "ml_IN", "Mongolian": "mn_MN", "Marathi": "mr_IN", "Polish": "pl_PL", "Pashto": "ps_AF",
118
- "Portuguese": "pt_XX", "Swedish": "sv_SE", "Swahili": "sw_KE", "Tamil": "ta_IN", "Telugu": "te_IN",
119
- "Thai": "th_TH", "Tagalog": "tl_XX", "Ukrainian": "uk_UA", "Urdu": "ur_PK", "Xhosa": "xh_ZA",
120
- "Galician": "gl_ES", "Slovene": "sl_SI"
121
- }
122
-
123
- lang = st.selectbox("Select language for translation", list(languages.keys()))
124
-
125
- # Translate PDF
126
- if st.button("Summarize and Translate"):
127
- translated_chunks = summarize_and_translate_pdf(uploaded_file, languages[lang])
128
-
129
- # Display translated text
130
- st.header("Translated Summary")
131
- for chunk in translated_chunks:
132
- st.write(chunk)
 
1
+ import gradio as gr
2
  import fitz
3
  from transformers import pipeline, MBart50TokenizerFast, MBartForConditionalGeneration
4
  from multiprocessing import Pool, cpu_count
 
63
 
64
  return " ".join(translated_chunks)
65
 
 
 
66
  # Function to read PDF and summarize and translate chunk by chunk
67
+ def summarize_and_translate_pdf(pdf_content, lang):
68
+ # Save PDF content to a temporary file
69
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
70
+ temp_file.write(pdf_content)
 
71
 
72
  try:
73
+ doc = fitz.open(temp_file.name)
74
  except FileNotFoundError:
75
+ return "File not found. Please make sure the file path is correct."
 
76
 
77
  total_chunks = len(doc)
78
  chunks = []
 
87
  translated_chunks = pool.starmap(summarize_and_translate_chunk, [(chunk, lang) for chunk in chunks])
88
 
89
  # Delete temporary file
90
+ temp_file.close()
91
 
92
  return translated_chunks
93
 
94
+ # Gradio Interface
95
+ def summarize_and_translate_interface(pdf_content, lang):
96
+ translated_chunks = summarize_and_translate_pdf(pdf_content, lang)
97
+ return "\n".join(translated_chunks)
98
+
99
+ # Gradio UI
100
+ input_pdf = gr.inputs.File(label="Upload a PDF file", type="file")
101
+ language = gr.inputs.Dropdown(choices=["Arabic", "Czech", "German", "English", "Spanish", "Estonian", "Finnish",
102
+ "French", "Gujarati", "Hindi", "Italian", "Japanese", "Kazakh", "Korean",
103
+ "Lithuanian", "Latvian", "Burmese", "Nepali", "Dutch", "Romanian", "Russian",
104
+ "Sinhala", "Turkish", "Vietnamese", "Chinese", "Afrikaans", "Azerbaijani",
105
+ "Bengali", "Persian", "Hebrew", "Croatian", "Indonesian", "Georgian", "Khmer",
106
+ "Macedonian", "Malayalam", "Mongolian", "Marathi", "Polish", "Pashto",
107
+ "Portuguese", "Swedish", "Swahili", "Tamil", "Telugu", "Thai", "Tagalog",
108
+ "Ukrainian", "Urdu", "Xhosa", "Galician", "Slovene"],
109
+ label="Select language for translation")
110
+ output_text = gr.outputs.Textbox(label="Translated Summary")
111
+
112
+ gr.Interface(summarize_and_translate_interface, inputs=[input_pdf, language], outputs=output_text).launch()