Spaces:

hhhwmws
/

test_Idiot-Cultivation-System

Sleeping

App Files Files Community

hhhwmws committed on Aug 8

Commit

435fc98

•

1 Parent(s): 721e0c6

Upload app.py

Browse files

Files changed (1) hide show

app.py +112 -0

app.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import gradio as gr
+from io import BytesIO
+import fitz
+import tempfile
+import openai
+class TranslationAgent:
+    """Chat-based English-to-Chinese translator built on the OpenAI chat API.
+
+    ``self.memory`` holds the running conversation: the system prompt at
+    index 0, followed by the most recent user/assistant messages.
+    """
+
+    def __init__(self, openai_key):
+        """Start the conversation with the system prompt and register the key.
+
+        The key is assigned to ``openai.api_key``, i.e. on the module itself,
+        so it is shared by every agent in the process.
+        """
+        prompt_parts = (
+            "You are a translator from english to Chinese.\n",
+            " The only thing you do is to translate.\n",
+            " You don't write anything other then the translation of the text you get.\n",
+            " The user will only provide the text without asking anything, but what he wants is the translation.\n",
+            " Never return the translation of a previously translated part!\n ",
+            "The text you will need to translate will often include none sense stuff because it is coming from a text extraction of a pdf file including images and table.\n",
+            " Do your best to translate also this messy parts.",
+        )
+        self.memory = [{"role": "system", "content": "".join(prompt_parts)}]
+        openai.api_key = openai_key
+
+    def fade_memory(self):
+        """Forget the two oldest non-system messages once five or more are held."""
+        if len(self.memory) < 5:
+            return
+        # Index 0 is the system prompt and is always kept; indexes 1 and 2 are
+        # the oldest user/assistant exchange still in the conversation.
+        self.memory[1:3] = []
+
+    def translate_chunk(self, chunk):
+        """Send ``chunk`` as a user message and return the model's reply.
+
+        Both the request and the reply are recorded in ``self.memory``, which
+        is then trimmed by ``fade_memory``.
+        """
+        user_turn = {"role": "user", "content": chunk}
+        self.memory.append(user_turn)
+        completion = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=self.memory,
+        )
+        translated = completion["choices"][0]["message"]["content"]
+        self.memory.append({"role": "assistant", "content": translated})
+        self.fade_memory()
+        return translated
+def extract_text_from_pdf(pdf, start, stop):
+    """Return the concatenated text of pages ``start`` through ``stop``.
+
+    Args:
+        pdf: Raw bytes of the PDF document (wrapped in ``BytesIO`` here, so no
+            ``.read()`` call is needed).
+        start: Index of the first page to include; pages are counted from 0.
+        stop: Index of the last page to include (inclusive).
+
+    Returns:
+        The text of every selected page, joined in page order with no
+        separator. Empty when no page index falls inside the range.
+    """
+    page_texts = []
+    with fitz.open(stream=BytesIO(pdf), filetype='pdf') as document:
+        for index, page in enumerate(document):
+            if start <= index <= stop:
+                page_texts.append(page.get_text())
+            elif start <= index:
+                # Already past ``stop``: no later page can be in range.
+                break
+    return "".join(page_texts)
+def split_text(text, chunk_size=100, max_chunk_size=None):
+    """Split ``text`` into chunks of whole words that end on a sentence boundary.
+
+    Words are accumulated until the chunk holds at least ``chunk_size`` words
+    and the latest word ends with a period; the chunk is then closed. Whatever
+    words remain at the end form a final, possibly shorter, chunk.
+
+    Args:
+        text: Text to split. Any run of whitespace separates words.
+        chunk_size: Minimum number of words a chunk must hold before it may be
+            closed at a word ending with ``'.'``.
+        max_chunk_size: Optional hard limit on the number of words per chunk.
+            Text without periods (tables, captions, extraction noise) would
+            otherwise end up in one arbitrarily large chunk. ``None`` (the
+            default) disables the limit.
+
+    Returns:
+        A list of chunks, each a string of words joined by single spaces.
+        Empty when ``text`` contains no words.
+
+    Raises:
+        ValueError: If ``max_chunk_size`` is given and is smaller than 1.
+    """
+    if max_chunk_size is not None and max_chunk_size < 1:
+        raise ValueError("max_chunk_size must be at least 1")
+
+    chunks = []
+    pending_words = []
+    for word in text.split():
+        pending_words.append(word)
+        word_count = len(pending_words)
+        ends_sentence = word.endswith('.') and word_count >= chunk_size
+        hit_hard_limit = max_chunk_size is not None and word_count >= max_chunk_size
+        if ends_sentence or hit_hard_limit:
+            chunks.append(' '.join(pending_words))
+            pending_words = []
+    # Keep the trailing words that never reached a closing condition.
+    if pending_words:
+        chunks.append(' '.join(pending_words))
+    return chunks
+def translate_pdf(openai_key, pdf, start, stop):
+    """Translate a page range of a PDF and save the translation to a text file.
+
+    Args:
+        openai_key: OpenAI API key handed to ``TranslationAgent``.
+        pdf: Raw bytes of the PDF, or ``None`` when no file was supplied.
+        start: Index of the first page to translate (0-based, inclusive).
+            ``None`` means start at the first page.
+        stop: Index of the last page to translate (0-based, inclusive).
+            ``None`` means continue through the last page.
+
+    Returns:
+        A ``(translated_text, status_message, file_path)`` tuple, where
+        ``file_path`` names a UTF-8 ``.txt`` file holding ``translated_text``.
+        If translation fails part-way, the chunks translated so far are still
+        returned and written, and ``status_message`` describes the failure.
+    """
+    translator = TranslationAgent(openai_key)
+    translated_chunks = []
+    error_message = "Translation Successful"
+
+    if pdf is None:
+        # Nothing was translated, so do not report success.
+        error_message = "Translation Failed: no PDF file provided"
+    else:
+        # A blank page field presumably arrives as None (TODO confirm against
+        # the UI component); treat it as an open bound instead of failing on
+        # the page-index comparison.
+        first_page = 0 if start is None else start
+        last_page = float("inf") if stop is None else stop
+        try:
+            text = extract_text_from_pdf(pdf, start=first_page, stop=last_page)
+            # Append chunk by chunk so that work finished before a failure
+            # is kept rather than thrown away.
+            for chunk in split_text(text):
+                translated_chunks.append(translator.translate_chunk(chunk))
+        except Exception as e:  # UI boundary: report the error to the user
+            error_message = f"Translation Failed: {e}"
+
+    # Same layout as before: every chunk carries a trailing space and the
+    # chunks are space-joined, so successful output is unchanged.
+    translated_text = ' '.join(piece + " " for piece in translated_chunks)
+
+    # Write through the temporary file's own handle and close it on exit;
+    # delete=False keeps the file on disk so its path can be returned.
+    with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False,
+                                     prefix="translatedPDF_", suffix=".txt") as temp:
+        temp.write(translated_text)
+    return translated_text, error_message, temp.name
+# Input widgets, in the positional order translate_pdf expects:
+# (openai_key, pdf, start, stop).
+_input_components = [
+    gr.Textbox(
+        lines=1,
+        label="OpenAI API key",
+        placeholder="Enter your OpenAI API key here",
+    ),
+    # type="binary" matches extract_text_from_pdf, which wraps the upload in BytesIO.
+    gr.File(type="binary", label="PDF file"),
+    gr.Number(label="Starting Page"),
+    gr.Number(label="Final Page"),
+]
+# Outputs mirror translate_pdf's return tuple: translated text, status
+# message, and the path of the saved text file.
+_output_components = ["text", "text", gr.File(label="Translated Text File")]
+
+iface = gr.Interface(
+    fn=translate_pdf,
+    inputs=_input_components,
+    outputs=_output_components,
+    title="Pdf Translator: English ==> Chinese",
+)
+# Start the web UI as soon as this script runs.
+iface.launch()