Spaces:

SoybeanMilk
/

OCR-Translate-and-Summary-GeminiPro

Running

App Files Files Community

OCR-Translate-and-Summary-GeminiPro / app.py

SoybeanMilk

Update app.py

aa0d25a verified 6 months ago

raw

history blame

No virus

10.3 kB

	import os
	import pyperclip
	import gradio as gr
	import nltk
	import pytesseract
	import google.generativeai as genai
	from nltk.tokenize import sent_tokenize
	from transformers import *
	import torch
	from tqdm import tqdm # Import tqdm

	# Download necessary data for nltk
	# NOTE: this runs every time the module is imported (module-level call).
	nltk.download('punkt')

	# Markdown/HTML header text; passed to gr.Markdown() at the top of the UI in main().
	OCR_TR_DESCRIPTION = '''# OCR Translate and Summary GeminiPro
	<div id="content_align">OCR system based on Tesseract</div>'''

	# Getting the list of available languages for Tesseract
	# Runs `tesseract --list-langs` through a shell pipe, splits its output on
	# newlines and drops the first and the last element ([1:-1]) -- presumably a
	# header line and the empty string after the trailing newline; verify against
	# the actual tesseract output.
	# NOTE(review): `choices` is not referenced again in the visible code; the
	# language checkbox in main() hard-codes ["chi_sim", "eng"].
	choices = os.popen('tesseract --list-langs').read().split('\n')[1:-1]

	# Convert a list of Tesseract language codes into a pytesseract `lang` string
	def ocr_lang(lang_list):
	    """Join Tesseract language codes with ``+``.

	    Args:
	        lang_list: Sequence of language-code strings, e.g.
	            ``["chi_sim", "eng"]``.

	    Returns:
	        The codes joined by ``+`` (``"chi_sim+eng"``). A single code is
	        returned unchanged and an empty sequence yields ``""``.
	    """
	    # Build a new string instead of inserting "+" separators into
	    # `lang_list`, so the caller's list is never modified.
	    return "+".join(lang_list)


	# OCR via Tesseract
	def ocr_tesseract(img, languages):
	    """Extract text from an image with pytesseract.

	    Args:
	        img: Image handed to ``pytesseract.image_to_string``. ``None``
	            (no image supplied) is accepted and produces no text.
	        languages: List of Tesseract language codes; combined into a
	            single ``lang`` string by ``ocr_lang``. ``None`` is treated
	            as an empty list.

	    Returns:
	        The text returned by ``pytesseract.image_to_string``, or ``""``
	        when ``img`` is ``None``.
	    """
	    # Nothing to recognise: return an empty result instead of letting
	    # pytesseract fail on a missing image.
	    if img is None:
	        return ""
	    return pytesseract.image_to_string(img, lang=ocr_lang(languages or []))


	# Clear
	def clear_content():
	    """Return ``None``; takes no arguments and has no side effects."""
	    return


	import pyperclip

	# Copy to clipboard
	def cp_text(input_text):
	    """Copy *input_text* to the clipboard via pyperclip (best effort).

	    Any exception raised while copying is printed to stdout instead of
	    being propagated. Always returns ``None``.
	    """
	    try:
	        pyperclip.copy(input_text)
	    except Exception as err:
	        # One print call, two lines of output: the fixed message, then the error.
	        print("Error occurred while copying to clipboard", err, sep="\n")

	# Clear the clipboard
	def cp_clear():
	    """Empty the clipboard by overwriting it with an empty string.

	    pyperclip provides ``copy``/``paste`` but no ``clear`` function, so
	    the clipboard is emptied with ``pyperclip.copy("")``. Failures are
	    printed rather than raised, matching the best-effort behaviour of
	    ``cp_text``. Always returns ``None``.
	    """
	    try:
	        pyperclip.copy("")
	    except Exception as err:
	        print("Error occurred while clearing the clipboard")
	        print(err)

	# Split the text into fixed-size character chunks (2000 by default)
	def process_text_input_text(input_text, chunk_size=2000):
	    """Split *input_text* into consecutive chunks of at most *chunk_size* characters.

	    Args:
	        input_text: Text to split. ``None`` is treated like an empty string.
	        chunk_size: Maximum number of characters per chunk (default 2000).

	    Returns:
	        A list of substrings in their original order; every chunk except
	        possibly the last has exactly ``chunk_size`` characters. Empty or
	        missing text yields ``[]``.

	    Raises:
	        ValueError: If ``chunk_size`` is not positive.
	    """
	    if chunk_size <= 0:
	        raise ValueError("chunk_size must be a positive integer")
	    if not input_text:
	        return []
	    return [
	        input_text[start:start + chunk_size]
	        for start in range(0, len(input_text), chunk_size)
	    ]

	# Message returned when there is no text to send to the model.
	_NO_CONTENT_MESSAGE = "System prompt: There is no content to translate!"

	# Gemini model used for both translation and summarisation.
	_GEMINI_MODEL_NAME = 'gemini-pro'

	# Safety settings sent with every request: "BLOCK_NONE" for each category.
	_SAFETY_SETTINGS = [
	    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
	    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
	    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
	    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
	]


	def _generate_per_chunk(api_key, input_text, prompt, separator):
	    """Send *input_text* to Gemini chunk by chunk and join the replies.

	    Args:
	        api_key: API key passed to ``genai.configure``.
	        input_text: Text to process; split by ``process_text_input_text``.
	        prompt: Instruction sent ahead of every chunk.
	        separator: String placed between the per-chunk replies.

	    Returns:
	        The replies joined by *separator*, or the "no content" message
	        when *input_text* is empty or ``None``.
	    """
	    # Empty input produces no chunks at all, so report it up front instead
	    # of returning an empty string without explanation.
	    if not input_text:
	        return _NO_CONTENT_MESSAGE

	    # Configuration and model are identical for every chunk: set up once.
	    genai.configure(api_key=api_key)
	    model = genai.GenerativeModel(model_name=_GEMINI_MODEL_NAME)

	    replies = []
	    for chunk in process_text_input_text(input_text):
	        response = model.generate_content(
	            [prompt, chunk],
	            safety_settings=_SAFETY_SETTINGS,
	        )
	        # NOTE(review): `response.text` is read without checking whether the
	        # response actually contains text -- confirm against the SDK docs
	        # whether this can raise for blocked/empty responses.
	        replies.append(response.text)

	    return separator.join(replies)


	def process_and_translate(api_key, input_text, src_lang, tgt_lang):
	    """Translate *input_text* from *src_lang* to *tgt_lang* with Gemini.

	    The text is processed in chunks and the translated chunks are joined
	    with a blank line.

	    Args:
	        api_key: Google Generative AI API key.
	        input_text: Text to translate.
	        src_lang: Name of the source language, inserted into the prompt.
	        tgt_lang: Name of the target language, inserted into the prompt.

	    Returns:
	        The translated text, or a "no content" message for empty input.
	    """
	    prompt = (
	        f"This is an {src_lang} to {tgt_lang} translation, please provide the "
	        f"{tgt_lang} translation for this paragraph. Do not provide any "
	        f"explanations or text apart from the translation.\n{src_lang}: "
	    )
	    return _generate_per_chunk(api_key, input_text, prompt, '\n\n')


	def process_and_summary(api_key, input_text, src_lang, tgt_lang):
	    """Summarise *input_text* and list its key points in *tgt_lang* with Gemini.

	    The text is processed in chunks and the per-chunk summaries are joined
	    with a "Next Paragraph" marker.

	    Args:
	        api_key: Google Generative AI API key.
	        input_text: Text to summarise.
	        src_lang: Name of the source language, inserted into the prompt.
	        tgt_lang: Name of the target language, inserted into the prompt.

	    Returns:
	        The combined summaries, or a "no content" message for empty input.
	    """
	    prompt = (
	        f"This is an {src_lang} to {tgt_lang} summarization and knowledge key "
	        f"points, please provide the {tgt_lang} summarization and list the "
	        f"{tgt_lang} knowledge key points for this sentence. Do not provide any "
	        f"explanations or text apart from the summarization.\n{src_lang}: "
	    )
	    return _generate_per_chunk(api_key, input_text, prompt, '\n\nNext Paragraph\n\n')

	# prompt = f"Display language is {tgt_lang}, do not display original text, As a Knowledge Video Content Analysis Expert, specialize in analyzing knowledge videos, identifying and clearly explaining key points in {tgt_lang}, ensuring accurate, easy-to-understand summaries suitable for diverse audiences, analyze, list key points, and explain detailedly below text: "


	# Build the Gradio Blocks UI (OCR step, translate/summary step, clipboard
	# buttons), wire each button to its callback and launch the app.
	# NOTE(review): indentation is missing in this view, so the nesting of the
	# `with` layout blocks below cannot be determined from the text alone.
	def main():

	with gr.Blocks(css='style.css') as ocr_tr:
	gr.Markdown(OCR_TR_DESCRIPTION)

	# -------------- OCR text extraction --------------
	with gr.Box():

	with gr.Row():
	gr.Markdown("### Step 01: Text Extraction")

	with gr.Row():
	with gr.Column():
	with gr.Row():
	inputs_img = gr.Image(image_mode="RGB", source="upload", type="pil", label="image")
	with gr.Row():
	# Only "chi_sim" and "eng" are offered; "eng" is preselected.
	inputs_lang = gr.CheckboxGroup(choices=["chi_sim", "eng"],
	type="value",
	value=['eng'],
	label='language')

	with gr.Row():
	clear_img_btn = gr.Button('Clear')
	ocr_btn = gr.Button(value='OCR Extraction', variant="primary")

	with gr.Row():
	# Use Markdown to display clickable URL
	gr.Markdown("[Click here to get API key](https://makersuite.google.com/u/1/app/apikey)")

	with gr.Row():
	# Create a text input box for users to enter their API key
	inputs_api_key = gr.Textbox(label="Please enter your API key here", type="password")

	with gr.Column():
	with gr.Row():
	outputs_text = gr.Textbox(label="Extract content", lines=20)
	# Language names chosen here are inserted verbatim into the Gemini prompts.
	src_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
	default="English", label='source language')
	tgt_lang = gr.inputs.Dropdown(choices=["Chinese (Simplified)", "Chinese (Traditional)", "English", "Japanese", "Korean"],
	default="Chinese (Traditional)", label='target language')
	with gr.Row():
	clear_text_btn = gr.Button('Clear')
	translate_btn = gr.Button(value='Translate', variant="primary")
	summary_btn = gr.Button(value='Summary', variant="primary")


	# Empty row: creates no components.
	with gr.Row():
	pass

	# -------------- Translation --------------
	with gr.Box():

	with gr.Row():
	gr.Markdown("### Step 02: Process")

	with gr.Row():
	outputs_tr_text = gr.Textbox(label="Process Content", lines=20)

	with gr.Row():
	cp_clear_btn = gr.Button(value='Clear Clipboard')
	cp_btn = gr.Button(value='Copy to clipboard', variant="primary")

	# ---------------------- OCR Tesseract ----------------------
	# OCR result goes into the "Extract content" textbox; Clear resets the image.
	ocr_btn.click(fn=ocr_tesseract, inputs=[inputs_img, inputs_lang], outputs=[
	outputs_text,])
	clear_img_btn.click(fn=clear_content, inputs=[], outputs=[inputs_img])

	# ---------------------- Translation ----------------------
	# Translate and Summary both read the extracted text and write to the same
	# "Process Content" textbox.
	translate_btn.click(fn=process_and_translate, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
	summary_btn.click(fn=process_and_summary, inputs=[inputs_api_key, outputs_text, src_lang, tgt_lang], outputs=[outputs_tr_text])
	clear_text_btn.click(fn=clear_content, inputs=[], outputs=[outputs_text])

	# ---------------------- Copy to clipboard ----------------------
	# NOTE(review): these callbacks call pyperclip inside the Python process, so
	# they presumably act on the clipboard of the machine running the app rather
	# than the browser's -- confirm for hosted deployments.
	cp_btn.click(fn=cp_text, inputs=[outputs_tr_text], outputs=[])
	cp_clear_btn.click(fn=cp_clear, inputs=[], outputs=[])


	# inbrowser=True asks Gradio to open the app in a browser tab on launch.
	ocr_tr.launch(inbrowser=True)

	# Start the UI only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	main()