juhoinkinen
Select NN ensemble English project by default
45ed86a unverified
raw
history blame
5.19 kB
import html
import os

import cv2
import gradio as gr
import numpy as np
import pytesseract
from annif_client import AnnifClient
def get_annif_projects():
    """Create an Annif client and collect the ids and names of its projects.

    Returns:
        A ``(client, project_ids, project_names)`` tuple where the two lists
        are index-aligned. If the client cannot be created, reports no
        projects, or a project entry lacks an expected key, the error is
        printed and ``(None, [], [])`` is returned instead.
    """
    try:
        client = AnnifClient()
        available = client.projects
        if not available:
            raise ValueError("No projects found from Annif client")
        ids = []
        names = []
        for entry in available:
            ids.append(entry["project_id"])
            names.append(entry["name"])
    except Exception as e:
        # Best-effort start-up: report the problem and hand back empty
        # results rather than propagating the exception to the caller.
        print(f"Error initializing Annif client: {str(e)}")
        return None, [], []
    return client, ids, names
# Look up the Annif projects once when the module is executed. If the lookup
# failed, get_annif_projects() has returned (None, [], []).
annif, proj_ids, proj_names = get_annif_projects()
def _render_suggestions(results):
    """Build the "Suggested subjects" HTML list from Annif suggestions.

    Args:
        results: Iterable of mappings with ``score``, ``uri`` and ``label``
            keys.

    Returns:
        HTML markup listing the suggestions in descending score order.
    """
    ranked = sorted(results, key=lambda item: item["score"], reverse=True)
    parts = [
        """
<div id="suggestions-wrapper">
<h2 id="suggestions">Suggested subjects</h2>
<ul class="list-group" id="results">
"""
    ]
    for item in ranked:
        # Labels and URIs are external data; escape them so characters such
        # as "&", "<" or quotes cannot break (or inject into) the markup.
        uri = html.escape(str(item["uri"]), quote=True)
        label = html.escape(str(item["label"]))
        parts.append(
            f"""
<li class="list-group-item">
<meter value="{item['score']}" min="0" max="1"></meter>
<a href="{uri}">{label}</a>
</li>
"""
        )
    parts.append(
        """
</ul>
</div>
"""
    )
    return "".join(parts)


def process(image, project_num: int, lang: str = "eng"):
    """Run OCR on an image and fetch Annif subject suggestions for the text.

    Args:
        image: Either a filesystem path to an image or a ``numpy`` array,
            which is treated as RGB. A path input is deleted from disk once
            OCR has succeeded.
        project_num: Index into the module-level ``proj_ids`` list.
        lang: Language code passed to Tesseract.

    Returns:
        A ``(text, html)`` tuple. On success ``text`` is the OCR output and
        ``html`` the rendered suggestion list. On any failure ``text`` holds
        the error message and ``html`` is the empty string.
    """
    # This is the UI boundary: every failure is turned into a message shown
    # in the text box instead of an unhandled exception.
    try:
        if not proj_ids:
            raise ValueError("No Annif projects available")
        # Validate the inputs up front so that no OCR work is done (and no
        # input file is deleted) for a request that cannot succeed.
        if project_num is None:
            raise ValueError("No Annif project selected")
        if image is None:
            raise ValueError("No image provided")

        if isinstance(image, str):
            img = cv2.imread(image)
            if img is None:
                raise ValueError(f"Unable to read image from path: {image}")
        elif isinstance(image, np.ndarray):
            img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        else:
            raise ValueError("Unsupported image type")

        # Binarise with Otsu's threshold before handing the image to OCR.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        threshold_img = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )[1]
        text = pytesseract.image_to_string(threshold_img, lang=lang)

        if isinstance(image, str):
            os.remove(image)

        results = annif.suggest(project_id=proj_ids[project_num], text=text)
        return text, _render_suggestions(results)
    except Exception as e:
        return str(e), ""
# Language codes offered in the OCR language dropdown; the selected value is
# passed to pytesseract as its `lang` argument.
langs = ("eng", "fin", "swe")
# Custom stylesheet handed to gr.Blocks. It styles the page container, the
# `.mygrclass` components, the `#blurb` heading, the `#get-suggestions`
# button and the suggestion list markup (`#suggestions-wrapper`,
# `#suggestions`, `.list-group-item`, `meter`) generated by process().
# NOTE(review): `header`, `#form` and `.form-control` have no matching element
# in the visible code, and `color: #343260` is declared twice in the first
# rule -- confirm whether these are still needed.
css = """
.gradio-container, .gradio-container * {
color: #343260 !important;
background-color: #f3f3f6;
color: #343260;
font-family: Jost, sans-serif;
font-weight: 400;
font-size: 1.2rem;
line-height: 1.2;
}
body, .mygrclass {
background-color: #f3f3f6;
color: #343260;
font-family: Jost, sans-serif;
font-weight: 400;
font-size: 1.2rem;
line-height: 1.2;
}
.container {
max-width: 1140px;
margin: 0 auto;
padding: 0 15px;
}
header {
background: linear-gradient(to bottom, #ffffff 92%, #d9dfe3 92%, #ffffff 100%);
padding-bottom: 5px;
}
h2 {
font-weight: 500;
font-size: 1.5rem;
padding: 0.5rem 0;
}
#blurb {
font-size: 2.0rem;
font-weight: 500;
line-height: 1;
text-align: center;
margin: 2rem 0;
}
#form {
background: linear-gradient(to bottom, #ffffff 0%, #d9dfe3 1%, #f3f3f6 1%, #f3f3f6 99%, #d9dfe3 99%, #ffffff 100%);
padding: 2rem 0;
}
.form-control {
border-radius: 0px;
}
#get-suggestions {
margin: 2rem 0;
background-color: #6280dc;
color: white;
border: none;
border-radius: 0px;
padding-right: 3rem;
background-image: url('static/img/arrow-white.svg');
background-position: 97% center;
background-repeat: no-repeat;
}
#suggestions-wrapper {
background-color: #f3f3f6;
padding: 1rem;
}
#suggestions {
border-top: 1px solid #343260;
padding-top: 0.5rem;
text-transform: uppercase;
font-size: 1.1rem;
}
.list-group-item {
display: flex;
align-items: center;
padding: 5px 0;
border-bottom: 1px solid #e0e0e0;
}
meter {
width: 24px;
margin-right: 10px;
}
meter::-webkit-meter-bar {
background-color: #ccc;
}
meter::-webkit-meter-optimum-value {
background: #6280dc;
}
"""
# Preselect the third project when it exists. Fall back to the first project,
# or to no selection, so that a short or empty project list (for example when
# the Annif lookup failed) cannot crash the app with an IndexError at startup.
if len(proj_names) > 2:
    default_project = proj_names[2]
elif proj_names:
    default_project = proj_names[0]
else:
    default_project = None

# Page layout: a heading, then the image input beside the project/OCR-language
# controls, then the extracted text beside the rendered suggestions.
with gr.Blocks(theme=gr.themes.Default(radius_size="none"), css=css) as interface:
    gr.HTML("""
<div class="container">
<p id="blurb">
Annif demo with image/camera input and OCR
</p>
</div>
""")
    with gr.Row():
        with gr.Column(scale=3):
            image_input = gr.Image(
                type="numpy",
                label="Input Image",
                elem_classes="mygrclass",
            )
        with gr.Column(scale=1):
            # type="index" makes the dropdown pass the position of the chosen
            # name, which process() uses to index the parallel proj_ids list.
            project = gr.Dropdown(
                choices=proj_names,
                label="Project (vocabulary and language)",
                type="index",
                elem_classes="mygrclass",
                value=default_project,
            )
            lang = gr.Dropdown(
                choices=langs,
                label="Select Language for OCR",
                type="value",
                value="eng",
                elem_classes="mygrclass",
            )
            submit_btn = gr.Button(
                "Get suggestions",
                elem_id="get-suggestions",
                elem_classes="mygrclass",
            )
    with gr.Row():
        with gr.Column(scale=3):
            text_output = gr.Textbox(label="Extracted Text", elem_classes="mygrclass")
        with gr.Column(scale=1):
            html_output = gr.HTML()
    submit_btn.click(
        process,
        inputs=[image_input, project, lang],
        outputs=[text_output, html_output],
    )

interface.launch()