Spaces:
Sleeping
Sleeping
File size: 7,924 Bytes
2678b8c 3c7a691 2678b8c f59c7b5 c978338 2678b8c ff66157 2678b8c f59c7b5 2678b8c f59c7b5 88abc31 8507606 88abc31 8507606 88abc31 2678b8c c8cbb2b 2678b8c 3c7a691 92a0f65 2678b8c c978338 92a0f65 635f231 2678b8c 635f231 c978338 635f231 7c1e17d 635f231 2678b8c 635f231 3c7a691 635f231 c978338 88abc31 635f231 88abc31 92a0f65 8507606 92a0f65 74aeece 92a0f65 74aeece 02f5728 92a0f65 2678b8c c978338 635f231 92a0f65 635f231 88abc31 2678b8c c978338 f59c7b5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
import os
from collections import OrderedDict
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
import gradio as gr
from shitsu import ShitsuScorer
from huggingface_hub import hf_hub_download
class OptimizedShitsuScorer:
    """Least-recently-used cache of per-language ``ShitsuScorer`` models.

    At most ``max_models`` scorers are kept in ``self.scorers``; when a new
    language is requested at capacity, the least recently used scorer is
    dropped *before* the new one is constructed, so no more than
    ``max_models`` models are held at once.
    """

    def __init__(self, max_models: int = 2):
        """Create an empty cache.

        Args:
            max_models: Maximum number of language models kept loaded.

        Raises:
            ValueError: If ``max_models`` is smaller than 1. Such a value
                previously surfaced only later, as a ``KeyError`` from
                evicting out of an empty cache on the first load.
        """
        if max_models < 1:
            raise ValueError(f"max_models must be at least 1, got {max_models!r}")
        # Insertion order doubles as recency order: oldest first, newest last.
        self.scorers = OrderedDict()
        self.max_models = max_models
        # Language code of the most recent get_scorer() call.
        self.current_language = None

    def get_scorer(self, language: str):
        """Return the scorer for ``language``, loading it on a cache miss.

        A hit marks the language as most recently used. A miss emits a
        Gradio warning, evicts least recently used entries until there is
        room, then constructs ``ShitsuScorer(language)`` and caches it.
        """
        if language in self.scorers:
            # Move the accessed language to the end (most recently used).
            self.scorers.move_to_end(language)
        else:
            gr.Warning("A new language is being loaded in memory, this could take a while...")
            # Make room first. The emptiness guard keeps popitem() from
            # raising if max_models was lowered below 1 after construction.
            while self.scorers and len(self.scorers) >= self.max_models:
                self.scorers.popitem(last=False)
            # Load the new model.
            self.scorers[language] = ShitsuScorer(language)
        self.current_language = language
        return self.scorers[language]

    def score(self, text: str, language: str):
        """Score a single ``text`` with the scorer for ``language``.

        The underlying scorer is called with a one-element batch and the
        first (only) result is returned.
        """
        scorer = self.get_scorer(language)
        return scorer.score([text])[0]

    def get_loaded_languages(self):
        """Return the cached language codes, least recently used first."""
        return list(self.scorers.keys())
# Single module-level scorer cache used by the request handler; it keeps at
# most two language models loaded at a time.
optimized_scorer = OptimizedShitsuScorer(max_models=2)

# Load the English model up front ("en" is the dropdown's initial value).
optimized_scorer.get_scorer("en")
# Sample sentences offered as one-click examples for the input textbox.
example_inputs = [
    (
        "The Beatles were a popular band in the 1960s. "
        "They released many hit songs."
    ),
    "Chocolate is a type of sweet food that people often eat for dessert.",
    (
        "I'm thinking of going to the beach this weekend. "
        "The weather is supposed to be great!"
    ),
    "Can you believe it's already September? This year is flying by!",
    (
        "Quantum mechanics is a fundamental theory in physics that provides "
        "a description of the physical properties of nature at the scale of "
        "atoms and subatomic particles."
    ),
]
def get_score(user_text, language):
    """Score ``user_text`` and describe which language models are loaded.

    Args:
        user_text: Text to score.
        language: Language code selecting the scorer.

    Returns:
        A 2-tuple ``(score_html, loaded_languages)``: an HTML snippet showing
        the score formatted to four significant digits, and a list of
        ``(text, label)`` pairs for the highlighted-text component, with one
        labelled entry per loaded language separated by unlabelled spaces.
    """
    score = optimized_scorer.score(user_text, language)
    formatted_score = f"{score:.4g}"

    display_loaded_languages = [('Currently loaded languages: \n', None)]
    for code in optimized_scorer.get_loaded_languages():
        # The dropdown accepts custom values, so a loaded code may be absent
        # from language_map; show the raw code instead of raising KeyError.
        display_loaded_languages.append((language_map.get(code, code), code))
        display_loaded_languages.append((' ', None))

    return f'<div class="nice-box"> Score: {formatted_score}</div>', display_loaded_languages
# Display name for every language code offered in the dropdown.
language_map = {
    "am": "Amharic",
    "ar": "Arabic",
    "bg": "Bulgarian",
    "bn": "Bengali",
    "cs": "Czech",
    "da": "Danish",
    "de": "German",
    "el": "Greek",
    "en": "English",
    "es": "Spanish",
    "fa": "Persian",
    "fi": "Finnish",
    "fr": "French",
    "gu": "Gujarati",
    "ha": "Hausa",
    "hi": "Hindi",
    "hu": "Hungarian",
    "id": "Indonesian",
    "it": "Italian",
    "ja": "Japanese",
    "jv": "Javanese",
    "kn": "Kannada",
    "ko": "Korean",
    "lt": "Lithuanian",
    "mr": "Marathi",
    "nl": "Dutch",
    "no": "Norwegian",
    "yo": "Yoruba",
    "zh": "Chinese",
}

# Dropdown choices: the same codes, in the same order, as the mapping above.
language_options = list(language_map)
# Highlight colour per language code, used as the label -> colour mapping of
# the "loaded languages" display. Values are CSS colour names or hex codes.
color_map = {
    "am": "green",    # Ethiopia's flag has green
    "ar": "black",    # Many Arab flags feature black
    "bg": "white",    # Bulgaria's flag has white
    "bn": "green",    # Bangladesh's flag is green and red
    "cs": "blue",     # Czech Republic's flag has blue
    "da": "red",      # Denmark's flag is red and white
    "de": "black",    # Germany's flag has black
    "el": "blue",     # Greece's flag has blue
    "en": "red",      # UK/US flags have red
    "es": "yellow",   # Spain's flag has yellow
    "fa": "green",    # Iran's flag has green
    "fi": "blue",     # Finland's flag is blue and white
    "fr": "blue",     # France's flag has blue
    # "saffron" is not a CSS named colour, so it is spelled as a hex code.
    "gu": "#F4C430",  # Saffron: India (Gujarat) flag's color
    "ha": "green",    # Nigeria's flag has green
    "hi": "orange",   # India's flag has orange
    "hu": "red",      # Hungary's flag has red
    "id": "red",      # Indonesia's flag is red and white
    "it": "green",    # Italy's flag has green
    "ja": "red",      # Japan's flag has a red sun
    "jv": "brown",    # Associated with traditional Javanese culture
    "kn": "yellow",   # Karnataka (Indian state) flag has yellow
    "ko": "blue",     # South Korea's flag has blue
    "lt": "yellow",   # Lithuania's flag has yellow
    "mr": "#F4C430",  # Saffron: Marathi culture often uses saffron
    "nl": "orange",   # The Netherlands is often associated with orange
    "no": "red",      # Norway's flag is red, white, and blue
    "yo": "green",    # Nigeria's flag for Yoruba-speaking people
    "zh": "red",      # China's flag is red
}
# Extra CSS handed to gr.Blocks(css=...).
# .nice-box styles the score read-out. Its background is a fixed light grey,
# so the text colour is pinned to a dark value too; otherwise text inherited
# from a dark theme would end up light-on-light and unreadable.
css = '''
#gen_btn{height: 100%}
#title{text-align: center}
#title h1{font-size: 3em; display:inline-flex; align-items:center}
#title img{width: 100px; margin-right: 0.5em}
#gallery .grid-wrap{height: 10vh}
.card_internal{display: flex;height: 100px;margin-top: .5em}
.card_internal img{margin-right: 1em}
.styler{--form-gap-width: 0px !important}
.nice-box {
border: 2px solid #007bff;
border-radius: 10px;
padding: 15px;
background-color: #f8f9fa;
color: #212529;
font-size: 18px;
text-align: center;
min-height: 60px;
display: flex;
align-items: center;
justify-content: center;
}
'''
# Soft Gradio theme with a blue primary hue and a sky secondary hue.
theme = gr.themes.Soft(primary_hue="blue", secondary_hue="sky")
# Page layout and event wiring for the demo.
# NOTE(review): the with-nesting below was reconstructed from a copy of this
# file that had lost its indentation. Confirm the layout against the running
# app, in particular whether `score` belongs inside the first Row/Column.
with gr.Blocks(theme=theme, css=css) as demo:
    # Header: logo and title (styled by the #title CSS rules).
    title = gr.HTML(
        value="""<h1><img src="https://huggingface.co/spaces/Dusduo/shitsu-text-scorer-demo/resolve/main/shitsu-logo.jpeg" alt="LightBlue"> Shitsu Text Scorer</h1>""",
        elem_id="title",
    )

    # Introductory blurb.
    gr.Markdown(
        value="""This is a demo of [Shitsu text scorer](https://huggingface.co/lightblue/shitsu_text_scorer) for multiple languages, which scores text based on the amount of useful, textbook-like information in it.
It outputs a score generally between 0 and 1 but can exceed both of these bounds as it is a regressor.
⚠️ By default, the English version of the scorer is preloaded in memory. When using another language for the first time, beware extensive loading time.
"""
    )

    # Input row: free-text box next to the submit button.
    with gr.Row():
        user_text = gr.Textbox(
            label="Input text",
            placeholder="Type something here...",
        )
        with gr.Column(scale=0):
            submit_btn = gr.Button(value="Submit")

    # Score read-out; get_score replaces this placeholder markup.
    score = gr.HTML(
        value='<div class="nice-box"> Score... </div>',
        label="Output",
    )

    # Language picker next to the list of models currently held in memory.
    with gr.Row():
        # NOTE(review): allow_custom_value=True lets users submit codes that
        # are not in language_options; the value is passed to the scorer as-is.
        language_choice = gr.Dropdown(
            choices=language_options,
            label="Choose a language",
            info="Type to search",
            value="en",
            allow_custom_value=True,
            scale=3,
        )
        loaded_languages = gr.HighlightedText(
            value=[('Currently loaded languages: \n', None), ('English', 'en')],
            label="",
            combine_adjacent=True,
            show_legend=False,
            color_map=color_map,
            scale=1,
        )

    # Clickable sample sentences that fill the input box.
    gr.Examples(examples=example_inputs, inputs=user_text)

    # Footer with pointers to the model's other distributions.
    gr.Markdown(
        value="""
---
## 🛈 **Additional Information**
This model can also be found on [Github](https://github.com/lightblue-tech/shitsu) and has its own pip installable package.
This model is based on fasttext embeddings, meaning that it can be used on large amounts of data with limited compute quickly.
This scorer can be used to filter useful information from large text corpora in many languages.
"""
    )

    # Clicking Submit scores the text and refreshes both output components.
    submit_btn.click(
        fn=get_score,
        inputs=[user_text, language_choice],
        outputs=[score, loaded_languages],
    )
# Start the Gradio app defined by the `demo` Blocks object.
demo.launch()
|