# ttt / app.py
import gradio as gr
from transformers import AutoTokenizer, pipeline
import torch

# First model/tokenizer pair: generates (or continues) a word followed by free text after a blank line.
tokenizer1 = AutoTokenizer.from_pretrained("notexist/tttff")
tdk1 = pipeline('text-generation', model='notexist/tttff', tokenizer=tokenizer1)

# Second model/tokenizer pair: generates text that may contain a "[TEXT]" placeholder,
# which is later replaced with the word itself.
tokenizer2 = AutoTokenizer.from_pretrained("notexist/ttte")
tdk2 = pipeline('text-generation', model='notexist/ttte', tokenizer=tokenizer2)
# Syllable splitter ("speller") borrowed from http://turkceddi.blogspot.com/
# Turkish vowels (lower case, including circumflexed forms), used to build a 0/1 vowel mask.
lower_vowel = {'a', 'â', 'e', 'ê', 'ı', 'î', 'i', 'o', 'ô', 'ö', 'u', 'û', 'ü'}
# Each entry maps a prefix of the vowel mask ('1' = vowel, '0' = consonant)
# to the number of letters that make up the next syllable.
SPELL_SLICER = (('001000', 5), ('000100', 5), ('01000', 4), ('00100', 4), ('00010', 4),
                ('1000', 3), ('0100', 3), ('0011', 3), ('0010', 3),
                ('011', 2), ('010', 2), ('100', 2), ('10', 1), ('11', 1))
def to_lower(word):
    # Lower-case with Turkish-specific handling of dotted/dotless I before str.lower().
    return word.replace('İ', 'i').replace('I', 'ı').lower()
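# Example (illustrative): to_lower("IŞIK") -> "ışık" (plain str.lower() would give "işik")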
def wordtoten(word: str):
    # Build the vowel mask: '1' for each vowel, '0' for every other character.
    return ''.join('1' if ch in lower_vowel else '0' for ch in word)
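# Example (illustrative): wordtoten("merhaba") -> "0100101"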
def spellword(word_b: str):
    # Split each word into syllables, joined with a middle dot; returns False on failure.
    word_b = to_lower(word_b).strip()
    z = []
    for word in word_b.split():
        syllable_list = []
        tenword = wordtoten(word)
        len_spell = tenword.count('1')  # expected number of syllables = number of vowels
        for _ in range(len_spell):
            # Cut the next syllable according to the first matching mask prefix.
            for x, y in SPELL_SLICER:
                if tenword.startswith(x):
                    syllable_list.append(word[:y])
                    word = word[y:]
                    tenword = tenword[y:]
                    break
        if tenword == '0':
            # A single trailing consonant belongs to the last syllable.
            syllable_list[-1] = syllable_list[-1] + word
        elif word:
            syllable_list.append(word)
        if len(syllable_list) != len_spell:
            # Syllable count does not match the vowel count: give up on this input.
            return False
        z.append("·".join(syllable_list))
    return " ".join(z)
def predict(name, sl, topk, topp):
    # `sl` is the repetition penalty; `topk`/`topp` control sampling.
    if name == "":
        # No word given: let the first model invent one, then reuse it as the prompt.
        x1 = tdk1("<|endoftext|>",
                  do_sample=True,
                  max_length=64,
                  top_k=topk,
                  top_p=topp,
                  num_return_sequences=1,
                  repetition_penalty=sl
                  )[0]["generated_text"]
        # The generated word is everything between the prompt and the first blank line.
        new_name = x1[len("<|endoftext|>"):x1.index("\n\n")]
        x2 = tdk2(f"<|endoftext|>{new_name}\n\n",
                  do_sample=True,
                  max_length=64,
                  top_k=topk,
                  top_p=topp,
                  num_return_sequences=1,
                  repetition_penalty=sl
                  )[0]["generated_text"]
        if "[TEXT]" not in x2:
            # No usable example sentence: return the first model's text and the syllabification.
            return x1[len("<|endoftext|>"):] + "\n\n" + spellword(new_name)
        else:
            # Also append the example sentence, substituting the word for the "[TEXT]" placeholder.
            return (x1[len("<|endoftext|>"):] + "\n\n" + spellword(new_name) + "\n\n"
                    + x2[len(f"<|endoftext|>{new_name}\n\n"):].replace("[TEXT]", " " + new_name + " "))
    else:
        # A word was given: use it as the prompt for both models.
        x1 = tdk1(f"<|endoftext|>{name}\n\n",
                  do_sample=True,
                  max_length=64,
                  top_k=topk,
                  top_p=topp,
                  num_return_sequences=1,
                  repetition_penalty=sl
                  )[0]["generated_text"]
        x2 = tdk2(f"<|endoftext|>{name}\n\n",
                  do_sample=True,
                  max_length=64,
                  top_k=topk,
                  top_p=topp,
                  num_return_sequences=1,
                  repetition_penalty=sl
                  )[0]["generated_text"]
        if "[TEXT]" not in x2:
            return x1[len(f"<|endoftext|>{name}\n\n"):] + "\n\n" + spellword(name)
        else:
            return (x1[len(f"<|endoftext|>{name}\n\n"):] + "\n\n" + spellword(name) + "\n\n"
                    + x2[len(f"<|endoftext|>{name}\n\n"):].replace("[TEXT]", " " + name + " "))
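# Illustrative call (output varies with sampling; shape inferred from the branches above):
#   predict("kedi", 1.1, 75, 0.95)
#   -> generated text for "kedi", a blank line, "ke·di", and (if produced) an example sentence.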
iface = gr.Interface(fn=predict,
                     inputs=["text",
                             gr.inputs.Slider(0, 3, default=1.1, label="repetition_penalty"),
                             gr.inputs.Slider(0, 100, default=75, label="top_k"),
                             gr.inputs.Slider(0, 1, default=0.95, label="top_p")],
                     outputs="text")
iface.launch()