File size: 3,921 Bytes
411445c
502f071
 
 
9e42446
ba68985
9e42446
b7eb50d
502f071
77c5165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8046c49
 
3594946
8046c49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3594946
 
77c5165
10a2445
 
7c6863d
 
 
 
 
 
 
 
b7eb50d
 
10a2445
 
 
 
9513183
10a2445
 
d214fd2
 
77c5165
d214fd2
77c5165
10a2445
7c6863d
 
 
 
 
 
 
 
 
10a2445
 
 
 
9513183
10a2445
 
502f071
d214fd2
77c5165
d214fd2
77c5165
502f071
 
10a2445
 
 
 
 
5dd8526
502f071
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import gradio as gr 
from transformers import AutoTokenizer, pipeline
import torch

# Two text-generation pipelines over fine-tuned models:
#  - tdk1 ("notexist/tttff"): generates a word followed by its definition
#    (the code below splits its output on the first blank line).
#  - tdk2 ("notexist/ttte"): generates example text that may contain a
#    "[TEXT]" placeholder where the word is substituted back in —
#    presumably a fine-tuning convention; TODO confirm against the models.
# NOTE(review): both calls download model weights at import time.
tokenizer1 = AutoTokenizer.from_pretrained("notexist/tttff")
tdk1 = pipeline('text-generation', model='notexist/tttff', tokenizer=tokenizer1)
tokenizer2 = AutoTokenizer.from_pretrained("notexist/ttte")  
tdk2 = pipeline('text-generation', model='notexist/ttte', tokenizer=tokenizer2)


# Speller borrowed from http://turkceddi.blogspot.com/
# Lowercase Turkish vowels, including the circumflexed variants.
lower_vowel = {'a', 'â', 'e', 'ê', 'ı', 'î', 'i', 'o', 'ô', 'ö', 'u', 'û', 'ü'}
# Syllable-splitting rules for spellword(): each pair is
# (mask prefix, syllable length), where the mask uses '1' for a vowel and
# '0' for a consonant (see wordtoten). The first prefix that matches the
# start of the word's mask yields the next syllable; longer / more specific
# patterns come first so they win over shorter ones.
SPELL_SLICER = (('001000', 5), ('000100', 5), ('01000', 4), ('00100', 4), ('00010', 4), ('1000', 3), ('0100', 3),
                ('0011', 3), ('0010', 3), ('011', 2), ('010', 2), ('100', 2), ('10', 1), ('11', 1))


def to_lower(word):
    """Lowercase *word* with Turkish dotted/dotless "i" handling.

    Python's default ``str.lower`` maps 'İ' to 'i' + a combining dot and
    'I' to plain 'i', both wrong for Turkish, so the two capitals are
    replaced explicitly before lowercasing the rest.
    """
    turkish_fixed = word.replace('İ', 'i').replace('I', 'ı')
    return turkish_fixed.lower()


def wordtoten(word: str):
    """Encode *word* as a vowel mask: '1' per vowel, '0' per other char.

    Assumes the input is already lowercased (vowels are matched against
    the lowercase-only ``lower_vowel`` set).
    """
    return ''.join('1' if letter in lower_vowel else '0' for letter in word)

def spellword(word_b: str):
    """Syllabify each word of *word_b*, joining syllables with '·'.

    The text is lowercased (Turkish-aware) and split on whitespace; every
    word is sliced into syllables by matching its vowel/consonant mask
    against SPELL_SLICER. Returns the syllabified string, or False when
    any word yields a syllable count different from its vowel count
    (i.e. the slicing rules failed for that word).
    """
    word_b = to_lower(word_b).strip()
    z = []
    for word in word_b.split():
        syllable_list = []
        tenword = wordtoten(word)  # '1'/'0' vowel mask, consumed in lockstep with word
        len_spell = tenword.count('1')  # expected syllables = number of vowels

        # Peel off one syllable per vowel; the first matching mask prefix
        # in SPELL_SLICER decides the syllable length.
        for i in range(tenword.count('1')):
            for x, y in SPELL_SLICER:
                if tenword.startswith(x):
                    syllable_list.append(word[:y])
                    word = word[y:]
                    tenword = tenword[y:]
                    break

        # A single leftover consonant attaches to the last syllable;
        # any longer remainder becomes its own trailing chunk.
        if tenword == '0':
            syllable_list[-1] = syllable_list[-1] + word
        elif word:
            syllable_list.append(word)

        if len(syllable_list) != len_spell:
            return False

        z.append("·".join(syllable_list))
    return " ".join(z)

def predict(name, sl, topk, topp):
    """Generate a definition (tdk1) and optional example text (tdk2) for a word.

    Parameters
    ----------
    name : str
        The word to define. When empty, tdk1 invents a word: its output up
        to the first blank line is taken as the new word.
    sl : float
        repetition_penalty forwarded to both pipelines.
    topk : int
        top_k sampling parameter.
    topp : float
        top_p (nucleus) sampling parameter.

    Returns
    -------
    str
        "<definition>\n\n<syllabified word>" and, when tdk2's output
        contains the "[TEXT]" placeholder, "\n\n<example text>" with the
        word substituted for the placeholder.
    """
    start = "<|endoftext|>"

    def _generate(pipe, prompt):
        # Single place for the sampling configuration, which the original
        # duplicated verbatim four times.
        return pipe(
            prompt,
            do_sample=True,
            max_length=64,
            top_k=topk,
            top_p=topp,
            num_return_sequences=1,
            repetition_penalty=sl,
        )[0]["generated_text"]

    if name == "":
        x1 = _generate(tdk1, start)
        definition = x1[len(start):]
        # Bug fix: the original used x1.index("\n\n"), which raises
        # ValueError when the model emits no blank line; partition()
        # degrades gracefully to the whole generated body.
        word = definition.partition("\n\n")[0]
        prompt2 = f"{start}{word}\n\n"
        x2 = _generate(tdk2, prompt2)
        example_src = x2[len(prompt2):]
    else:
        word = name
        prompt = f"{start}{name}\n\n"
        x1 = _generate(tdk1, prompt)
        x2 = _generate(tdk2, prompt)
        definition = x1[len(prompt):]
        example_src = x2[len(prompt):]

    # Bug fix: spellword() returns False when syllabification fails, and the
    # original concatenated that bool into a string (TypeError at runtime).
    # Fall back to the un-syllabified word instead.
    spelled = spellword(word) or word
    result = definition + "\n\n" + spelled
    if "[TEXT]" in x2:
        result += "\n\n" + example_src.replace("[TEXT]", " " + word + " ")
    return result



# Gradio UI: inputs map positionally onto predict(name, sl, topk, topp) —
# a free-text word plus sliders for repetition_penalty, top_k and top_p.
# NOTE(review): gr.inputs.Slider is the legacy pre-3.x Gradio API; this
# only runs against an old Gradio version — confirm the pinned dependency.
iface = gr.Interface(fn=predict, inputs=["text",\
                                         gr.inputs.Slider(0, 3, default=1.1, label="repetition_penalty"),\
                                         gr.inputs.Slider(0, 100, default=75, label="top_k"),\
                                         gr.inputs.Slider(0, 1, default=0.95, label="top_p")]
                     ,  outputs="text")
iface.launch()