KwabsHug's picture
Update app.py
0747efa
raw
history blame
5.42 kB
from googletrans import Translator
import spacy
import gradio as gr
# Module-level setup, run once at import time.
# Download the small English spaCy model so the load below cannot fail on a
# fresh Space; this hits the network on every startup.
spacy.cli.download("en_core_web_sm")
# POS-tagging pipeline used by keep_nouns_verbs().
nlp = spacy.load('en_core_web_sm')
# googletrans client used by FrontRevSentChunk(); relies on Google's free
# web endpoint — NOTE(review): unofficial API, may rate-limit or break.
translator = Translator()
def Sentencechunker(sentence):
    """Build cumulative word-prefix chunks of *sentence*.

    "a b c" -> "a | a b | a b c".  Splitting is on single spaces, so runs
    of whitespace produce empty words, exactly as the caller typed them.
    """
    words = sentence.split(" ")
    prefixes = [" ".join(words[:end]) for end in range(1, len(words) + 1)]
    return " | ".join(prefixes)
def ReverseSentenceChunker(sentence):
    """Reverse the word order of *sentence*, then emit cumulative prefix
    chunks of the reversed sentence separated by " | ".

    "a b c" -> "c | c b | c b a".  Whitespace runs are collapsed by the
    initial split().
    """
    backwards = sentence.split()
    backwards.reverse()
    prefixes = [" ".join(backwards[:end]) for end in range(1, len(backwards) + 1)]
    return " | ".join(prefixes)
def three_words_chunk(sentence):
    """Emit every 3-word sliding window of *sentence*, joined by " | ".

    "a b c d" -> "a b c | b c d".  Sentences with fewer than three words
    yield no windows and return "".
    """
    words = sentence.split()
    windows = zip(words, words[1:], words[2:])
    return " | ".join(" ".join(trio) for trio in windows)
def keep_nouns_verbs(sentence):
    """Strip *sentence* down to its nouns, verbs and punctuation.

    Runs the module-level spaCy pipeline ``nlp`` and keeps only tokens
    whose coarse POS tag is NOUN, VERB or PUNCT, re-joined with spaces.
    """
    doc = nlp(sentence)
    kept = [token.text for token in doc if token.pos_ in ("NOUN", "VERB", "PUNCT")]
    return " ".join(kept)
def unique_word_count(text="", state=None):
    """Count whitespace-separated words in *text*, accumulating into *state*.

    *state* is a word->count dict carried across calls (Gradio-style
    session state); when it is passed in, it is mutated in place.

    Returns a 1-tuple containing the (word, count) pairs sorted by
    descending count — the trailing-comma tuple matches the original
    contract, so callers unpacking a tuple keep working.
    """
    counts = {} if state is None else state
    for word in text.split():
        counts[word] = counts.get(word, 0) + 1
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return (ranked,)
def Wordchunker(word):
    """Return every prefix of *word*, shortest first.

    "abc" -> ["a", "ab", "abc"]; the empty string yields [].
    """
    return [word[:end] for end in range(1, len(word) + 1)]
def BatchWordChunk(sentence):
    """Spell out each word of *sentence* as space-terminated prefixes.

    Each word becomes its own line ("\\n" prefix) listing every prefix of
    the word followed by a trailing space, e.g. "ab c" -> "\\na ab \\nc ".
    """
    lines = []
    for word in sentence.split(" "):
        spelled = "".join(word[:end] + " " for end in range(1, len(word) + 1))
        lines.append("\n" + spelled)
    return "".join(lines)
# Shared Gradio controls reused inside the Blocks UI below.
# Target-language dropdown for googletrans (language codes, default German).
langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="Choose Language", value="de")
# Chunking-strategy selector consumed by FrontRevSentChunk().
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    """Chunk *Text* with the strategy named by *Chunkmode* and, when
    *Translate* is truthy, append a googletrans translation of the
    chunked output into *langdest* on a new line.

    Unknown chunk modes produce an empty chunked output.
    """
    dispatch = {
        "Chunks": Sentencechunker,
        "Reverse": ReverseSentenceChunker,
        "Three Word Chunks": three_words_chunk,
        "Spelling Chunks": BatchWordChunk,
    }
    chunker = dispatch.get(Chunkmode)
    FinalOutput = chunker(Text) if chunker is not None else ""
    if Translate:
        # Translate the chunked text (not the raw input), as the UI shows
        # both the original chunks and their translation together.
        translated = translator.translate(FinalOutput, dest=langdest)
        FinalOutput += "\n" + translated.text
    return FinalOutput
def SepHypandSynExpansion(text):
    """List WordNet hypernyms and synonyms for each token of *text*.

    Returns a 2-tuple:
      - NoHits: a single line naming tokens with no WordNet entry at all,
      - FinalOutput: one line per remaining token with its hypernym synset
        names and synonym lemma names.
    """
    # Bug fix: nltk and wordnet were referenced here but never imported
    # anywhere in the file, so this function raised NameError on first use.
    # Imported locally to keep the (slow) NLTK import off the module path.
    import nltk
    from nltk.corpus import wordnet

    # NOTE(review): word_tokenize needs the 'punkt' tokenizer data and
    # wordnet.synsets needs the 'wordnet' corpus — confirm both are
    # downloaded in the Space (nltk.download('punkt') / ('wordnet')).
    tokens = nltk.word_tokenize(text)
    NoHits = "Words to pay special attention to: "
    FinalOutput = ""
    for token in tokens:
        synonyms = []
        hypernyms = []
        for synset in wordnet.synsets(token):
            synonyms += synset.lemma_names()
            hypernyms += [hypernym.name() for hypernym in synset.hypernyms()]
        if not synonyms and not hypernyms:
            NoHits += f"{token} | "
        else:
            # Implicit string concatenation: newline prefix + formatted line.
            FinalOutput += "\n" f"{token}: hypernyms={hypernyms}, synonyms={synonyms}"
    return NoHits, FinalOutput
# Gradio UI: one tab per feature, wired to the helper functions above.
with gr.Blocks() as lliface:
    with gr.Tab("Welcome"):
        # Landing page / scratchpad of links and ideas.
        gr.HTML("<h1> Spaces Test - Still Undercontruction </h1> <p> Knowledge is a Language </p> <> Arrows app json creator for easy knowledge graphing and spacy POS graph? </p> <p> https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles<br>, https://huggingface.co/spaces/vumichien/whisper-speaker-diarization<br> Maybe duplicate these, private them and then load into spaces? --> Whisper space for youtube, Clip Interrogator, load here and all my random functions esp. text to HTML </p>")
    with gr.Tab("Transcribe - RASMUS Whisper"):
        gr.HTML("""<a href="https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles">https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles</a>""")
        # Embeds another Space in this tab.
        # NOTE(review): gr.Interface.load is deprecated in newer Gradio
        # releases — confirm the pinned gradio version still supports it.
        gr.Interface.load("spaces/RASMUS/Whisper-youtube-crosslingual-subtitles", title="Subtitles")
    with gr.Tab("Chunks"):
        # Sentence/word chunking with optional translation (see FrontRevSentChunk).
        gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text")
        gr.Interface(fn=keep_nouns_verbs, inputs=["text"], outputs="text", title="Noun and Verbs only (Plus punctuation)")
    with gr.Tab("Unique words, Hypernyms and synonyms"):
        gr.Interface(fn=unique_word_count, inputs="text", outputs="text", title="Wordcounter")
        # NOTE(review): SepHypandSynExpansion depends on nltk, which is not
        # imported at module level — verify it is installed and imported.
        gr.Interface(fn=SepHypandSynExpansion, inputs="text", outputs=["text", "text"], title="Word suggestions")
    with gr.Tab("Timing Practice"):
        # Embedded CodePen memorisation timer.
        gr.HTML("""<p class="codepen" data-height="300" data-default-tab="result" data-slug-hash="GRXKQgj" data-preview="true" data-editable="true" data-user="kwabs22" style="height: 300px; box-sizing: border-box; display: flex; align-items: center; justify-content: center; border: 2px solid; margin: 1em 0; padding: 1em;">
<span>See the Pen <a href="https://codepen.io/kwabs22/pen/GRXKQgj">
Memorisation Aid</a> by kwabs22 (<a href="https://codepen.io/kwabs22">@kwabs22</a>)
on <a href="https://codepen.io">CodePen</a>.</span>
</p>
<script async src="https://cpwebassets.codepen.io/assets/embed/ei.js"></script>""")

lliface.launch()