Spaces:
Running
Running
File size: 5,279 Bytes
2c4ef8c 0aafd85 2c4ef8c 0aafd85 2c4ef8c 0aafd85 2c4ef8c 0aafd85 2c4ef8c 0aafd85 2c4ef8c 0aafd85 0747efa 0aafd85 2c4ef8c 0aafd85 0747efa 0aafd85 20df333 0aafd85 20df333 2c4ef8c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
from googletrans import Translator
import spacy
import gradio as gr
# Load the small English spaCy pipeline used by keep_nouns_verbs. The model is
# downloaded only when it is not already installed, so a start-up with the
# model present skips the download step.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    # spacy.load raises OSError when the model package cannot be found.
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load('en_core_web_sm')

# Shared googletrans client used by FrontRevSentChunk.
translator = Translator()
def Sentencechunker(sentence):
    """Return every growing word-prefix of *sentence*, joined by " | ".

    For "a b c" the result is "a | a b | a b c".

    Words are separated on any run of whitespace, so repeated spaces, tabs
    or newlines never produce empty or duplicated chunks. An empty or blank
    sentence yields "".
    """
    words = sentence.split()
    prefixes = (" ".join(words[:end]) for end in range(1, len(words) + 1))
    return " | ".join(prefixes)
def ReverseSentenceChunker(sentence):
    """Chunk *sentence* after reversing the order of its words.

    The words (split on whitespace) are reversed, re-joined with single
    spaces, and the resulting sentence is passed to Sentencechunker.
    """
    backwards = sentence.split()[::-1]
    return Sentencechunker(" ".join(backwards))
def three_words_chunk(sentence):
    """Return every run of three consecutive words, joined by " | ".

    The windows overlap: "a b c d" gives "a b c | b c d". A sentence with
    fewer than three words gives "".
    """
    tokens = sentence.split()
    # Zipping the list against itself shifted by one and two positions
    # produces each sliding window of three adjacent words.
    triples = zip(tokens, tokens[1:], tokens[2:])
    return " | ".join(" ".join(triple) for triple in triples)
def keep_nouns_verbs(sentence):
    """Return only the nouns, verbs and punctuation of *sentence*.

    The sentence is processed with the module-level spaCy pipeline ``nlp``.
    The text of each token whose ``pos_`` is NOUN, VERB or PUNCT is kept in
    its original order, and the kept tokens are joined with single spaces.
    """
    wanted_tags = ['NOUN', 'VERB', 'PUNCT']
    kept = [tok.text for tok in nlp(sentence) if tok.pos_ in wanted_tags]
    return " ".join(kept)
def unique_word_count(text="", state=None):
    """Count how often each whitespace-separated word occurs in *text*.

    Counts are accumulated into *state*, a word -> count dict that is
    updated in place; a fresh dict is used when *state* is None.

    Returns a one-element tuple whose only item is the list of
    (word, count) pairs sorted by descending count. Words with equal counts
    keep the dict's insertion order.
    """
    tallies = {} if state is None else state
    for token in text.split():
        tallies[token] = tallies.get(token, 0) + 1
    ranked = sorted(tallies.items(), key=lambda pair: pair[1], reverse=True)
    return (ranked,)
def Wordchunker(word):
    """Return the list of growing prefixes of *word*.

    "cat" gives ["c", "ca", "cat"]; an empty word gives [].
    """
    return [word[:stop] for stop in range(1, len(word) + 1)]
def BatchWordChunk(sentence):
    """Spell out every word of *sentence* as its growing prefixes.

    Each word contributes one line to the result: a newline character
    followed by the word's prefixes from Wordchunker, every prefix followed
    by a single space. For "hi" that line is a newline plus "h hi ".

    Words are separated on any run of whitespace, so repeated spaces, tabs
    or newlines do not produce blank lines or prefixes containing
    whitespace. A blank sentence yields "".
    """
    lines = []
    for word in sentence.split():
        spelled = "".join(prefix + " " for prefix in Wordchunker(word))
        lines.append("\n" + spelled)
    return "".join(lines)
# Dropdown inputs shared with the chunking interface: the destination language
# code handed to the translator, and the chunking mode that FrontRevSentChunk
# dispatches on.
langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="Choose Language", value="de")
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    """Chunk *Text* in the selected mode and optionally append a translation.

    Chunkmode picks the chunker: "Chunks", "Reverse", "Three Word Chunks" or
    "Spelling Chunks". Any other value leaves the chunked output empty.

    When Translate is truthy, the chunked output is passed to the
    module-level ``translator`` with ``dest=langdest`` and the translated
    text is appended after a newline.
    """
    if Chunkmode == "Chunks":
        chunked = Sentencechunker(Text)
    elif Chunkmode == "Reverse":
        chunked = ReverseSentenceChunker(Text)
    elif Chunkmode == "Three Word Chunks":
        chunked = three_words_chunk(Text)
    elif Chunkmode == "Spelling Chunks":
        chunked = BatchWordChunk(Text)
    else:
        chunked = ""

    if not Translate:
        return chunked

    translation = translator.translate(chunked, dest=langdest)
    return chunked + "\n" + translation.text
def SepHypandSynExpansion(text):
    """List WordNet synonyms and hypernyms for every token of *text*.

    Returns a 2-tuple of strings:

    * the heading "Words to pay special attention to: " followed by each
      token that has neither synonyms nor hypernyms, written as "token | ";
    * one entry per remaining token, each preceded by a newline, of the
      form "token: hypernyms=[...], synonyms=[...]".
    """
    # The file has no module-level import of nltk, so import it here;
    # otherwise the first call fails with NameError.
    # NOTE(review): word_tokenize and wordnet need their NLTK data packages
    # (presumably 'punkt' and 'wordnet') - confirm the deployment has them.
    import nltk
    from nltk.corpus import wordnet

    no_hits = []
    expansions = []
    for token in nltk.word_tokenize(text):
        synonyms = []
        hypernyms = []
        # Pool the lemma names and hypernym names of every sense of the token.
        for synset in wordnet.synsets(token):
            synonyms += synset.lemma_names()
            hypernyms += [hypernym.name() for hypernym in synset.hypernyms()]
        if synonyms or hypernyms:
            expansions.append(
                f"\n{token}: hypernyms={hypernyms}, synonyms={synonyms}"
            )
        else:
            no_hits.append(f"{token} | ")

    NoHits = "Words to pay special attention to: " + "".join(no_hits)
    FinalOutput = "".join(expansions)
    return NoHits, FinalOutput
# Build the tabbed UI. Each tab hosts either static HTML or one or more
# gr.Interface wrappers around the helper functions defined above.
with gr.Blocks() as lliface:
    with gr.Tab("Welcome"):
        gr.HTML("<h1> Spaces Test - Still Undercontruction </h1> <p> Knowledge is a Language </p> <p> Arrows app json creator for easy knowledge graphing and spacy POS graph? </p> <p> https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles<br>, https://huggingface.co/spaces/vumichien/whisper-speaker-diarization<br> Maybe duplicate these, private them and then load into spaces? --> Whisper space for youtube, Clip Interrogator, load here and all my random functions esp. text to HTML </p>")
    with gr.Tab("Transcribe - RASMUS Whisper"):
        gr.HTML("""<a href="https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles">https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles</a>""")
        # Embeds the externally hosted Space inside this tab.
        gr.Interface.load("spaces/RASMUS/Whisper-youtube-crosslingual-subtitles", title="Subtitles")
    with gr.Tab("Chunks"):
        gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text")
        gr.Interface(fn=keep_nouns_verbs, inputs=["text"], outputs="text", title="Noun and Verbs only (Plus punctuation)")
    with gr.Tab("Unique words, Hypernyms and synonyms"):
        gr.Interface(fn=unique_word_count, inputs="text", outputs="text", title="Wordcounter")
        gr.Interface(fn=SepHypandSynExpansion, inputs="text", outputs=["text", "text"], title="Word suggestions")
    with gr.Tab("Timing Practice"):
        gr.HTML("""<iframe height="300" style="width: 100%;" scrolling="no" title="Memorisation Aid" src="https://codepen.io/kwabs22/embed/preview/GRXKQgj?default-tab=result&editable=true" frameborder="no" loading="lazy" allowtransparency="true" allowfullscreen="true">
See the Pen <a href="https://codepen.io/kwabs22/pen/GRXKQgj">
Memorisation Aid</a> by kwabs22 (<a href="https://codepen.io/kwabs22">@kwabs22</a>)
on <a href="https://codepen.io">CodePen</a>.
</iframe>""")

# Start the Gradio server for the assembled app.
lliface.launch()