Spaces:
Running
Running
File size: 5,912 Bytes
2c4ef8c 8c74afb 196b595 2c4ef8c 0aafd85 2c4ef8c 0aafd85 2c4ef8c 0aafd85 2c4ef8c 0aafd85 2c4ef8c 0aafd85 2c4ef8c 0aafd85 0747efa 0aafd85 2c4ef8c 0aafd85 0747efa 0aafd85 83fdea9 20df333 0aafd85 20df333 2c4ef8c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
from googletrans import Translator
import spacy
import gradio as gr
import nltk
from nltk.corpus import wordnet
# NLTK resources fetched at import time, listed in download order.
_NLTK_RESOURCES = (
    'maxent_ne_chunker',  # Chunker
    'stopwords',          # Stop Words List (Mainly Roman Languages)
    'words',              # 200 000+ Alphabetical order list
    'punkt',              # Tokenizer
    'verbnet',            # For Description of Verbs
    'omw',
    'omw-1.4',            # Multilingual Wordnet
    'wordnet',            # For Definitions, Antonyms and Synonyms
    'shakespeare',
    'dolch',              # Sight words
    'names',              # People Names NER
    'gazetteers',         # Location NER
    'opinion_lexicon',    # Sentiment words
)
for _resource in _NLTK_RESOURCES:
    nltk.download(_resource)

# Download, then load, the spaCy pipeline that keep_nouns_verbs relies on.
spacy.cli.download("en_core_web_sm")
nlp = spacy.load('en_core_web_sm')

# Single googletrans client shared by every translation request.
translator = Translator()
def Sentencechunker(sentence):
    """Return the growing prefixes of *sentence*, separated by " | ".

    The sentence is split on single spaces, so consecutive spaces produce
    empty "words" that still take part in the prefixes. The first chunk is
    the first word, the last chunk is the whole sentence.
    """
    words = sentence.split(" ")
    prefixes = [" ".join(words[:end]) for end in range(1, len(words) + 1)]
    return " | ".join(prefixes)
def ReverseSentenceChunker(sentence):
    """Chunk *sentence* cumulatively, starting from its last word.

    The words (split on any whitespace) are put in reverse order, re-joined
    with single spaces and handed to ``Sentencechunker``.
    """
    words = sentence.split()
    words.reverse()
    return Sentencechunker(" ".join(words))
def three_words_chunk(sentence):
    """Return every run of three consecutive words, separated by " | ".

    Words are split on any whitespace. A sentence with fewer than three
    words has no such run and yields an empty string.
    """
    words = sentence.split()
    # Zipping the list against itself shifted by one and two positions walks
    # a three-word window across the sentence.
    windows = zip(words, words[1:], words[2:])
    return " | ".join(" ".join(window) for window in windows)
def keep_nouns_verbs(sentence):
    """Return only the nouns, verbs and punctuation of *sentence*.

    The text is run through the module-level spaCy pipeline ``nlp``. Tokens
    whose ``pos_`` tag is NOUN, VERB or PUNCT are kept in their original
    order and joined with single spaces.
    """
    wanted_tags = ['NOUN', 'VERB', 'PUNCT']
    kept = [token.text for token in nlp(sentence) if token.pos_ in wanted_tags]
    return " ".join(kept)
def unique_word_count(text="", state=None):
    """Count whitespace-separated words and rank them by frequency.

    Args:
        text: Text to count; split on runs of whitespace.
        state: Optional dict of running counts. When supplied it is updated
            in place, so counts accumulate across calls that share it.

    Returns:
        A one-element tuple holding a list of ``(word, count)`` pairs sorted
        by descending count. Words with equal counts keep the order in which
        they were first seen.
    """
    tally = {} if state is None else state
    for word in text.split():
        tally[word] = tally.get(word, 0) + 1
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    # The one-element tuple is deliberate: it is the shape callers receive.
    return (ranked,)
def Wordchunker(word):
    """Return every leading prefix of *word*, shortest first.

    The list has one entry per character, ending with the full word. An
    empty word gives an empty list.
    """
    return [word[:end] for end in range(1, len(word) + 1)]
def BatchWordChunk(sentence):
    """Spell out each word of *sentence* as its growing prefixes.

    The sentence is split on single spaces. Each word contributes one line,
    introduced by a newline, that lists the word's prefixes from
    ``Wordchunker``, each followed by a space.
    """
    lines = []
    for word in sentence.split(" "):
        spelled = "".join(prefix + " " for prefix in Wordchunker(word))
        lines.append("\n" + spelled)
    return "".join(lines)
# Shared input widgets for the "Chunks" tab: the target-language code passed
# to the translator and the chunking mode dispatched on by FrontRevSentChunk.
langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="Choose Language", value="de")
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"], label="Choose Chunk Type", value="Chunks")
def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    """Chunk *Text* in the requested mode and optionally append a translation.

    Args:
        Chunkmode: One of "Chunks", "Reverse", "Three Word Chunks" or
            "Spelling Chunks". Any other value produces no chunks.
        Translate: When truthy, the chunked text is translated and the
            translation is appended on a new line.
        Text: The sentence to chunk.
        langdest: Destination language code handed to the translator. This
            parameter shadows the module-level dropdown of the same name.

    Returns:
        The chunked text, followed by a newline and its translation when
        translation was requested and there was something to translate.
    """
    chunkers = {
        "Chunks": Sentencechunker,
        "Reverse": ReverseSentenceChunker,
        "Three Word Chunks": three_words_chunk,
        "Spelling Chunks": BatchWordChunk,
    }
    chunker = chunkers.get(Chunkmode)
    FinalOutput = chunker(Text) if chunker is not None else ""

    # Skip the network round-trip when the chunked text is blank: there is
    # nothing to translate.
    if Translate and FinalOutput.strip():
        translated = translator.translate(FinalOutput, dest=langdest)
        FinalOutput += "\n" + translated.text
    return FinalOutput
def SepHypandSynExpansion(text):
    """List WordNet synonyms and hypernyms for every token of *text*.

    The text is tokenized with ``nltk.word_tokenize``. For each token the
    lemma names and the hypernym names of all its WordNet synsets are
    collected.

    Returns:
        A ``(no_hits, details)`` pair of strings. ``no_hits`` starts with
        "Words to pay special attention to: " and lists the tokens for which
        neither synonyms nor hypernyms were found. ``details`` has one line
        per remaining token with its hypernyms and synonyms.
    """
    missing = "Words to pay special attention to: "
    report = ""
    for token in nltk.word_tokenize(text):
        synsets = wordnet.synsets(token)
        synonyms = [lemma for synset in synsets for lemma in synset.lemma_names()]
        hypernyms = [parent.name() for synset in synsets for parent in synset.hypernyms()]
        if synonyms or hypernyms:
            report += "\n" f"{token}: hypernyms={hypernyms}, synonyms={synonyms}"
        else:
            missing += f"{token} | "
    return missing, report
# Assemble the tabbed Gradio UI; each tab exposes one or more of the helper
# functions defined earlier in this file, or embeds an external page.
with gr.Blocks() as lliface:
    with gr.Tab("Welcome"):
        gr.HTML("<h1> Spaces Test - Still Undercontruction </h1> <p> Knowledge is a Language </p> <p> Arrows app json creator for easy knowledge graphing and spacy POS graph? </p> <p> https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles<br>, https://huggingface.co/spaces/vumichien/whisper-speaker-diarization<br> Maybe duplicate these, private them and then load into spaces? --> Whisper space for youtube, Clip Interrogator, load here and all my random functions esp. text to HTML </p>")
    with gr.Tab("Transcribe - RASMUS Whisper"):
        gr.HTML("""<a href="https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles">https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles</a>""")
        # NOTE(review): newer Gradio releases appear to expose this loader as
        # gr.load - confirm against the Gradio version this Space pins.
        gr.Interface.load("spaces/RASMUS/Whisper-youtube-crosslingual-subtitles", title="Subtitles")
    with gr.Tab("Chunks"):
        gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text")
        gr.Interface(fn=keep_nouns_verbs, inputs=["text"], outputs="text", title="Noun and Verbs only (Plus punctuation)")
    with gr.Tab("Unique words, Hypernyms and synonyms"):
        gr.Interface(fn=unique_word_count, inputs="text", outputs="text", title="Wordcounter")
        gr.Interface(fn=SepHypandSynExpansion, inputs="text", outputs=["text", "text"], title="Word suggestions")
    with gr.Tab("Timing Practice"):
        gr.HTML("""<iframe height="1200" style="width: 100%;" scrolling="no" title="Memorisation Aid" src="https://codepen.io/kwabs22/embed/preview/GRXKQgj?default-tab=result&editable=true" frameborder="no" loading="lazy" allowtransparency="true" allowfullscreen="true">
See the Pen <a href="https://codepen.io/kwabs22/pen/GRXKQgj">
Memorisation Aid</a> by kwabs22 (<a href="https://codepen.io/kwabs22">@kwabs22</a>)
on <a href="https://codepen.io">CodePen</a>.
</iframe>""")

# Start the web server for the assembled app.
lliface.launch()