Spaces:

KwabsHug
/

Language-Learn-Idea

Running

File size: 5,912 Bytes

2c4ef8c
 
 
8c74afb
196b595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c4ef8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0aafd85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c4ef8c
 
 
 
 
0aafd85
2c4ef8c
 
 
 
 
 
 
 
 
 
0aafd85
 
2c4ef8c
 
 
 
 
 
 
0aafd85
 
 
 
 
2c4ef8c
0aafd85
 
 
 
 
 
 
 
 
 
 
 
2c4ef8c
 
0aafd85
 
 
0747efa
0aafd85
 
2c4ef8c
0aafd85
 
0747efa
0aafd85
 
83fdea9
20df333
0aafd85
20df333
 
2c4ef8c

from googletrans import Translator
import spacy
import gradio as gr
import nltk
from nltk.corpus import wordnet

# Fetch every NLTK resource at import time (network I/O on each start-up).
# NOTE(review): of these, the functions visible in this file only appear to
# rely on the tokenizer ('punkt', via nltk.word_tokenize) and 'wordnet';
# confirm the others are needed elsewhere before pruning.
nltk.download('maxent_ne_chunker') #Chunker
nltk.download('stopwords') #Stop Words List (Mainly Roman Languages)
nltk.download('words') #200 000+ Alphabetical order list
nltk.download('punkt') #Tokenizer
nltk.download('verbnet') #For Description of Verbs
nltk.download('omw')
nltk.download('omw-1.4') #Multilingual Wordnet
nltk.download('wordnet') #For Definitions, Antonyms and Synonyms
nltk.download('shakespeare')
nltk.download('dolch') #Sight words
nltk.download('names') #People Names NER
nltk.download('gazetteers') #Location NER
nltk.download('opinion_lexicon') #Sentiment words


# Download the small English spaCy model on every start-up, then load it.
spacy.cli.download("en_core_web_sm")

# Shared spaCy pipeline; keep_nouns_verbs reads part-of-speech tags from it.
nlp = spacy.load('en_core_web_sm')
# Shared googletrans client; FrontRevSentChunk uses it for translation.
translator = Translator()

def Sentencechunker(sentence):
    """Return the growing word prefixes of ``sentence`` joined by " | ".

    The sentence is split on single spaces, so the i-th piece contains the
    first i space-separated tokens, e.g. "a b c" -> "a | a b | a b c".
    Consecutive spaces produce empty tokens, which are kept as-is.
    """
    tokens = sentence.split(" ")
    prefixes = [" ".join(tokens[:end]) for end in range(1, len(tokens) + 1)]
    return " | ".join(prefixes)

def ReverseSentenceChunker(sentence):
    """Chunk ``sentence`` like Sentencechunker, but starting from its last word.

    The words (split on any whitespace) are put in reverse order, re-joined
    with single spaces and handed to Sentencechunker.
    """
    flipped_words = sentence.split()[::-1]
    return Sentencechunker(" ".join(flipped_words))

def three_words_chunk(sentence):
    """Return every run of three consecutive words, joined by " | ".

    The sentence is split on whitespace and a three-word window is slid
    across it one word at a time, e.g. "a b c d" -> "a b c | b c d".

    A sentence with fewer than three words cannot fill a window; instead of
    silently returning an empty string, its words are returned as a single
    (short) chunk. An empty or whitespace-only sentence still yields "".
    """
    words = sentence.split()
    if len(words) < 3:
        # Not enough words for a full window: keep what the user typed.
        return " ".join(words)
    windows = (" ".join(words[start:start + 3]) for start in range(len(words) - 2))
    return " | ".join(windows)

def keep_nouns_verbs(sentence):
    """Return only the nouns, verbs and punctuation of ``sentence``.

    The sentence is run through the module-level spaCy pipeline ``nlp``;
    tokens whose coarse part-of-speech tag is NOUN, VERB or PUNCT are kept
    in their original order and joined with single spaces.
    """
    kept_tags = ('NOUN', 'VERB', 'PUNCT')
    return " ".join(tok.text for tok in nlp(sentence) if tok.pos_ in kept_tags)

def unique_word_count(text="", state=None):
    """Count whitespace-separated words and rank them by frequency.

    Args:
        text: Text whose words are counted.
        state: Optional dict of existing counts. It is updated in place, so
            passing the same dict again accumulates counts across calls.

    Returns:
        A one-element tuple holding the list of ``(word, count)`` pairs,
        sorted by descending count (ties keep first-seen order).

    NOTE(review): the one-element tuple wrapper looks accidental (a stray
    trailing comma on the return); it is preserved here so the return shape
    does not change for existing callers.
    """
    tally = {} if state is None else state
    for token in text.split():
        tally[token] = tally.get(token, 0) + 1
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    return (ranked,)

def Wordchunker(word):
    """Return the growing prefixes of ``word``: "cat" -> ["c", "ca", "cat"].

    An empty word yields an empty list.
    """
    return [word[:stop] for stop in range(1, len(word) + 1)]

def BatchWordChunk(sentence):
    """Spell out every word of ``sentence`` as growing prefixes, one word per line.

    The sentence is split on single spaces. Each word contributes a line
    that starts with a newline and lists the word's prefixes from
    Wordchunker, each followed by one space (so every line ends in a space).
    """
    lines = []
    for word in sentence.split(" "):
        spelled = "".join(piece + " " for piece in Wordchunker(word))
        lines.append("\n" + spelled)
    return "".join(lines)

# Shared input widgets for the chunking interface: the target language code
# passed to the translator, and the chunking strategy FrontRevSentChunk applies.
langdest = gr.Dropdown(
    choices=["af", "de", "es", "ko", "ja", "zh-cn"],
    label="Choose Language",
    value="de",
)

ChunkModeDrop = gr.Dropdown(
    choices=["Chunks", "Reverse", "Three Word Chunks", "Spelling Chunks"],
    label="Choose Chunk Type",
    value="Chunks",
)

def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    """Chunk ``Text`` with the selected strategy and optionally translate it.

    Args:
        Chunkmode: One of "Chunks", "Reverse", "Three Word Chunks" or
            "Spelling Chunks"; any other value produces no chunks.
        Translate: When truthy, the chunked text is also translated.
        Text: The sentence to chunk.
        langdest: Destination language code handed to the translator.

    Returns:
        The chunked text, followed (when translating) by a newline and the
        translation of that chunked text.
    """
    chunkers = {
        "Chunks": Sentencechunker,
        "Reverse": ReverseSentenceChunker,
        "Three Word Chunks": three_words_chunk,
        "Spelling Chunks": BatchWordChunk,
    }
    chunker = chunkers.get(Chunkmode)
    chunked = chunker(Text) if chunker is not None else ""

    if not Translate:
        return chunked

    # The separators are translated along with the words, as one request.
    translated = translator.translate(chunked, dest=langdest)
    return chunked + "\n" + translated.text

def SepHypandSynExpansion(text):
    """List WordNet synonyms and hypernyms for every token of ``text``.

    Returns:
        A pair ``(no_hits, expansions)``. ``no_hits`` is the fixed heading
        followed by each token WordNet had nothing for, every one suffixed
        with " | ". ``expansions`` has one newline-prefixed line per
        remaining token showing its hypernym synset names and synonym lemmas.
    """
    unmatched = []
    expansions = []

    for token in nltk.word_tokenize(text):
        synsets = wordnet.synsets(token)
        synonyms = [lemma for synset in synsets for lemma in synset.lemma_names()]
        hypernyms = [parent.name() for synset in synsets for parent in synset.hypernyms()]
        if synonyms or hypernyms:
            expansions.append(f"\n{token}: hypernyms={hypernyms}, synonyms={synonyms}")
        else:
            unmatched.append(f"{token} | ")

    no_hits = "Words to pay special attention to: " + "".join(unmatched)
    return no_hits, "".join(expansions)

# Assemble the tabbed Gradio UI. Components render in the order they are
# created inside each tab, so statement order here is the page layout.
with gr.Blocks() as lliface:
  # Landing tab: static HTML with project notes and links.
  # NOTE(review): the HTML below contains a stray "<>" where a "<p>" opener
  # appears intended, and "Undercontruction" is misspelled; both are runtime
  # text and are left untouched here.
  with gr.Tab("Welcome"):
    gr.HTML("<h1> Spaces Test - Still Undercontruction </h1> <p> Knowledge is a Language </p> <> Arrows app json creator for easy knowledge graphing and spacy POS graph? </p> <p> https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles<br>, https://huggingface.co/spaces/vumichien/whisper-speaker-diarization<br>  Maybe duplicate these, private them and then load into spaces? --> Whisper space for youtube, Clip Interrogator, load here and all my random functions esp. text to HTML </p>")
  # Transcription tab: a link to the external Space plus that Space loaded
  # through gr.Interface.load.
  with gr.Tab("Transcribe - RASMUS Whisper"):
    gr.HTML("""<a href="https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles">https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles</a>""")
    gr.Interface.load("spaces/RASMUS/Whisper-youtube-crosslingual-subtitles", title="Subtitles")
  # Chunking tab: inputs map positionally onto
  # FrontRevSentChunk(Chunkmode, Translate, Text, langdest).
  with gr.Tab("Chunks"):
    gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text")
    gr.Interface(fn=keep_nouns_verbs, inputs=["text"], outputs="text", title="Noun and Verbs only (Plus punctuation)")
  # Vocabulary tab: word frequencies, and WordNet expansions whose two
  # return values fill the two text outputs.
  with gr.Tab("Unique words, Hypernyms and synonyms"):
    gr.Interface(fn=unique_word_count, inputs="text", outputs="text", title="Wordcounter")
    gr.Interface(fn=SepHypandSynExpansion, inputs="text", outputs=["text", "text"], title="Word suggestions")
  # Timing tab: embeds an external CodePen in an iframe.
  with gr.Tab("Timing Practice"):
    gr.HTML("""<iframe height="1200" style="width: 100%;" scrolling="no" title="Memorisation Aid" src="https://codepen.io/kwabs22/embed/preview/GRXKQgj?default-tab=result&editable=true" frameborder="no" loading="lazy" allowtransparency="true" allowfullscreen="true">
  See the Pen <a href="https://codepen.io/kwabs22/pen/GRXKQgj">
  Memorisation Aid</a> by kwabs22 (<a href="https://codepen.io/kwabs22">@kwabs22</a>)
  on <a href="https://codepen.io">CodePen</a>.
</iframe>""")

# Start the app as soon as the module is executed.
lliface.launch()