# wordcloud / app.py
# teatwots's picture
# Update app.py
# ea5d289 verified
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import nltk
from collections import Counter
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
import gradio as gr
import pandas as pd
# Download the NLTK data used by the tokenizer, POS tagger, stop-word list and
# lemmatizer. Both the legacy resource ids and the ids that newer NLTK
# releases look up ('punkt_tab', 'averaged_perceptron_tagger_eng') are
# requested, so the app works regardless of the installed NLTK version.
for _resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'stopwords',
    'wordnet',
):
    nltk.download(_resource)

# English stop words, kept in a set for fast membership tests.
stop_words = set(stopwords.words('english'))
# Shared lemmatizer used to reduce inflected verbs to their base form.
lemmatizer = WordNetLemmatizer()
# Vocabulary reference data for the word list.
# Each key is a lower-case English word; each value is a 4-tuple of
# (example sentence, comma-separated synonyms, Korean meaning 1, Korean meaning 2).
# The Korean strings are stored as real Hangul (UTF-8); earlier revisions held
# mis-decoded bytes that rendered as Greek/Latin gibberish.
word_data_examples = {
    "village": ("The village was quiet at night.", "hamlet, community", "마을", "시골"),
    "adventure": ("They went on an exciting adventure in the forest.", "expedition, quest", "모험", "여행"),
    "map": ("We used a map to find the hidden treasure.", "chart, atlas", "지도", "약도"),
    "cave": ("They explored a dark cave in the mountains.", "cavern, grotto", "동굴", "굴"),
    "among": ("She found her book among the pile of papers.", "amidst, between", "가운데", "사이에"),
    "mountains": ("The mountains were covered with snow in winter.", "peaks, ranges", "산", "산맥"),
    "children": ("The children played games in the park.", "kids, youngsters", "아이들", "어린이"),
    "known": ("He was known for his kindness and bravery.", "recognized, famous", "알려진", "유명한"),
    "hidden": ("They found a hidden door behind the bookshelf.", "concealed, secret", "숨겨진", "비밀의"),
    "local": ("The local market was full of fresh produce.", "regional, native", "지역의", "현지의"),
    "discovery": ("The discovery of the old map excited everyone.", "finding, revelation", "발견", "탐구"),
    "eagle": ("An eagle soared high above the valley.", "raptor, bird of prey", "독수리", "맹금"),
    "villagers": ("The villagers gathered in the square for the festival.", "residents, townsfolk", "마을 사람들", "주민들"),
    "legend": ("The legend of the lost city intrigued the adventurers.", "myth, lore", "전설", "신화"),
    "tales": ("Grandma told us tales of her childhood.", "stories, narratives", "이야기", "동화"),
    "daring": ("His daring escape from the cave was legendary.", "bold, audacious", "대담한", "용감한"),
    "spirit": ("The spirit of adventure was alive in their hearts.", "soul, essence", "정신", "혼"),
    "exploring": ("They spent the summer exploring the forest.", "investigating, discovering", "탐험하다", "탐구하다"),
    "old": ("The old castle was full of secrets.", "ancient, aged", "오래된", "낡은"),
    "lost": ("He felt lost without his best friend.", "missing, misplaced", "잃어버린", "길을 잃은"),
    "ancient": ("They discovered ancient artifacts in the desert.", "archaic, antique", "고대의", "옛날의"),
    "inside": ("Inside the box was a beautiful necklace.", "within, interior", "안쪽", "내부"),
    "treasure": ("They dreamed of finding hidden treasure.", "riches, valuables", "보물", "귀중품"),
    "whispering": ("The trees were whispering secrets in the wind.", "murmuring, softly speaking", "속삭이는", "조용히 말하는"),
    "hollow": ("They found a hollow tree to hide in during the storm.", "cavity, void", "빈", "구멍 난"),
    "decided": ("She decided to take the long way home.", "determined, resolved", "결정하다", "결심하다"),
    "journey": ("Their journey took them across the country.", "trip, voyage", "여행", "여정"),
    "together": ("They worked together to solve the mystery.", "jointly, collectively", "함께", "같이"),
    "way": ("She found a new way to solve the puzzle.", "method, manner", "방법", "방식"),
    "reached": ("They finally reached the top of the hill.", "arrived, attained", "도달하다", "도착하다"),
    "chest": ("The chest was filled with gold coins.", "trunk, box", "상자", "가슴"),
    "boulder": ("A large boulder blocked the path.", "rock, stone", "바위", "돌"),
    "artifacts": ("The museum displayed artifacts from ancient Egypt.", "relics, antiquities", "유물", "고대 유물"),
    "legends": ("The legends spoke of a hidden kingdom.", "myths, sagas", "전설", "신화"),
    "explore": ("They wanted to explore the old mansion.", "investigate, examine", "탐험하다", "조사하다"),
    "secret": ("She kept the secret hidden from everyone.", "confidential, hidden", "비밀", "숨겨진"),
    "small": ("The small kitten was very playful.", "tiny, little", "작은", "소형"),
    "mountain": ("The mountain was covered in thick forests.", "peak, hill", "산", "산맥"),
    "part": ("Each part of the puzzle was important.", "piece, segment", "부분", "조각"),
    "everyday": ("He wore his everyday clothes to the party.", "daily, routine", "일상적인", "매일의"),
    "life": ("Life in the village was peaceful.", "existence, being", "삶", "생명"),
    "nestled": ("The cabin was nestled in the woods.", "tucked, situated", "자리 잡다", "위치하다"),
    "towering": ("The towering trees made the forest dark and cool.", "lofty, soaring", "우뚝 솟은", "높은"),
    "peaks": ("The mountain peaks were covered in snow.", "summits, crests", "산봉우리", "정상"),
    "said": ("He said he would be back soon.", "stated, remarked", "말하다", "언급하다"),
    "protected": ("The ancient ruins were protected by law.", "guarded, sheltered", "보호된", "지켜진"),
    "massive": ("The massive ship docked at the port.", "enormous, huge", "거대한", "엄청난"),
    "supposedly": ("The treasure was supposedly buried under the tree.", "allegedly, reportedly", "아마", "추정상"),
    "watched": ("They watched the movie together.", "observed, viewed", "보다", "관찰하다"),
    "perch": ("The bird found a perch on the windowsill.", "roost, rest", "횃대", "앉다"),
}
# Lower-case tokens dropped from both the word cloud and the word list:
# pronouns and possessives, plus the names / title words of the sample story.
exclude_words = {
    # Subject and object pronouns.
    'i', 'you', 'he', 'she', 'it', 'we', 'they',
    'me', 'him', 'her', 'us', 'them',
    # Possessive determiners and possessive pronouns.
    'my', 'your', 'his', 'its', 'our', 'their',
    'mine', 'yours', 'hers', 'ours', 'theirs',
    # Character names, place name and "Guardian of the Glen" from the story.
    'alex', 'mia', 'sam',
    'echo', 'ridge',
    'guardian', 'of', 'the', 'glen',
}
def get_wordnet_pos(treebank_tag):
    """Map a Penn Treebank POS tag to the matching WordNet POS constant.

    Args:
        treebank_tag: Tag string as produced by ``nltk.pos_tag``.

    Returns:
        ``wordnet.ADJ``, ``wordnet.VERB``, ``wordnet.NOUN`` or ``wordnet.ADV``
        when the tag starts with 'J', 'V', 'N' or 'R' respectively;
        otherwise ``None``.
    """
    # (tag prefix, name of the wordnet constant), checked in this order.
    prefix_to_attr = (
        ('J', 'ADJ'),
        ('V', 'VERB'),
        ('N', 'NOUN'),
        ('R', 'ADV'),
    )
    for prefix, attr_name in prefix_to_attr:
        if treebank_tag.startswith(prefix):
            return getattr(wordnet, attr_name)
    return None
def process_text(text):
    """Tokenize *text*, POS-tag it, and count the content words.

    The full token stream is tagged before any filtering. The tagger uses
    neighbouring tokens and capitalization as cues, so tagging an already
    lower-cased, stop-word-stripped list (as this function previously did)
    gives it far less to work with.

    Args:
        text: Raw input text.

    Returns:
        A ``(word_freq, pos_tags)`` pair where ``word_freq`` is a ``Counter``
        of the kept lower-cased words and ``pos_tags`` is a list of
        ``(lower-cased word, Penn Treebank tag)`` tuples, one per kept token,
        in document order.
    """
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))

    pos_tags = []
    for token, tag in tagged_tokens:
        lowered = token.lower()
        # Keep purely alphanumeric tokens that are neither stop words nor
        # explicitly excluded words.
        if token.isalnum() and lowered not in stop_words and lowered not in exclude_words:
            pos_tags.append((lowered, tag))

    word_freq = Counter(word for word, _ in pos_tags)
    return word_freq, pos_tags
def generate_wordcloud(word_freq):
    """Render a word cloud for *word_freq* and save it as ``wordcloud.png``.

    Args:
        word_freq: Mapping of word -> frequency.

    Returns:
        The path of the saved image (``'wordcloud.png'``), or ``None`` when
        *word_freq* is empty, since a word cloud cannot be built from zero
        words.
    """
    if not word_freq:
        return None

    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_freq)
    fig = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        fig.savefig('wordcloud.png')
    finally:
        # Close the figure so repeated requests do not accumulate open
        # matplotlib figures in this long-running process.
        plt.close(fig)
    return 'wordcloud.png'
def translate_and_get_pos(word_freq, pos_tags):
    """Build the vocabulary-table rows for the words found in the text.

    Only words that have an entry in ``word_data_examples``, are not in
    ``exclude_words``, and received at least one POS tag known to the local
    tag map are included.

    Args:
        word_freq: Mapping of word -> frequency.
        pos_tags: Iterable of ``(word, Penn Treebank tag)`` pairs.

    Returns:
        A list of ``(word, pos_str, translation, example_sentence, synonyms)``
        tuples sorted by descending frequency. ``pos_str`` lists the word's
        distinct POS labels in first-seen order; for an inflected verb whose
        base form has not been annotated yet, a note naming the base form is
        appended.
    """
    pos_map = {
        'NN': 'n.', 'NNS': 'n.', 'NNP': 'n.', 'NNPS': 'n.',
        'VB': 'v.', 'VBD': 'v. (과거형)', 'VBG': 'v. (ing형)',
        'VBN': 'v. (과거분사형/수동태)', 'VBP': 'v.', 'VBZ': 'v.',
        'JJ': 'adj.', 'JJR': 'adj.', 'JJS': 'adj.',
        'RB': 'adv.', 'RBR': 'adv.', 'RBS': 'adv.',
        'IN': 'prep.', 'DT': 'det.', 'CC': 'conj.',
        'UH': 'intj.',
    }
    # Korean label for each verb form that gets a "form of <lemma>" note.
    verb_form_notes = {
        'VBD': '과거형',
        'VBG': 'ing형',
        'VBN': '과거분사형/수동태',
    }

    # Group the tags by word once instead of rescanning pos_tags per word.
    tags_by_word = {}
    for token, tag in pos_tags:
        tags_by_word.setdefault(token, []).append(tag)

    seen_verbs = set()  # Base forms that have already been annotated
    word_data = []
    for word in word_freq:
        if word not in word_data_examples or word in exclude_words:
            continue
        word_tags = tags_by_word.get(word, [])

        # De-duplicate while keeping first-seen order so the joined string is
        # deterministic (joining a set is not).
        labels = list(dict.fromkeys(pos_map[tag] for tag in word_tags if tag in pos_map))
        if not labels:
            continue  # No recognised POS for this word
        pos_str = ", ".join(labels)

        for tag in word_tags:
            if get_wordnet_pos(tag) != wordnet.VERB:
                continue
            lemma = lemmatizer.lemmatize(word, wordnet.VERB)
            if lemma == word or lemma in seen_verbs:
                continue
            for prefix, note in verb_form_notes.items():
                if tag.startswith(prefix):
                    pos_str += f" (v. {lemma}의 {note})"
                    break
            seen_verbs.add(lemma)

        example_sentence, synonyms, meaning_a, meaning_b = word_data_examples[word][:4]
        translation = f"{meaning_a}, {meaning_b}"
        word_data.append((word, pos_str, translation, example_sentence, synonyms))

    # Sort the rows by frequency, most frequent first (stable for ties).
    word_data.sort(key=lambda row: word_freq[row[0]], reverse=True)
    return word_data
def main(text):
    """Gradio callback: turn *text* into a word-cloud image and a vocabulary table.

    Args:
        text: Raw text entered by the user.

    Returns:
        A ``(image_path, html_table)`` pair. When the text contains no usable
        words, returns ``(None, "")`` instead of attempting to draw a word
        cloud from zero words.
    """
    word_freq, pos_tags = process_text(text)
    if not word_freq:
        # Nothing left after filtering (e.g. empty input): skip rendering.
        return None, ""

    wordcloud_image = generate_wordcloud(word_freq)
    word_data = translate_and_get_pos(word_freq, pos_tags)

    # Create a DataFrame to display the word data in a table format
    columns = ["어휘 (Word)", "범주 (Category)", "뜻 (Meaning)", "예문 (Example)", "동의어 (Synonyms)"]
    df = pd.DataFrame(word_data, columns=columns)
    word_data_table = df.to_html(index=False, justify='center')
    return wordcloud_image, word_data_table
# Custom CSS for the Gradio interface.
# This is plain CSS (no <style> wrapper) because it is handed to Gradio via
# the `css` argument below rather than injected as an HTML fragment.
css = """
body {
background-color: skyblue !important;
}
.gr-button {
background-color: blue !important;
border-color: blue !important;
}
table {
width: 100%;
border-collapse: collapse;
text-align: center;
}
th, td {
padding: 8px;
border: 1px solid #ddd;
}
th {
background-color: #f2f2f2;
}
"""
# Gradio interface: one text input; outputs are the word-cloud image and the
# HTML vocabulary table returned by main().
interface = gr.Interface(
    fn=main,
    inputs="text",
    outputs=["image", "html"],
    title="Wordcloud Vocabulary Learning App",
    description="Input text to generate a word cloud and a frequency list with Korean meanings, parts of speech, and example sentences."
    "<br><br><b>The full text:</b><br>"
    """<blockquote>In the small mountain village of Echo Ridge, adventure was a part of everyday life. Nestled among towering peaks, the village was said to be protected by the "Guardian of the Glen," a massive eagle that supposedly watched over the villagers from its perch high in the mountains. The legend inspired many adventurous tales among the villagers, especially the children.
Among these children was a bright-eyed eighth grader named Alex. Alex was known for his daring spirit and his love for exploring the rugged landscapes around Echo Ridge. He had a particular fascination with the old maps and tales of hidden treasures that had been lost in the mountains centuries ago.
One day, while exploring the local library, Alex stumbled upon an ancient map tucked inside a forgotten book on village lore. The map hinted at the location of a lost treasure, hidden deep within a cave known as Whispering Hollow. Excited by the prospect of a real adventure, Alex decided to seek out the treasure.
Knowing the journey would be risky, he enlisted the help of his best friends, Mia and Sam. Together, they prepared for the expedition, gathering supplies and studying the map extensively. They planned their route, took note of landmarks, and readied themselves for any challenges they might face.
Their journey began at dawn. They trekked through dense forests, crossed rushing streams, and climbed steep cliffs. Along the way, they encountered various wildlife and navigated through tricky terrains, their map guiding them every step of the way.
After hours of hiking, they finally reached Whispering Hollow. The cave was more magnificent than they had imagined, filled with intricate stalactites and echoes of dripping water. Using their flashlights, they ventured deeper into the cave, guided by the markings on the map.
As they reached the heart of the cave, they discovered an ancient chest hidden behind a fallen boulder. With hearts pounding, they moved the boulder and opened the chest. Inside, instead of gold or jewels, they found a collection of old artifacts: pottery, coins, and a beautifully carved statuette of an eagle — the Guardian of the Glen.
Realizing the historical significance of their find, they decided to donate the artifacts to the local museum. The village celebrated their discovery, and the children were hailed as heroes. Their adventure brought the community together, sparking a renewed interest in the history and legends of Echo Ridge. Alex, Mia, and Sam became local legends, known not only for their daring but also for their spirit of discovery and respect for heritage. They continued to explore the mountains, each adventure strengthening their friendship and deepening their connection to their village.
The legend of the Guardian of the Glen lived on, not just as a protector but as a symbol of adventure and discovery, inspiring future generations to explore the mysteries of Echo Ridge.<br><br><i>Copy and paste to try.</i></blockquote>""",
    # Apply the custom stylesheet. The previous gr.HTML(css) call ran after
    # launch() and outside any layout, so the styles were never rendered.
    css=css,
)
# Launch the interface
interface.launch()