teatwots committed on
Commit
ea5d289
·
verified ·
1 Parent(s): 943fd65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -72
app.py CHANGED
@@ -1,81 +1,96 @@
 
 
1
  import matplotlib.pyplot as plt
2
  from wordcloud import WordCloud
3
  import nltk
4
  from collections import Counter
5
- from translate import Translator
6
- from nltk.corpus import stopwords
7
  import gradio as gr
 
8
 
9
  # Download necessary NLTK data
10
  nltk.download('punkt')
11
  nltk.download('averaged_perceptron_tagger')
12
  nltk.download('stopwords')
 
13
 
14
- # Set up the translator
15
- translator = Translator(to_lang="ko")
16
  stop_words = set(stopwords.words('english'))
 
17
 
18
- # Define example sentences and synonyms for the word list
19
  word_data_examples = {
20
- "village": ("The village was quiet at night.", "hamlet, community"),
21
- "adventure": ("They went on an exciting adventure in the forest.", "expedition, quest"),
22
- "map": ("We used a map to find the hidden treasure.", "chart, atlas"),
23
- "cave": ("They explored a dark cave in the mountains.", "cavern, grotto"),
24
- "among": ("She found her book among the pile of papers.", "amidst, between"),
25
- "mountains": ("The mountains were covered with snow in winter.", "peaks, ranges"),
26
- "children": ("The children played games in the park.", "kids, youngsters"),
27
- "known": ("He was known for his kindness and bravery.", "recognized, famous"),
28
- "hidden": ("They found a hidden door behind the bookshelf.", "concealed, secret"),
29
- "local": ("The local market was full of fresh produce.", "regional, native"),
30
- "discovery": ("The discovery of the old map excited everyone.", "finding, revelation"),
31
- "eagle": ("An eagle soared high above the valley.", "raptor, bird of prey"),
32
- "villagers": ("The villagers gathered in the square for the festival.", "residents, townsfolk"),
33
- "legend": ("The legend of the lost city intrigued the adventurers.", "myth, lore"),
34
- "tales": ("Grandma told us tales of her childhood.", "stories, narratives"),
35
- "daring": ("His daring escape from the cave was legendary.", "bold, audacious"),
36
- "spirit": ("The spirit of adventure was alive in their hearts.", "soul, essence"),
37
- "exploring": ("They spent the summer exploring the forest.", "investigating, discovering"),
38
- "old": ("The old castle was full of secrets.", "ancient, aged"),
39
- "lost": ("He felt lost without his best friend.", "missing, misplaced"),
40
- "ancient": ("They discovered ancient artifacts in the desert.", "archaic, antique"),
41
- "inside": ("Inside the box was a beautiful necklace.", "within, interior"),
42
- "treasure": ("They dreamed of finding hidden treasure.", "riches, valuables"),
43
- "whispering": ("The trees were whispering secrets in the wind.", "murmuring, softly speaking"),
44
- "hollow": ("They found a hollow tree to hide in during the storm.", "cavity, void"),
45
- "decided": ("She decided to take the long way home.", "determined, resolved"),
46
- "journey": ("Their journey took them across the country.", "trip, voyage"),
47
- "together": ("They worked together to solve the mystery.", "jointly, collectively"),
48
- "way": ("She found a new way to solve the puzzle.", "method, manner"),
49
- "reached": ("They finally reached the top of the hill.", "arrived, attained"),
50
- "chest": ("The chest was filled with gold coins.", "trunk, box"),
51
- "boulder": ("A large boulder blocked the path.", "rock, stone"),
52
- "artifacts": ("The museum displayed artifacts from ancient Egypt.", "relics, antiquities"),
53
- "legends": ("The legends spoke of a hidden kingdom.", "myths, sagas"),
54
- "explore": ("They wanted to explore the old mansion.", "investigate, examine"),
55
- "secret": ("She kept the secret hidden from everyone.", "confidential, hidden"),
56
- "small": ("The small kitten was very playful.", "tiny, little"),
57
- "mountain": ("The mountain was covered in thick forests.", "peak, hill"),
58
- "part": ("Each part of the puzzle was important.", "piece, segment"),
59
- "everyday": ("He wore his everyday clothes to the party.", "daily, routine"),
60
- "life": ("Life in the village was peaceful.", "existence, being"),
61
- "nestled": ("The cabin was nestled in the woods.", "tucked, situated"),
62
- "towering": ("The towering trees made the forest dark and cool.", "lofty, soaring"),
63
- "peaks": ("The mountain peaks were covered in snow.", "summits, crests"),
64
- "said": ("He said he would be back soon.", "stated, remarked"),
65
- "protected": ("The ancient ruins were protected by law.", "guarded, sheltered"),
66
- "massive": ("The massive ship docked at the port.", "enormous, huge"),
67
- "supposedly": ("The treasure was supposedly buried under the tree.", "allegedly, reportedly"),
68
- "watched": ("They watched the movie together.", "observed, viewed"),
69
- "perch": ("The bird found a perch on the windowsill.", "roost, rest")
70
  }
71
 
72
  # Words to be excluded from both the word cloud and the word list
73
  exclude_words = set([
74
- 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them',
75
  'my', 'your', 'his', 'its', 'our', 'their', 'mine', 'yours', 'hers', 'ours', 'theirs',
76
  'alex', 'mia', 'sam', 'echo', 'ridge', 'guardian', 'of', 'the', 'glen'
77
  ])
78
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def process_text(text):
80
  words = nltk.word_tokenize(text)
81
  words = [word.lower() for word in words if word.isalnum() and word.lower() not in stop_words and word.lower() not in exclude_words]
@@ -93,30 +108,59 @@ def generate_wordcloud(word_freq):
93
 
94
  def translate_and_get_pos(word_freq, pos_tags):
95
  pos_map = {
96
- 'NN': 'n.', 'NNS': 'n.', 'VB': 'v.', 'VBD': 'v.', 'VBG': 'v.', 'VBN': 'v.',
97
- 'VBP': 'v.', 'VBZ': 'v.', 'JJ': 'adj.', 'JJR': 'adj.', 'JJS': 'adj.', 'RB': 'adv.',
98
- 'RBR': 'adv.', 'RBS': 'adv.'
 
99
  }
100
 
 
101
  word_data = []
102
  for word, freq in word_freq.items():
103
- pos = [pos_tag[1] for pos_tag in pos_tags if pos_tag[0] == word]
104
- if pos and (pos[0] in ['NNP', 'NNPS'] or word in exclude_words):
105
- continue # Skip proper nouns, pronouns, and specific excluded words
106
- translation = translator.translate(word)
107
- pos = pos_map.get(pos[0], 'N/A') if pos else 'N/A'
108
- example_sentence, synonyms = word_data_examples.get(word, (f"ex) The word '{word}' in a sentence.", ""))
109
- word_data.append((word, freq, translation, pos, example_sentence, synonyms))
110
- word_data.sort(key=lambda x: x[1], reverse=True)
111
- return word_data[:50]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  def main(text):
114
  word_freq, pos_tags = process_text(text)
115
  wordcloud_image = generate_wordcloud(word_freq)
116
  word_data = translate_and_get_pos(word_freq, pos_tags)
117
-
118
- word_data_str = "\n".join([f"{i+1}. {word}: {pos} {translation}, ex) {example_sentence} λ™μ˜μ–΄: {synonyms}." for i, (word, freq, translation, pos, example_sentence, synonyms) in enumerate(word_data)])
119
- return wordcloud_image, word_data_str
 
 
 
120
 
121
  # Custom CSS for the Gradio interface
122
  css = """
@@ -128,6 +172,18 @@ body {
128
  background-color: blue !important;
129
  border-color: blue !important;
130
  }
 
 
 
 
 
 
 
 
 
 
 
 
131
  </style>
132
  """
133
 
@@ -135,7 +191,7 @@ body {
135
  interface = gr.Interface(
136
  fn=main,
137
  inputs="text",
138
- outputs=["image", "text"],
139
  title="Wordcloud Vocabulary Learning App",
140
  description="Input text to generate a word cloud and a frequency list with Korean meanings, parts of speech, and example sentences."
141
  "<br><br><b>The full text:</b><br>"
 
1
+
2
+
3
  import matplotlib.pyplot as plt
4
  from wordcloud import WordCloud
5
  import nltk
6
  from collections import Counter
7
+ from nltk.corpus import stopwords, wordnet
8
+ from nltk.stem import WordNetLemmatizer
9
  import gradio as gr
10
+ import pandas as pd
11
 
12
  # Download necessary NLTK data
13
  nltk.download('punkt')
14
  nltk.download('averaged_perceptron_tagger')
15
  nltk.download('stopwords')
16
+ nltk.download('wordnet')
17
 
 
 
18
  stop_words = set(stopwords.words('english'))
19
+ lemmatizer = WordNetLemmatizer()
20
 
21
+ # Define example sentences, synonyms, and Korean meanings for the word list
22
  word_data_examples = {
23
+ "village": ("The village was quiet at night.", "hamlet, community", "λ§ˆμ„", "μ‹œκ³¨"),
24
+ "adventure": ("They went on an exciting adventure in the forest.", "expedition, quest", "λͺ¨ν—˜", "μ—¬ν–‰"),
25
+ "map": ("We used a map to find the hidden treasure.", "chart, atlas", "지도", "약도"),
26
+ "cave": ("They explored a dark cave in the mountains.", "cavern, grotto", "동꡴", "κ΅΄"),
27
+ "among": ("She found her book among the pile of papers.", "amidst, between", "κ°€μš΄λ°", "사이에"),
28
+ "mountains": ("The mountains were covered with snow in winter.", "peaks, ranges", "μ‚°", "μ‚°λ§₯"),
29
+ "children": ("The children played games in the park.", "kids, youngsters", "아이듀", "어린이"),
30
+ "known": ("He was known for his kindness and bravery.", "recognized, famous", "μ•Œλ €μ§„", "유λͺ…ν•œ"),
31
+ "hidden": ("They found a hidden door behind the bookshelf.", "concealed, secret", "μˆ¨κ²¨μ§„", "λΉ„λ°€μ˜"),
32
+ "local": ("The local market was full of fresh produce.", "regional, native", "μ§€μ—­μ˜", "ν˜„μ§€μ˜"),
33
+ "discovery": ("The discovery of the old map excited everyone.", "finding, revelation", "발견", "탐ꡬ"),
34
+ "eagle": ("An eagle soared high above the valley.", "raptor, bird of prey", "λ…μˆ˜λ¦¬", "맹금"),
35
+ "villagers": ("The villagers gathered in the square for the festival.", "residents, townsfolk", "λ§ˆμ„ μ‚¬λžŒλ“€", "μ£Όλ―Όλ“€"),
36
+ "legend": ("The legend of the lost city intrigued the adventurers.", "myth, lore", "μ „μ„€", "μ‹ ν™”"),
37
+ "tales": ("Grandma told us tales of her childhood.", "stories, narratives", "이야기", "동화"),
38
+ "daring": ("His daring escape from the cave was legendary.", "bold, audacious", "λŒ€λ‹΄ν•œ", "μš©κ°ν•œ"),
39
+ "spirit": ("The spirit of adventure was alive in their hearts.", "soul, essence", "μ •μ‹ ", "혼"),
40
+ "exploring": ("They spent the summer exploring the forest.", "investigating, discovering", "νƒν—˜ν•˜λ‹€", "νƒκ΅¬ν•˜λ‹€"),
41
+ "old": ("The old castle was full of secrets.", "ancient, aged", "였래된", "낑은"),
42
+ "lost": ("He felt lost without his best friend.", "missing, misplaced", "μžƒμ–΄λ²„λ¦°", "길을 μžƒμ€"),
43
+ "ancient": ("They discovered ancient artifacts in the desert.", "archaic, antique", "κ³ λŒ€μ˜", "μ˜›λ‚ μ˜"),
44
+ "inside": ("Inside the box was a beautiful necklace.", "within, interior", "μ•ˆμͺ½", "λ‚΄λΆ€"),
45
+ "treasure": ("They dreamed of finding hidden treasure.", "riches, valuables", "보물", "κ·€μ€‘ν’ˆ"),
46
+ "whispering": ("The trees were whispering secrets in the wind.", "murmuring, softly speaking", "μ†μ‚­μ΄λŠ”", "쑰용히 λ§ν•˜λŠ”"),
47
+ "hollow": ("They found a hollow tree to hide in during the storm.", "cavity, void", "빈", "ꡬ멍 λ‚œ"),
48
+ "decided": ("She decided to take the long way home.", "determined, resolved", "κ²°μ •ν•˜λ‹€", "κ²°μ‹¬ν•˜λ‹€"),
49
+ "journey": ("Their journey took them across the country.", "trip, voyage", "μ—¬ν–‰", "μ—¬μ •"),
50
+ "together": ("They worked together to solve the mystery.", "jointly, collectively", "ν•¨κ»˜", "같이"),
51
+ "way": ("She found a new way to solve the puzzle.", "method, manner", "방법", "방식"),
52
+ "reached": ("They finally reached the top of the hill.", "arrived, attained", "λ„λ‹¬ν•˜λ‹€", "λ„μ°©ν•˜λ‹€"),
53
+ "chest": ("The chest was filled with gold coins.", "trunk, box", "μƒμž", "κ°€μŠ΄"),
54
+ "boulder": ("A large boulder blocked the path.", "rock, stone", "λ°”μœ„", "돌"),
55
+ "artifacts": ("The museum displayed artifacts from ancient Egypt.", "relics, antiquities", "유물", "κ³ λŒ€ 유물"),
56
+ "legends": ("The legends spoke of a hidden kingdom.", "myths, sagas", "μ „μ„€", "μ‹ ν™”"),
57
+ "explore": ("They wanted to explore the old mansion.", "investigate, examine", "νƒν—˜ν•˜λ‹€", "μ‘°μ‚¬ν•˜λ‹€"),
58
+ "secret": ("She kept the secret hidden from everyone.", "confidential, hidden", "λΉ„λ°€", "μˆ¨κ²¨μ§„"),
59
+ "small": ("The small kitten was very playful.", "tiny, little", "μž‘μ€", "μ†Œν˜•"),
60
+ "mountain": ("The mountain was covered in thick forests.", "peak, hill", "μ‚°", "μ‚°λ§₯"),
61
+ "part": ("Each part of the puzzle was important.", "piece, segment", "λΆ€λΆ„", "쑰각"),
62
+ "everyday": ("He wore his everyday clothes to the party.", "daily, routine", "일상적인", "맀일의"),
63
+ "life": ("Life in the village was peaceful.", "existence, being", "μ‚Ά", "생λͺ…"),
64
+ "nestled": ("The cabin was nestled in the woods.", "tucked, situated", "자리 μž‘λ‹€", "μœ„μΉ˜ν•˜λ‹€"),
65
+ "towering": ("The towering trees made the forest dark and cool.", "lofty, soaring", "우뚝 μ†Ÿμ€", "높은"),
66
+ "peaks": ("The mountain peaks were covered in snow.", "summits, crests", "μ‚°λ΄‰μš°λ¦¬", "정상"),
67
+ "said": ("He said he would be back soon.", "stated, remarked", "λ§ν•˜λ‹€", "μ–ΈκΈ‰ν•˜λ‹€"),
68
+ "protected": ("The ancient ruins were protected by law.", "guarded, sheltered", "보호된", "μ§€μΌœμ§„"),
69
+ "massive": ("The massive ship docked at the port.", "enormous, huge", "κ±°λŒ€ν•œ", "μ—„μ²­λ‚œ"),
70
+ "supposedly": ("The treasure was supposedly buried under the tree.", "allegedly, reportedly", "μ•„λ§ˆ", "좔정상"),
71
+ "watched": ("They watched the movie together.", "observed, viewed", "보닀", "κ΄€μ°°ν•˜λ‹€"),
72
+ "perch": ("The bird found a perch on the windowsill.", "roost, rest", "νšƒλŒ€", "앉닀")
73
  }
74
 
75
  # Words to be excluded from both the word cloud and the word list
76
  exclude_words = set([
77
+ 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them',
78
  'my', 'your', 'his', 'its', 'our', 'their', 'mine', 'yours', 'hers', 'ours', 'theirs',
79
  'alex', 'mia', 'sam', 'echo', 'ridge', 'guardian', 'of', 'the', 'glen'
80
  ])
81
 
82
def get_wordnet_pos(treebank_tag):
    """Map a Treebank-style POS tag to the matching WordNet POS constant.

    Only the first character of the tag is inspected:
    ``J`` -> adjective, ``V`` -> verb, ``N`` -> noun, ``R`` -> adverb.

    Args:
        treebank_tag: POS tag string such as ``'NN'``, ``'VBD'`` or ``'JJ'``.

    Returns:
        ``wordnet.ADJ``, ``wordnet.VERB``, ``wordnet.NOUN`` or ``wordnet.ADV``;
        ``None`` when the tag belongs to none of those families (this
        includes the empty string).
    """
    first_letter_to_wordnet = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }
    # Slicing (rather than indexing) keeps the empty tag on the "None" path.
    return first_letter_to_wordnet.get(treebank_tag[:1])
93
+
94
  def process_text(text):
95
  words = nltk.word_tokenize(text)
96
  words = [word.lower() for word in words if word.isalnum() and word.lower() not in stop_words and word.lower() not in exclude_words]
 
108
 
109
def translate_and_get_pos(word_freq, pos_tags):
    """Build the vocabulary-table rows for the curated words found in a text.

    A word produces a row only when it is a key of ``word_data_examples``,
    is not in ``exclude_words``, and at least one of its tags appears in the
    local ``pos_map``.

    Args:
        word_freq: Mapping of word -> occurrence count.
        pos_tags: Iterable of ``(word, tag)`` pairs.

    Returns:
        A list of ``(word, pos_str, translation, example_sentence, synonyms)``
        tuples, sorted by descending frequency. ``pos_str`` is the
        comma-joined, de-duplicated POS labels for the word, optionally
        followed by a note naming the verb lemma it was inflected from.
    """
    pos_map = {
        'NN': 'n.', 'NNS': 'n.', 'NNP': 'n.', 'NNPS': 'n.',
        'VB': 'v.', 'VBD': 'v. (과거형)', 'VBG': 'v. (ing형)',
        'VBN': 'v. (과거분사형/수동태)', 'VBP': 'v.', 'VBZ': 'v.',
        'JJ': 'adj.', 'JJR': 'adj.', 'JJS': 'adj.',
        'RB': 'adv.', 'RBR': 'adv.', 'RBS': 'adv.',
        'IN': 'prep.', 'DT': 'det.', 'CC': 'conj.',
        'UH': 'intj.',
    }
    # Suffix appended after "<lemma>의" for inflected verb forms.
    verb_form_notes = {
        'VBD': '과거형',
        'VBG': 'ing형',
        'VBN': '과거분사형/수동태',
    }

    # Group the tags per word once instead of rescanning pos_tags per word.
    tags_by_word = {}
    for tagged in pos_tags:
        tags_by_word.setdefault(tagged[0], []).append(tagged[1])

    seen_verbs = set()  # lemmas that have already been handled
    word_data = []
    for word in word_freq:
        if word not in word_data_examples or word in exclude_words:
            continue

        tags = tags_by_word.get(word, [])
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # joined label string is the same on every run (a set is not ordered).
        labels = list(dict.fromkeys(pos_map[tag] for tag in tags if tag in pos_map))
        if not labels:
            continue  # no recognised part of speech for this word
        pos_str = ", ".join(labels)

        # Annotate inflected verbs with their base form, once per lemma.
        for tag in tags:
            wn_pos = get_wordnet_pos(tag)
            if wn_pos != wordnet.VERB:
                continue
            lemmatized_word = lemmatizer.lemmatize(word, wn_pos)
            if lemmatized_word == word or lemmatized_word in seen_verbs:
                continue
            note = verb_form_notes.get(tag[:3])
            if note is not None:
                pos_str += f" (v. {lemmatized_word}의 {note})"
            seen_verbs.add(lemmatized_word)

        entry = word_data_examples[word]
        example_sentence, synonyms = entry[:2]
        translation = f"{entry[2]}, {entry[3]}"
        word_data.append((word, pos_str, translation, example_sentence, synonyms))

    # Most frequent words first; the sort is stable, so ties keep input order.
    word_data.sort(key=lambda row: word_freq[row[0]], reverse=True)
    return word_data
153
 
154
def main(text):
    """Turn input text into a word-cloud image and an HTML vocabulary table.

    Args:
        text: Raw text passed to ``process_text``.

    Returns:
        A ``(wordcloud_image, word_data_table)`` pair: the value returned by
        ``generate_wordcloud`` and an HTML ``<table>`` string with one row
        per entry returned by ``translate_and_get_pos``.
    """
    word_freq, pos_tags = process_text(text)
    wordcloud_image = generate_wordcloud(word_freq)
    word_data = translate_and_get_pos(word_freq, pos_tags)

    # Bilingual headers, in the same order as the row tuples.
    columns = [
        "어휘 (Word)",
        "범주 (Category)",
        "뜻 (Meaning)",
        "예문 (Example)",
        "동의어 (Synonyms)",
    ]
    df = pd.DataFrame(word_data, columns=columns)
    word_data_table = df.to_html(index=False, justify='center')

    return wordcloud_image, word_data_table
164
 
165
  # Custom CSS for the Gradio interface
166
  css = """
 
172
  background-color: blue !important;
173
  border-color: blue !important;
174
  }
175
+ table {
176
+ width: 100%;
177
+ border-collapse: collapse;
178
+ text-align: center;
179
+ }
180
+ th, td {
181
+ padding: 8px;
182
+ border: 1px solid #ddd;
183
+ }
184
+ th {
185
+ background-color: #f2f2f2;
186
+ }
187
  </style>
188
  """
189
 
 
191
  interface = gr.Interface(
192
  fn=main,
193
  inputs="text",
194
+ outputs=["image", "html"],
195
  title="Wordcloud Vocabulary Learning App",
196
  description="Input text to generate a word cloud and a frequency list with Korean meanings, parts of speech, and example sentences."
197
  "<br><br><b>The full text:</b><br>"