jaimin committed on
Commit
f805bf8
1 Parent(s): d84bb60

Upload 3 files

Browse files
Files changed (3) hide show
  1. HelveticaWorld-Regular.ttf +0 -0
  2. app.py +47 -0
  3. requirements.txt +3 -0
HelveticaWorld-Regular.ttf ADDED
Binary file (657 kB). View file
 
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import wikipedia
3
+ import numpy as np
4
+ import pandas as pd
5
+ from os import path
6
+ from PIL import Image
7
+ from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
8
+ import matplotlib.pyplot as plt
9
+
10
+
11
def wikipediaScrap(article_name, wikipedia_language="en"):
    """Fetch a Wikipedia article and return its text content.

    As a side effect, a word cloud built from the article text is drawn onto
    the current pyplot figure; it is not part of the return value.

    Args:
        article_name: Title (or search phrase) handed to ``wikipedia.page``.
        wikipedia_language: Language code handed to ``wikipedia.set_lang``.
            ``None``, an empty string, or whitespace falls back to ``"en"``.

    Returns:
        The ``content`` attribute of the fetched page.
    """
    # ``wikipedia.set_lang`` mutates module-wide state, so it is set on every
    # call: a blank language box must not silently reuse whatever language the
    # previous request selected.
    language = (wikipedia_language or "").strip() or "en"
    wikipedia.set_lang(language)

    content = wikipedia.page(article_name).content

    # Create and generate a word cloud image:
    word_cloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)

    # Display the generated image. The figure is cleared first so repeated
    # calls do not keep stacking images on the shared pyplot axes.
    plt.clf()
    plt.imshow(word_cloud, interpolation="bilinear")
    plt.axis("off")

    return content
31
+
32
+
33
# UI definition: two inputs (article name, language code), a button that runs
# the scrape, a text output, and clickable examples.
# The CSS hides the Gradio footer and colours the button; declarations inside a
# rule must be separated by ';' (a ',' invalidates the whole declaration).
with gr.Blocks(
    css="footer {visibility: hidden} #dsd_button {background: purple; color: white}"
) as demo:
    with gr.Row():
        inp = gr.Textbox(placeholder="Enter the name of wikipedia article", label="Wikipedia article name")
        lan = gr.Textbox(placeholder="Enter the language code", label="Language")
    btn = gr.Button("Start Scraping", elem_id="dsd_button")
    with gr.Row():
        content = gr.Textbox(label="Content")
    with gr.Row():
        # Examples only supply the article name, so wikipediaScrap runs with
        # its default language for them.
        gr.Examples(
            examples=[["Eiffel Tower"], ["Eiffel tower"]], fn=wikipediaScrap, inputs=[inp],
            outputs=[content], cache_examples=True)

    # Without this listener the button renders but does nothing when clicked.
    btn.click(fn=wikipediaScrap, inputs=[inp, lan], outputs=[content])

# Launch the Blocks app itself so the custom layout, CSS, button and examples
# are what the user sees. ``interface`` is kept as a module-level alias.
interface = demo

interface.launch(inline=False)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ wikipedia
3
+ wordcloud