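# NOTE: the lines below appear to be page residue from the hosting site's
# file viewer (author avatar caption, last commit message, viewer links,
# scan status, file size). They are not part of the program and are kept
# here only as comments so the module can be parsed.
#   datasciencedojo's picture
#   submit button hover color updated
#   history blame
#   No virus
#   4.15 kB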
import gradio as gr
import wikipedia
import numpy as np
import pandas as pd
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
def wikipediaScrap(article_name, wikipedia_language = "en - English"):
    """Fetch a Wikipedia article and render a word cloud of its text.

    Args:
        article_name: Title of the Wikipedia article to look up.
        wikipedia_language: Language label of the form "<code> - <name>"
            (for example "en - English"). Only the part before " - " is
            used, as the Wikipedia language code.

    Returns:
        A 5-tuple ``(title, content, page_url, linked_pages, plt)``:
        the article title, its plain-text content, its URL, the titles of
        linked articles joined with newlines, and the ``matplotlib.pyplot``
        module whose current figure holds the rendered word cloud.

    Raises:
        Any exception raised by ``wikipedia.page`` (for instance when the
        article does not exist or the title is ambiguous) propagates to the
        caller unchanged.
    """
    language_code = wikipedia_language.split(" - ")[0]
    if language_code:
        # set_lang changes the language for every later call to the
        # wikipedia module, so it is only touched when a code was supplied.
        wikipedia.set_lang(language_code)

    page = wikipedia.page(article_name)
    content = page.content

    # Create and generate a word cloud image from the article text.
    cloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)

    # pyplot keeps one implicit "current figure"; clear it first so images
    # from earlier calls do not pile up on the same axes.
    plt.clf()
    plt.imshow(cloud, interpolation='bilinear')

    return page.title, content, page.url, "\n".join(page.links), plt
# Custom stylesheet passed to gr.Blocks(css=...).
# - Hides the Gradio footer and markdown output blocks.
# - Restyles the large button (normal and hover states).
# - Fixes the height of a few textboxes and tints hovered rows.
# NOTE(review): the "#component-N" selectors target Gradio's auto-generated
# element ids, which presumably shift if the layout changes — confirm after
# any change to the Blocks layout.
# NOTE(review): the selector for the second button rule was missing in the
# source; ".gr-button-lg:hover" is assumed because that rule only differs in
# background colour and box-shadow — confirm against the deployed app.
css = """
footer {display:none !important}
.output-markdown{display:none !important}
footer {visibility: hidden}
.gr-button-lg {
    z-index: 14;
    width: 113px;
    height: 30px;
    left: 0px;
    top: 0px;
    padding: 0px;
    cursor: pointer !important;
    background: none rgb(17, 20, 45) !important;
    border: none !important;
    text-align: center !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    color: rgb(255, 255, 255) !important;
    line-height: 1 !important;
    border-radius: 6px !important;
    transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
    box-shadow: none !important;
}
.gr-button-lg:hover {
    z-index: 14;
    width: 113px;
    height: 30px;
    left: 0px;
    top: 0px;
    padding: 0px;
    cursor: pointer !important;
    background: none rgb(66, 133, 244) !important;
    border: none !important;
    text-align: center !important;
    font-size: 14px !important;
    font-weight: 500 !important;
    color: rgb(255, 255, 255) !important;
    line-height: 1 !important;
    border-radius: 6px !important;
    transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
    box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
}
#component-14 textarea[data-testid="textbox"] { height: 178px !important}
#component-17 textarea[data-testid="textbox"] { height: 178px !important}
#component-21 textarea[data-testid="textbox"] { height: 178px !important}
#component-20 tr:hover{
    background-color: rgb(229,225,255) !important;
}
.output-image {max-height: 11rem !important;}
.output-image img {max-height: 17rem !important;}
.hover\\:bg-orange-50:hover {
    --tw-bg-opacity: 1 !important;
    background-color: rgb(229,225,255) !important;
}
"""
# Build the dropdown choices from the languages Wikipedia reports.
# wikipedia.languages() returns a mapping of language code -> language name.
ini_dict = wikipedia.languages()

# Split the dictionary into parallel lists of codes and names.
items = ini_dict.items()
keys = [code for code, _ in items]
values = [name for _, name in items]

# Labels of the form "<code> - <name>" (e.g. "en - English"); wikipediaScrap
# splits on " - " to recover the code, so the separator must stay as is.
language = [code + " - " + name for code, name in items]
# Gradio UI: article name + language inputs, a trigger button, and outputs
# for the article metadata, word cloud, content and linked article titles.
# NOTE(review): the source had no indentation, so the nesting of rows and
# columns below is reconstructed from the section headings — confirm against
# the deployed layout.
with gr.Blocks(title="Wikipedia Article Scrape | Data Science Dojo", css = css) as demo:
    # Select the default language by label instead of by list position: the
    # list comes from a live API call, so a fixed index is not stable.
    # NOTE(review): the original used language[108], presumably the English
    # entry at the time — confirm.
    default_language = "en - English" if "en - English" in language else None

    with gr.Row():
        inp = gr.Textbox(placeholder="Enter the name of wikipedia article", label="Wikipedia article name")
        lan = gr.Dropdown(label=" Select Language", choices=language, value=default_language, interactive=True)
    btn = gr.Button("Start scraping", elem_id="dsd_button")

    with gr.Row():
        with gr.Column():
            gr.Markdown("""## About""")
            title = gr.Textbox(label="Article title")
            url = gr.Textbox(label="Article URL")
        with gr.Column():
            gr.Markdown("""## Wordcloud""")
            wordcloud = gr.Plot()

    gr.Markdown("""### Content""")
    with gr.Row():
        content = gr.Textbox(label="Content")

    gr.Markdown("""### Linked Articles""")
    with gr.Row():
        linked = gr.Textbox(label="Linked Articles")

    # Output order must match the tuple returned by wikipediaScrap:
    # (title, content, page_url, linked_pages, plot).
    btn.click(fn=wikipediaScrap, inputs=[inp, lan], outputs=[title, content, url, linked, wordcloud])

    with gr.Row():
        gr.Examples(examples = [["Eiffel Tower", "en - English"], ["Eiffel tower", 'ur - اردو']], fn=wikipediaScrap, inputs=[inp, lan], outputs=[title, content, url, linked, wordcloud], cache_examples=True)