|
import logging |
|
import warnings |
|
import wikipedia |
|
import streamlit as st |
|
from typing import List |
|
from scanner_utils import * |
|
from xgboost import XGBClassifier |
|
from streamlit_searchbox import st_searchbox |
|
from transformers import logging as hflogging |
|
|
|
|
|
# Silence library chatter: suppress every log record at WARNING level or
# below, and set the Hugging Face `transformers` logger verbosity to WARNING.
logging.disable(logging.WARNING)
hflogging.set_verbosity_warning()

# Ignore the warning categories below for the rest of the session.
for _ignored_category in (UserWarning, FutureWarning, DeprecationWarning):
    warnings.simplefilter(action='ignore', category=_ignored_category)

# Page-level settings; kept ahead of every other st.* call in this script.
st.set_page_config(
    layout="centered",
    page_title="Egyptian Wikipedia Scanner",
    page_icon="🇪🇬",
)

# Point the `wikipedia` client at the "arz" (Egyptian Arabic) edition.
wikipedia.set_lang("arz")
|
|
|
|
|
# Inject the app's custom stylesheet: the file contents are wrapped in a
# <style> tag and rendered as raw HTML. The encoding is pinned so the file is
# decoded the same way regardless of the host's locale default.
with open('.streamlit/style.css', encoding='utf-8') as css_file:
    st.markdown(f'<style>{css_file.read()}</style>', unsafe_allow_html=True)
|
|
|
|
|
# Centered page title and subtitle, rendered as raw HTML. The trailing ';'
# belongs inside the quoted style value; outside the quotes it is parsed as a
# stray attribute.
st.markdown("""
<h1 style='text-align: center;'>Egyptian Arabic Wikipedia Scanner</h1>
<h5 style='text-align: center;'>Automatic Detection of Template-translated Articles in the Egyptian Wikipedia</h5>
""", unsafe_allow_html=True)

# Empty markdown element kept from the original layout (presumably a spacer).
st.markdown("", unsafe_allow_html=True)
|
|
|
|
|
def search_wikipedia(searchterm: str) -> List[str]:
    """Return article-title suggestions for the search box.

    Args:
        searchterm: Text currently typed by the user.

    Returns:
        The result of ``wikipedia.search(searchterm)``, or an empty list when
        ``searchterm`` is empty, ``None`` or only whitespace (no lookup is
        made in that case).
    """
    # A blank query cannot match anything useful, so skip the network call.
    if not searchterm or not searchterm.strip():
        return []
    return wikipedia.search(searchterm)
|
|
|
|
|
@st.cache_resource
def load_xgb_model(model):
    """Build an ``XGBClassifier`` and load a saved model into it.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded
    classifier instead of reloading it on every call.

    Args:
        model: Location of the saved model, passed straight to
            ``XGBClassifier.load_model``.

    Returns:
        The ``XGBClassifier`` instance with the saved model loaded.
    """
    classifier = XGBClassifier()
    classifier.load_model(model)
    return classifier
|
|
|
|
|
# Autocomplete search box: `search_wikipedia` supplies the suggestions and the
# widget's return value is the title the user picked (falsy until then).
selected_title = st_searchbox(
    search_wikipedia,
    label="Search for an article in Egyptian Arabic Wikipedia:",
    placeholder="Search for an article",
    rerun_on_update=True,
    clear_on_submit=False,
    key="wiki_searchbox",
)

if selected_title:
    # `prepare_features` is not defined in this file (presumably it comes from
    # the scanner_utils star import). It returns the classifier input, an
    # article object, a metadata table and the title to display.
    X, article, dataframe, selected_title = prepare_features(selected_title)

    st.write(f':black_small_square: Collected Metadata of **{selected_title}**')
    st.dataframe(dataframe, hide_index=True, use_container_width=True)

    loaded_xgb_classifier = load_xgb_model("XGBoost.model")
    id2label = {0: 'Human-generated Article', 1: 'Template-translated Article'}

    # predict() returns an array of labels; take the first entry explicitly
    # because NumPy deprecated calling int() on an array with ndim > 0.
    # NOTE(review): assumes X holds exactly one row - confirm in prepare_features.
    prediction = int(loaded_xgb_classifier.predict(X)[0])
    result = id2label[prediction]

    # The heading is the same for both outcomes; only the status box differs.
    st.write(f":black_small_square: Automatic Classification of **{selected_title}**")
    if result == 'Human-generated Article':
        st.success(result, icon="✅")
    else:
        st.error(result, icon="🚨")

    st.write(f":black_small_square: Full Summary of **{selected_title}**")

    with st.expander(f'**{selected_title}**', expanded=True):
        # Justify paragraph text via an injected CSS rule.
        st.markdown('<style>p {text-align: justify;}</style>', unsafe_allow_html=True)
        try:
            article_text = wikipedia.summary(selected_title)
        except wikipedia.exceptions.DisambiguationError as e:
            # Ambiguous title: fall back to the first option the library offers.
            article_text = wikipedia.summary(e.options[0])
        st.write(article_text)
        st.write(f'> :globe_with_meridians: Read Full Text of **{selected_title}**: <br>{article.url}', unsafe_allow_html=True)
|
|
|
|
|
# Vertical gap between the main content and the footer.
st.markdown('<br><br>', unsafe_allow_html=True)

# Footer markup, rendered as raw HTML. The "footer" and "p1" classes are
# presumably styled by the app's stylesheet.
footer = (
    "\n"
    '<div class="footer"> <p class="p1">Copyright©2024 by Saied Alshahrani<br>'
    'Hosted with Hugging Face Spaces 🤗</p> </div>'
    "\n"
)
st.markdown(footer, unsafe_allow_html=True)