|
|
|
# Module metadata for this Streamlit page.
__company__: str = ""
__project__: str = "Observatory News - Final Project"
__author__: str = "Strauss"
# NOTE(review): the name is spelled "intial" (sic, presumably "initial",
# i.e. the creation date); it is kept unchanged in case other code reads it.
__intial__: str = "2023-08-07"
|
|
|
import sys |
|
import os |
|
import pandas as pd |
|
import streamlit as st |
|
from database.data import Data |
|
from utils.graphs import Altair |
|
from database.data import Data |
|
import glob |
|
from datasets import load_dataset |
|
|
|
# Make the parent of the current working directory importable.
# Streamlit re-executes this script in the same process on every user
# interaction, so guard the append to keep sys.path from growing with
# duplicate entries on each rerun.
# NOTE(review): this runs after the project imports at the top of the file,
# so it cannot help those imports resolve - confirm whether it should move.
_parent_dir = os.path.dirname(os.getcwd())
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# Shrink the font used for st.metric values on this page.
st.markdown(
    """<style>[data-testid="stMetricValue"] {font-size: 20px;}</style>""",
    unsafe_allow_html=True,
)

# Shared data-access object; the loaders below read ``data.pth_data``.
data = Data()
|
|
|
|
|
def load_ds(media):
    """Load every CSV file of one media folder into a single DataFrame.

    The files are looked up in ``<data.pth_data>/<media>``. The directory,
    the list of files found and the resulting shape are reported on the
    page through ``st.info``.

    Args:
        media: Name of the sub-directory of ``data.pth_data`` to read.

    Returns:
        pandas.DataFrame: Rows of all ``*.csv`` files concatenated in
        file-name order, with a fresh 0..n-1 index.

    Raises:
        ValueError: If the directory contains no ``*.csv`` file.
    """
    # os.path.join works whether or not ``pth_data`` ends with a separator.
    directory = os.path.join(data.pth_data, media)
    st.info(directory)

    # glob returns matches in arbitrary order; sort so the row order of the
    # concatenated frame is reproducible between runs and machines.
    files = sorted(glob.glob(os.path.join(directory, '*.csv')))
    st.info(files)

    if not files:
        # pd.concat would otherwise fail with "No objects to concatenate",
        # which does not tell the user which directory was empty.
        raise ValueError('No CSV files found in {0}'.format(directory))

    df = pd.concat((pd.read_csv(path) for path in files), ignore_index=True)
    st.info('Loaded {0} rows and {1} columns'.format(df.shape[0], df.shape[1]))
    return df
|
|
|
def load_hugging_face_ds(media, file_name='202101.csv'):
    """Load one local CSV file of a media folder with Hugging Face ``datasets``.

    Args:
        media: Name of the sub-directory of ``data.pth_data`` holding the file.
        file_name: CSV file inside that directory. Defaults to
            ``'202101.csv'``, the file that used to be hard-coded.

    Returns:
        The object returned by ``datasets.load_dataset`` for the ``csv``
        builder (no split is requested, so presumably a ``DatasetDict``
        with a ``'train'`` split - confirm against the datasets docs).
    """
    # The hub dataset "strauss-oak/observatory-brazilian-news" used to be
    # downloaded here and immediately overwritten by the CSV load below;
    # that unused network round-trip has been dropped.

    # os.path.join inserts the separator only when needed, matching the
    # '<pth_data>/<media>' layout that load_ds reads from.
    csv_path = os.path.join(data.pth_data, media, file_name)
    st.info(csv_path)

    return load_dataset('csv', data_files=[csv_path], delimiter=',')
|
|
|
|
|
# Page body: authenticate against the Hugging Face Hub, download the news
# dataset and display it.
# NOTE(review): the original indentation was lost; every statement below is
# assumed to belong to the ``with c1:`` block - confirm.
c1 = st.container()
with c1:
    from huggingface_hub import login

    # SECURITY: the access token used to be hard-coded in this file. Secrets
    # must not live in source control, so it is now read from the HF_TOKEN
    # environment variable. The previously committed token should be revoked.
    access_token = os.environ.get('HF_TOKEN')
    if not access_token:
        st.error(
            'Hugging Face token not found: set the HF_TOKEN environment variable.'
        )
        # Halt this script run; nothing below can work without credentials.
        st.stop()

    login(access_token)

    # token=True tells ``datasets`` to use the credentials configured above.
    dataset = load_dataset("strauss-oak/observatory-brazilian-news", token=True)

    # Show the raw object returned by load_dataset.
    st.dataframe(dataset)

    # Convert the "train" split to pandas and show it as a table.
    df = pd.DataFrame.from_dict(dataset["train"])
    st.dataframe(df)
|
|