File size: 5,101 Bytes
c188624 cd22fcf c188624 3285b2f c188624 c8e2bd5 fb4de31 3a6124d c188624 a17b65d 8ca54ac 05edb91 8ca54ac a17b65d f80ab29 a17b65d c90738f a17b65d f80ab29 a17b65d faccbfc c90738f cef2390 c90738f c188624 3a6124d 083cdef c188624 aa06dc2 c46cd83 bc5855e 699c8d1 4b58234 c188624 0bb7e36 c188624 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import pandas as pd
import streamlit as st
from transformers import *
from carga_articulos import cargar_articulos
from preprocesamiento_articulos import limpieza_articulos
from entrenamiento_modelo import term_document_matrix, tf_idf_score
from resultados_consulta import resultados_consulta, detalles_resultados
import tensorflow as tf
def crear_indice():
df=cargar_articulos()
vocab = limpieza_articulos(df)
td_matrix=term_document_matrix(df, vocab, 'ID', 'titulo')
td_idf_matrix=tf_idf_score(td_matrix, df.ID.values)
td_idf_matrix.to_csv('articulos_indexados.csv')
def load_qa_model():
tokenizer = AutoTokenizer.from_pretrained('mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es', use_fast="false")
model = TFDistilBertForQuestionAnswering.from_pretrained("mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es", from_pt=True)
return tokenizer, model
# 4. Use streamlit to create a web app
def main():
#crear_indice()
st.set_page_config(page_title="Buscador de noticias periodicos dominicanos", page_icon="📰")
st.image('repartidor_periodicos.jpeg', width=150)
st.header('El Repartidor Dominicano')
# Sidebar
st.sidebar.header("Acerca de")
st.sidebar.markdown(
"[Streamlit](https://streamlit.io) is a Python library that allows the creation of interactive, data-driven web applications in Python."
)
st.sidebar.header("Artículos Indexados")
st.sidebar.markdown(
"""
- [Streamlit Documentation](https://docs.streamlit.io/)
- [Cheat sheet](https://docs.streamlit.io/library/cheatsheet)
- [Book](https://www.amazon.com/dp/180056550X) (Getting Started with Streamlit for Data Science)
- [Blog](https://blog.streamlit.io/how-to-master-streamlit-for-data-science/) (How to master Streamlit for data science)
"""
)
st.sidebar.header("Disclaimer")
st.sidebar.markdown(
"You can quickly deploy Streamlit apps using [Streamlit Community Cloud](https://streamlit.io/cloud) in just a few clicks."
)
st.sidebar.header("¿Te gustó mi sitio? ¡Cómprame una café!")
with st.sidebar:
components.html(
"""
<div id="donate-button-container">
<div id="donate-button"></div>
<script src="https://www.paypalobjects.com/donate/sdk/donate-sdk.js" charset="UTF-8"></script>
<script>
PayPal.Donation.Button({
env:'production',
hosted_button_id:'VK5ZAB52ZYDNA',
image: {
src:'https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif',
alt:'Donate with PayPal button',
title:'PayPal - The safer, easier way to pay online!',
}
}).render('#donate-button');
</script>
</div>
"""
)
df=cargar_articulos()
articulos_indexados = pd.read_csv('articulos_indexados.csv')
articulos_indexados = articulos_indexados.set_index('Unnamed: 0')
tokenizer, qa_model = load_qa_model()
query = st.text_input(
"Escribe tus términos de búsqueda o haz una pregunta terminando con el caracter ?:"
)
if query:
if ('?' in query):
st.write("Contestando a: ", query)
text='Un texto es una composición de signos codificados en un sistema de escritura que forma una unidad de sentido.'
inputs = tokenizer(query, text, return_tensors='tf')
outputs = qa_model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])
predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
answer=tokenizer.decode(predict_answer_tokens)
st.info(answer)
else:
st.write("Buscando: ", query)
result = resultados_consulta(df,articulos_indexados, query)
if result.empty:
st.info("No se encontraron artículos para la búsqueda solicitada")
else:
df_results=detalles_resultados(df,result)
N_cards_per_row = 1
for n_row, row in df_results.reset_index().iterrows():
i = n_row%N_cards_per_row
if i==0:
st.write("---")
cols = st.columns(N_cards_per_row, gap="large")
# draw the card
with cols[n_row%N_cards_per_row]:
st.caption(f"{row['feed'].strip()} - {row['seccion'].strip()} - {row['fecha'].strip()} ")
st.markdown(f"**{row['titulo'].strip()}**")
st.markdown(f"{row['resumen'].strip()}")
st.markdown(f"{row['link']}")
if __name__ == "__main__":
main()
|