Spaces:
Runtime error
Runtime error
""" | |
# My first app | |
Here's our first attempt at using data to create a table: | |
""" | |
import streamlit as st | |
from retriever import do_search | |
def local_css(file_name):
    """Embed a stylesheet file into the current Streamlit page.

    Reads the whole file at ``file_name`` and emits its text wrapped in a
    ``<style>`` tag through ``st.markdown`` with raw HTML enabled.
    """
    with open(file_name) as stylesheet:
        css_rules = stylesheet.read()
    st.markdown(f'<style>{css_rules}</style>', unsafe_allow_html=True)
def render_retrieved_content(content, score):
    """Build the HTML snippet shown for one retrieved passage.

    Args:
        content: Text of the retrieved passage; inserted into the markup
            as-is.
        score: Similarity score of the passage, or ``None`` when no score
            is available.

    Returns:
        An HTML string. When ``score`` equals 0.0 it is a "No result"
        blockquote. When ``score`` is ``None`` it is the passage in a
        blockquote with no score label. Otherwise it is the passage in a
        blockquote followed by the score rounded to three decimals.
    """
    if score is None:
        # Nothing to report: show the passage alone instead of printing
        # "None" or failing on an undefined score label.
        return f'<blockquote>{content} </blockquote>'
    if score == 0.0:
        # A score of exactly zero is treated as "nothing matched".
        return '<blockquote> No result </blockquote>'
    print_score = f'<b> Similarity Score: {round(score, 3)}</b>'
    # NOTE(review): content is interpolated unescaped and the callers in this
    # file render the result with unsafe_allow_html=True -- confirm the
    # indexed documents are trusted.
    return f'<blockquote>{content} </blockquote> {print_score}'
# Load the custom stylesheet first, then draw the page title and intro.
local_css('style.css')

st.header('🧐 Where my docs at?')

# The intro contains <br> tags, so it is rendered with raw HTML enabled.
intro_text = (
    '✨ Imagine you have a bunch of text documents and looking for one specific passage, '
    'but you can not remember on the exact words. Just about rough content. <br><br>'
    '💡 This demo compares different search approaches that can help you to find the right '
    'information.'
)
st.markdown(intro_text, unsafe_allow_html=True)
# Search form: a dataset selector, a free-text query field and a submit button.
dataset_choices = ('CDU election program 2021', 'Partisan news 2019 (dutch)')
with st.form('search-input'):
    option = st.selectbox('Choose a dataset', dataset_choices)
    search = st.text_input('Enter your search query')
    button = st.form_submit_button('Search')
# Render one section per search approach once a non-empty query is entered.
if search:
    # NOTE(review): the dataset picked in the form (`option`) is not passed
    # to do_search here -- confirm whether the selection is meant to be used.
    result = do_search(search)
    # `result` is indexed 0..2 below; presumably one hit per approach, in the
    # order TF-IDF, semantic, domain adapted -- verify against retriever.

    st.markdown('### 🔎 Term Frequency–Inverse Document Frequency (TF-IDF)')
    st.markdown('Is a statistical approach that calculates how relevant a word is to a document '
                'in your collection. Only documents will be found that contain one of the words of '
                'the given search query. You still have to remember exact terms that are in the '
                'searched phrase.')
    st.markdown(render_retrieved_content(result[0].content, result[0].score),
                unsafe_allow_html=True)

    st.markdown('### 🧠 Semantic Search')
    # The first fragment now ends with a space; previously the implicit
    # concatenation rendered "words of thedocuments".
    st.markdown('An alternative approach is semantic search. Instead of using words of the '
                'documents to calculate the score, we use a neural network which calculates '
                'sentence embeddings. Sentences and documents that are similar will be close to '
                'each other in the embedding space. We use this behavior to find topic related '
                'documents without knowing the exact terms. If you want learn more about this '
                'topic check out one of our recent <a '
                'href="https://blog.ml6.eu/decoding-sentence-encoders-37e63244ae00?source=collection_detail----1e091bbd5262-----2-----------------------">blogposts</a>.',
                unsafe_allow_html=True)
    st.markdown(render_retrieved_content(result[1].content, result[1].score),
                unsafe_allow_html=True)

    st.markdown('### 🚀 Domain Adapted Semantic Search')
    st.markdown('If our document collection contains a lot of domain-specific documents, '
                'we can not use standard models. These models were trained on a large amount of '
                'publicly available data, which probably not covers your domain-specific words. To '
                'improve the search results, we could fine-tune the network to calculate more '
                'accurate similarities between queries and document regarding to your domain.')
    st.markdown(render_retrieved_content(result[2].content, result[2].score),
                unsafe_allow_html=True)