"""Streamlit demo that compares three document-search approaches.

The page lets the user choose a dataset and enter a query, passes both to
``do_search`` and renders the three returned results under the headings
TF-IDF, Semantic Search and Domain Adapted Semantic Search.
"""
import streamlit as st

from retriever import do_search, dutch_datset_name, german_datset_name


def local_css(file_name):
    """Inject the stylesheet stored in *file_name* into the Streamlit page.

    Args:
        file_name: Path of a CSS file, resolved relative to the working
            directory of the Streamlit process.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(file_name, encoding='utf-8') as f:
        # Raw CSS only takes effect inside a <style> element, which in turn
        # requires unsafe_allow_html=True.
        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)


def render_retrieved_content(content, score):
    """Build the markdown snippet that presents one search result.

    Args:
        content: Text of the retrieved document.
        score: Similarity score of the result, or ``None`` when no score is
            available.

    Returns:
        A "No result" snippet when ``score`` equals ``0.0``; otherwise the
        content followed by the score rounded to three decimals. The score
        label is omitted when ``score`` is ``None``.
    """
    # NOTE(review): the literals below look like they once carried HTML
    # wrappers (callers pass unsafe_allow_html=True); confirm against the
    # stylesheet and restore the markup if it was lost.
    if score is not None and score == 0.0:
        return '\n\nNo result\n\n'

    if score is None:
        # Without a score there is nothing meaningful to print.
        print_score = ''
    else:
        print_score = f' Similarity Score: {round(score, 3)}'
    return f'\n\n{content}\n\n{print_score}'


local_css('style.css')

st.header('🧐 Where my docs at?')
st.markdown(
    '✨ Imagine you have a bunch of text documents and looking for one specific passage, '
    'but you can not remember on the exact words. Just about rough content.\n\n'
    '💡 This demo compares different search approaches that can help you to find the right '
    'information.',
    unsafe_allow_html=True)

with st.form('search-input'):
    option = st.selectbox(
        'Choose a dataset',
        (german_datset_name, dutch_datset_name))
    search = st.text_input('Enter your search query')
    # A form needs a submit button; widget values are only sent on submit.
    st.form_submit_button('Search')

# An empty query string means nothing has been submitted yet.
if search:
    # NOTE(review): assumes do_search returns at least three results ordered
    # TF-IDF, semantic, domain adapted - confirm against retriever.
    result = do_search(search, option)

    st.markdown('### 🔎 Term Frequency–Inverse Document Frequency (TF-IDF)')
    st.markdown(
        'Is a statistical approach that calculates how relevant a word is to a document '
        'in your collection. Only documents will be found that contain one of the words of '
        'the given search query. You still have to remember exact terms that are in the '
        'searched phrase.')
    st.markdown(render_retrieved_content(result[0].content, result[0].score),
                unsafe_allow_html=True)

    st.markdown('### 🧠 Semantic Search')
    # NOTE(review): "blogposts" presumably was a hyperlink; the target is not
    # recoverable from this file.
    st.markdown(
        'An alternative approach is semantic search. Instead of using words of the '
        'documents to calculate the score, we use a neural network which calculates '
        'sentence embeddings. Sentences and documents that are similar will be close to '
        'each other in the embedding space. We use this behavior to find topic related '
        'documents without knowing the exact terms. If you want learn more about this '
        'topic check out one of our recent blogposts.',
        unsafe_allow_html=True)
    st.markdown(render_retrieved_content(result[1].content, result[1].score),
                unsafe_allow_html=True)

    st.markdown('### 🚀 Domain Adapted Semantic Search')
    st.markdown(
        'If our document collection contains a lot of domain-specific documents, '
        'we can not use standard models. These models were trained on a large amount of '
        'publicly available data, which probably not covers your domain-specific words. \n'
        'To improve the search results, we could fine-tune the network to calculate more '
        'accurate similarities between queries and document regarding to your domain.')
    st.markdown(render_retrieved_content(result[2].content, result[2].score),
                unsafe_allow_html=True)