File size: 3,403 Bytes
10641ee
 
 
 
 
 
 
01628bb
 
 
 
 
 
181e8c5
 
01628bb
 
 
 
 
10641ee
01628bb
 
 
 
 
 
 
10641ee
 
181e8c5
 
 
 
 
 
 
10641ee
 
 
01628bb
 
 
181e8c5
01628bb
181e8c5
01628bb
10641ee
181e8c5
 
 
 
 
 
 
 
 
 
01628bb
10641ee
181e8c5
 
01628bb
181e8c5
01628bb
 
181e8c5
01628bb
10641ee
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
# My first app
Here's our first attempt at using data to create a table:
"""
import streamlit as st
from retriever import do_search

def local_css(file_name):
    """Inject the contents of a local CSS file into the Streamlit page.

    The file is read in full and emitted inside a ``<style>`` tag through
    ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        file_name: Path of the CSS file to read.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_name, encoding='utf-8') as css_file:
        css = css_file.read()
    st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)


def render_retrieved_content(content, score):
    """Build the HTML snippet shown for one retrieved passage.

    Args:
        content: Text of the retrieved passage; interpolated into the HTML
            as-is (no escaping is applied).
        score: Similarity score of the passage, or ``None`` when no score
            is available.

    Returns:
        ``'<blockquote> No result </blockquote>'`` when ``score`` equals
        0.0. Otherwise the passage wrapped in a ``<blockquote>``, followed
        by the score rounded to three decimals; nothing follows the quote
        when ``score`` is ``None``.
    """
    if score is not None and score == 0.0:
        return '<blockquote> No result </blockquote>'
    # Default to an empty label: without it a ``None`` score left this name
    # unbound and the return below raised UnboundLocalError.
    print_score = ''
    if score is not None:
        print_score = f'<b> Similarity Score: {round(score, 3)}</b>'
    return f'<blockquote>{content} </blockquote> {print_score}'

# Page setup: apply the custom stylesheet, then show the title and the
# introductory text (rendered as HTML because it contains <br> tags).
local_css('style.css')
st.header('🧐  Where my docs at?')
intro_text = (
    '✨ Imagine you have a bunch of text documents and looking for one specific passage, '
    'but you can not remember on the exact words. Just about rough content. <br><br>'
    '💡 This demo compares different search approaches that can help you to find the right '
    'information.'
)
st.markdown(intro_text, unsafe_allow_html=True)




# Search form: a dataset selector, a free-text query field and a submit button.
# NOTE(review): `option` is not passed to do_search in the code visible here —
# confirm whether the dataset choice is meant to influence the search.
dataset_choices = ('CDU election program 2021', 'Partisan news 2019 (dutch)')
with st.form('search-input'):
    option = st.selectbox('Choose a dataset', options=dataset_choices)
    search = st.text_input('Enter your search query')
    button = st.form_submit_button('Search')

# Run the search only for a non-empty query, then show one section per
# retrieval approach. The code reads result[0], result[1] and result[2], each
# exposing `.content` and `.score`.
if search:
    result = do_search(search)

    # Section 1: TF-IDF result (result[0]).
    st.markdown('### 🔎  Term Frequency–Inverse Document Frequency (TF-IDF)')
    st.markdown('Is a statistical approach that calculates how relevant a word is to a document '
                'in your collection. Only documents will be found that contain one of the words of '
                'the given search query. You still have to remember exact terms that are in the '
                'searched phrase.')
    st.markdown(render_retrieved_content(result[0].content, result[0].score),
                unsafe_allow_html=True)

    # Section 2: semantic search result (result[1]). The description holds an
    # HTML link, hence unsafe_allow_html. The first fragment now ends with a
    # space; before, the adjacent literals joined into "thedocuments".
    st.markdown('### 🧠  Semantic Search')
    st.markdown('An alternative approach is semantic search. Instead of using words of the '
                'documents to calculate the score, we use a neural network which calculates '
                'sentence embeddings. Sentences and documents that are similar will be close to '
                'each other in the embedding space. We use this behavior to find topic related '
                'documents without knowing the exact terms. If you want learn more about this '
                'topic check out one of our recent <a '
                'href="https://blog.ml6.eu/decoding-sentence-encoders-37e63244ae00?source=collection_detail----1e091bbd5262-----2-----------------------">blogposts</a>.',
                unsafe_allow_html=True)
    st.markdown(render_retrieved_content(result[1].content, result[1].score),
                unsafe_allow_html=True)

    # Section 3: domain-adapted semantic search result (result[2]).
    st.markdown('### 🚀  Domain Adapted Semantic Search')
    st.markdown('If our document collection contains a lot of domain-specific documents, '
                'we can not use standard models. These models were trained on a large amount of '
                'publicly available data, which probably not covers your domain-specific words. To '
                'improve the search results, we could fine-tune the network to calculate more '
                'accurate similarities between queries and document regarding to your domain.')
    st.markdown(render_retrieved_content(result[2].content, result[2].score),
                unsafe_allow_html=True)