File size: 2,653 Bytes
af2e0d6
 
 
 
 
 
 
 
 
04383ec
 
 
 
08550c9
 
07b2f64
aac23ef
1bac24e
08550c9
04383ec
af2e0d6
 
 
 
 
 
 
 
 
 
 
 
 
 
719899a
e4b8bc7
719899a
 
7e016e5
719899a
 
af2e0d6
1bac24e
576add0
c370283
 
f203ad2
af2e0d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719899a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import streamlit as st
from chatnoir_api.v1 import search

st.set_page_config(
    page_title="ChatNoir",
    page_icon="🐈",
    layout="centered"
)

@st.cache(suppress_st_warning=True, allow_output_mutation=True, show_spinner=False)
def search_chat_noir(key, search_query):
    return search(api_key=key, query=search_query)

def result_html(result):
    return (
    f"<div style=\"color:#2a5cb3;font-weight: 500\">{(result.title.html).replace('<em>', '<b>').replace('</em>','</b>')}</div>"
    f"<a href=\"{result.target_uri}\" style=\"color:Green;\">{result.target_uri}</a>:<br>"
    f"<div>{(result.snippet.html).replace('<em>', '<b>').replace('</em>','</b>')}</div><br>"
    )

cola, colb, colc = st.columns([5,4,5])
with colb:
    st.image("https://www.chatnoir.eu/static/img/chatnoir.svg")

col1, col2 = st.columns([9, 1])
with col1:
    search_query = st.text_input(label="",
                placeholder="Search"
            )

with col2:
    st.write('#')
    button_clicked = st.button("πŸ”Ž")


if search_query or button_clicked:
    search_results = search_chat_noir(st.secrets["key"], search_query)
    for result in search_results[:10]:
        st.write(result_html(result), unsafe_allow_html=True)
    
with st.expander("🐈 About", expanded=False):
    st.markdown(
        """
        This is a **work in progress** streamlit version of our [ChatNoir](https://www.chatnoir.eu/) search engine. ChatNoir is an Elasticsearch-based search engine offering a freely accessible search interface for the two ClueWeb corpora and the Common Crawl, together about 3 billion web pages. This version of the search engine uses the [Search API](https://www.chatnoir.eu/doc/api/) by way of the Python [chatnoir-api] Package and is therefore not as fast as the main site.

If you find this project useful in your research, please consider citing:
 
```
@InProceedings{bevendorff:2018,
  address =               {Berlin Heidelberg New York},
  author =                {Janek Bevendorff and Benno Stein and Matthias Hagen and Martin Potthast},
  booktitle =             {Advances in Information Retrieval. 40th European Conference on IR Research (ECIR 2018)},
  editor =                {Leif Azzopardi and Allan Hanbury and Gabriella Pasi and Benjamin Piwowarski},
  ids =                   {potthast:2018c,stein:2018c},
  month =                 mar,
  publisher =             {Springer},
  series =                {Lecture Notes in Computer Science},
  site =                  {Grenoble, France},
  title =                 {{Elastic ChatNoir: Search Engine for the ClueWeb and the Common Crawl}},
  year =                  2018
}
```
	    """
    )