christopher committed on
Commit
2a555c8
•
0 Parent(s):

Duplicate from webis/chat-noir

Browse files
Files changed (4) hide show
  1. .gitattributes +27 -0
  2. README.md +14 -0
  3. app.py +64 -0
  4. requirements.txt +1 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: 'Chat Noir: Search Engine for the ClueWeb and the Common Crawl'
3
+ emoji: 🐈
4
+ colorFrom: black
5
+ colorTo: white
6
+ sdk: streamlit
7
+ sdk_version: 1.2.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ duplicated_from: webis/chat-noir
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from chatnoir_api.v1 import search
3
+
4
+ st.set_page_config(
5
+ page_title="ChatNoir",
6
+ page_icon="🐈",
7
+ layout="centered"
8
+ )
9
+
10
+ @st.cache(suppress_st_warning=True, allow_output_mutation=True, show_spinner=False)
11
+ def search_chat_noir(key, search_query):
12
+ return search(api_key=key, query=search_query)
13
+
14
+ def result_html(result):
15
+ return (
16
+ f"<div style=\"color:#2a5cb3;font-weight: 500\">{(result.title.html).replace('<em>', '<b>').replace('</em>','</b>')}</div>"
17
+ f"<a href=\"{result.target_uri}\" style=\"color:Green;\">{result.target_uri}</a>:<br>"
18
+ f"<div>{(result.snippet.html).replace('<em>', '<b>').replace('</em>','</b>')}</div><br>"
19
+ )
20
+
21
+ cola, colb, colc = st.columns([5,4,5])
22
+ with colb:
23
+ st.image("https://www.chatnoir.eu/static/img/chatnoir.svg")
24
+
25
+ col1, col2 = st.columns([9, 1])
26
+ with col1:
27
+ search_query = st.text_input(label="",
28
+ placeholder="Search"
29
+ )
30
+
31
+ with col2:
32
+ st.write('#')
33
+ button_clicked = st.button("🔎")
34
+
35
+
36
+ if search_query or button_clicked:
37
+ search_results = search_chat_noir(st.secrets["key"], search_query)
38
+ for result in search_results[:10]:
39
+ st.write(result_html(result), unsafe_allow_html=True)
40
+
41
+ with st.expander("🐈 About", expanded=False):
42
+ st.markdown(
43
+ """
44
+ This is a **work in progress** streamlit version of our [ChatNoir](https://www.chatnoir.eu/) search engine. ChatNoir is an Elasticsearch-based search engine offering a freely accessible search interface for the two ClueWeb corpora and the Common Crawl, together about 3 billion web pages. This version of the search engine uses the [Search API](https://www.chatnoir.eu/doc/api/) by way of the Python [chatnoir-api] Package and is therefore not as fast as the main site.
45
+
46
+ If you find this project useful in your research, please consider citing:
47
+
48
+ ```
49
+ @InProceedings{bevendorff:2018,
50
+ address = {Berlin Heidelberg New York},
51
+ author = {Janek Bevendorff and Benno Stein and Matthias Hagen and Martin Potthast},
52
+ booktitle = {Advances in Information Retrieval. 40th European Conference on IR Research (ECIR 2018)},
53
+ editor = {Leif Azzopardi and Allan Hanbury and Gabriella Pasi and Benjamin Piwowarski},
54
+ ids = {potthast:2018c,stein:2018c},
55
+ month = mar,
56
+ publisher = {Springer},
57
+ series = {Lecture Notes in Computer Science},
58
+ site = {Grenoble, France},
59
+ title = {{Elastic ChatNoir: Search Engine for the ClueWeb and the Common Crawl}},
60
+ year = 2018
61
+ }
62
+ ```
63
+ """
64
+ )
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ chatnoir-api