yutohub committed on
Commit
309c9bf
1 Parent(s): 85a7479

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py CHANGED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ from langchain_community.retrievers import BM25Retriever
4
+ from langchain_core.documents import Document
5
+ import streamlit as st
6
+
7
+
8
def load_docs_from_json(json_path):
    """Load paper records from a JSON file and wrap each one in a ``Document``.

    The decoded JSON is iterated record by record, and every record is
    indexed by the keys ``title``, ``abstract``, ``link``, ``authors``,
    ``submitter`` and ``date``.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        A list with one ``Document`` per record. ``page_content`` is the
        title and the abstract separated by a blank line; ``metadata``
        carries the title, link, authors, submitter and date.

    Raises:
        KeyError: If a record lacks one of the keys listed above.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which may not be able to decode non-ASCII text in the file.
    with open(json_path, encoding="utf-8") as f:
        papers = json.load(f)

    metadata_keys = ('title', 'link', 'authors', 'submitter', 'date')
    return [
        Document(
            page_content=f"{paper['title']}\n\n{paper['abstract']}",
            metadata={key: paper[key] for key in metadata_keys},
        )
        for paper in papers
    ]
28
+
29
+
30
# init: load the paper dump and build the BM25 retriever over it.
json_path = "hf_daily_papers_2023-05-04_2024-06-27.json"
docs = load_docs_from_json(json_path)
retriever = BM25Retriever.from_documents(docs)
retriever.k = 10  # number of results returned per query

# streamlit
# The title must be a string literal; the unquoted text was a syntax error.
st.title("HF Daily Papers Search")
st.markdown("Search papers from [HF daily papers](https://huggingface.co/papers).")

user_query = st.text_input("Search anything...")
if st.button('→'):
    results = retriever.invoke(user_query)

    for result in results:
        with st.expander(label=result.metadata['title'], expanded=False):
            # Iterate key/value pairs; iterating the dict directly yields
            # only the keys, which cannot be unpacked into (k, v).
            for k, v in result.metadata.items():
                st.write(f"{k}: {v}")
            st.divider()
            st.markdown(result.page_content)