Spaces:

yutohub
/

hf_daily_papers

Sleeping

yutohub committed 29 days ago

Commit

309c9bf

•

1 Parent(s): 85a7479

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

+import json
+from langchain_community.retrievers import BM25Retriever
+from langchain_core.documents import Document
+import streamlit as st
+def load_docs_from_json(json_path):
+    with open(json_path) as f:
+        papers = json.load(f)
+    docs = []
+    for paper in papers:
+        page_content = f"{paper['title']}\n\n{paper['abstract']}"
+        doc = Document(
+        page_content=page_content,
+        metadata={
+            'title': paper['title'],
+            'link': paper['link'],
+            'authors': paper['authors'],
+            'submitter': paper['submitter'],
+            'date': paper['date'],
+            }
+        )
+    docs.append(doc)
+    return docs
+# init
+json_path = "hf_daily_papers_2023-05-04_2024-06-27.json"
+docs = load_docs_from_json(json_path)
+retriever = BM25Retriever.from_documents(docs)
+retriever.k = 10
+# streamlit
+st.title(HF Daily Papers Search)
+st.markdown("Search papers from [HF daily papers](https://huggingface.co/papers).")
+user_query = st.text_input("Search anything...")
+if st.button('→'):
+    results = retriever.invoke(user_query)
+    for result in results:
+        with st.expander(label=result.metadata['title'], expanded=False):
+            for k, v in result.metadata:
+                st.write(f"{k}: {v}")
+            st.divider()
+            st.markdown(result.page_content)