Spaces:
Build error
Build error
File size: 4,319 Bytes
2bf46ef a8d4e3d 2bf46ef a864c58 dc621b3 8442e32 a8d4e3d 8387173 8cfcf51 f310b8b 8b54034 7fd0c02 8b54034 f07cf21 8b54034 f310b8b a8d4e3d f310b8b 1f3126c a8d4e3d 2bf46ef f310b8b 2bf46ef a8492e7 2bf46ef 524154e 8cfcf51 524154e 8cfcf51 8b54034 524154e 8cfcf51 2bf46ef ad420aa a8492e7 7a5b32f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
from typing import List, Optional
import streamlit as st
import streamlit_pydantic as sp
from pydantic import BaseModel, Field
from PIL import Image
import tempfile
from pathlib import Path
from src.Surveyor import Surveyor
@st.experimental_singleton(suppress_st_warning=True)
def get_surveyor_instance(_print_fn, _survey_print_fn):
    """Construct the ``Surveyor`` backend while showing a loading spinner.

    The function is wrapped in ``st.experimental_singleton`` so that Streamlit
    can hand back the same object instead of rebuilding it.

    Args:
        _print_fn: Callable forwarded to ``Surveyor`` as ``print_fn``.
        _survey_print_fn: Callable forwarded to ``Surveyor`` as
            ``survey_print_fn``.

    Returns:
        The ``Surveyor`` instance created with ``high_gpu=True``.
    """
    # NOTE(review): the leading underscores look intentional -- Streamlit's
    # caching decorators skip hashing of underscore-prefixed parameters;
    # confirm against the Streamlit version in use before renaming them.
    with st.spinner('Loading The-Researcher ...'):
        surveyor = Surveyor(
            print_fn=_print_fn,
            survey_print_fn=_survey_print_fn,
            high_gpu=True,
        )
    return surveyor
def run_survey(surveyor, download_placeholder, research_keywords=None, arxiv_ids=None, max_search=None, num_papers=None):
    """Run one survey in a fresh working directory and offer its outputs for download.

    A directory named after the SHA-1 of the current timestamp is created under
    the current working directory, with ``pdf``, ``txt``, ``img``, ``tab`` and
    ``dump`` sub-directories. Their absolute paths are handed to
    ``surveyor.survey`` as ``pdf_dir``, ``txt_dir``, ``img_dir``, ``tab_dir``
    and ``dump_dir`` keyword arguments.

    Args:
        surveyor: Object exposing ``survey(keywords, ids, **kwargs)`` that
            returns a ``(zip_file_name, survey_file_name)`` pair.
        download_placeholder: Passed through to ``show_survey_download``.
        research_keywords: Forwarded as the first positional argument of
            ``surveyor.survey``.
        arxiv_ids: Forwarded as the second positional argument of
            ``surveyor.survey``.
        max_search: Forwarded as the ``max_search`` keyword argument.
        num_papers: Forwarded as the ``num_papers`` keyword argument.
    """
    import hashlib
    import time

    # The timestamp digest gives every run its own output folder.
    run_id = hashlib.sha1(str(time.time()).encode('utf-8')).hexdigest()
    survey_root = Path(run_id).resolve()

    dir_args = {f'{dname}_dir': survey_root / dname for dname in ['pdf', 'txt', 'img', 'tab', 'dump']}
    for d in dir_args.values():
        d.mkdir(exist_ok=True, parents=True)
    print(survey_root)
    print(dir_args)
    # The surveyor receives plain string paths rather than Path objects.
    dir_args = {k: str(v.resolve()) for k, v in dir_args.items()}

    # The comma after ``num_papers=num_papers`` is essential: without it the
    # expression parses as ``num_papers ** dir_args`` and raises TypeError.
    zip_file_name, survey_file_name = surveyor.survey(
        research_keywords,
        arxiv_ids,
        max_search=max_search,
        num_papers=num_papers,
        **dir_args
    )
    show_survey_download(zip_file_name, survey_file_name, download_placeholder)
def show_survey_download(zip_file_name, survey_file_name, download_placeholder):
    """Add one download button per survey artifact to ``download_placeholder``.

    Args:
        zip_file_name: Path of the zip archive; opened in binary mode and also
            used (as a string) for the offered file name.
        survey_file_name: Path of the generated survey file; handled the same
            way as the archive.
        download_placeholder: Object whose ``download_button(label=, data=,
            file_name=)`` method is called once per artifact, archive first.
    """
    downloads = (
        ("Download extracted topic-clustered-highlights, images and tables as zip", zip_file_name),
        ("Download detailed generated survey file", survey_file_name),
    )
    for caption, target in downloads:
        target_name = str(target)
        # The handle stays open only for the duration of the button call.
        with open(target_name, "rb") as handle:
            download_placeholder.download_button(
                label=caption,
                data=handle,
                file_name=target_name,
            )
class KeywordsModel(BaseModel):
    # Input schema for the keyword-driven survey mode.
    # (Kept as comments on purpose: a class docstring would become part of the
    # pydantic schema description.)

    # Free-text research query; defaults to the empty string.
    research_keywords: Optional[str] = Field(
        default='',
        description="Enter your research keywords:",
    )
    # Whole number in [1, 50], default 10.
    max_search: int = Field(
        default=10,
        ge=1,
        le=50,
        multiple_of=1,
        description="num_papers_to_search:",
    )
    # Whole number in [1, 8], default 3.
    num_papers: int = Field(
        default=3,
        ge=1,
        le=8,
        multiple_of=1,
        description="num_papers_to_select:",
    )
class ArxivIDsModel(BaseModel):
    # Input schema for the curated-papers survey mode.
    # (Kept as comments on purpose: a class docstring would become part of the
    # pydantic schema description.)

    # Comma-separated arXiv identifiers as one string; defaults to empty.
    arxiv_ids: Optional[str] = Field(
        default='',
        description="Enter comma_separated arxiv ids for your curated set of papers (e.g. 2205.12755, 2205.10937, ...):",
    )
if __name__ == '__main__':
    # ---- Static page content -------------------------------------------
    st.sidebar.image(Image.open('logo_landscape.png'), use_column_width='always')
    st.title('Auto-Research')
    st.write('#### A no-code utility to generate a detailed well-cited survey with topic clustered sections'
             '(draft paper format) and other interesting artifacts from a single research query or a curated set of papers(arxiv ids).')
    st.write('##### Data Provider: arXiv Open Archive Initiative OAI')
    st.write('##### GitHub: https://github.com/sidphbot/Auto-Research')

    # Created before the form so the download buttons appear in the main
    # page area, above whatever the survey run writes afterwards.
    download_placeholder = st.container()

    # ---- Sidebar form: keywords OR a curated list of arXiv ids -----------
    with st.sidebar.form(key="survey_keywords_form"):
        session_data = sp.pydantic_input(key="keywords_input_model", model=KeywordsModel)
        st.write('or')
        session_data.update(sp.pydantic_input(key="arxiv_ids_input_model", model=ArxivIDsModel))
        submit = st.form_submit_button(label="Submit")
    st.sidebar.write('#### execution log:')

    run_kwargs = {'surveyor': get_surveyor_instance(_print_fn=st.sidebar.write, _survey_print_fn=st.write),
                  'download_placeholder': download_placeholder}

    # NOTE(review): the survey call is assumed to belong inside ``if submit``
    # (running it on every page render would make the form pointless) --
    # confirm against the original indentation.
    if submit:
        if session_data['research_keywords'] != '':
            # Keywords take precedence over arXiv ids when both are filled in.
            run_kwargs.update({'research_keywords': session_data['research_keywords'],
                               'max_search': session_data['max_search'],
                               'num_papers': session_data['num_papers']})
        elif session_data['arxiv_ids'] != '':
            # Drop blank entries so a trailing or doubled comma does not send
            # an empty identifier to the surveyor.
            stripped_ids = (part.strip() for part in session_data['arxiv_ids'].split(','))
            run_kwargs.update({'arxiv_ids': [arxiv_id for arxiv_id in stripped_ids if arxiv_id]})
        run_survey(**run_kwargs)
|