"""Generate the most relevant reference papers for a given paper title.

Workflow:
- Search for as many references as possible: a set of keywords is generated
  from the title, and every keyword (plus every pair of keywords) is used as
  a search query.
- Rank the collected papers from most relevant to least relevant using a
  remote Hugging Face Space.
- Return the top-k most relevant papers.

Note: the auto-draft pipeline does not call this script; the same logic has
been integrated there.
"""
import os.path
import json
from utils.references import References
from section_generator import keywords_generation
import itertools
from gradio_client import Client

# Hugging Face Space that ranks a JSON file of papers against a title.
_SPECTER_SPACE_URL = "https://shaocongma-evaluate-specter-embeddings.hf.space/"


def _query_top_k_papers(title, json_path, top_k):
    """Ask the remote Space for the ``top_k`` papers most relevant to ``title``.

    Args:
        title: Paper title sent to the Space's 'Title' textbox.
        json_path: Filepath (or URL) of the papers JSON file to rank.
        top_k: Number of papers to return; the Space's slider accepts a
            numeric value between 1 and 50.

    Returns:
        The parsed JSON content of the result file produced by the Space.
    """
    client = Client(_SPECTER_SPACE_URL)
    result_path = client.predict(
        title,  # str in 'Title' Textbox component
        json_path,  # str (filepath or URL to file) in 'Papers JSON (as string)' File component
        top_k,  # int | float (numeric value between 1 and 50) in 'Top-k Relevant Papers' Slider component
        api_name="/get_k_relevant_papers",
    )
    # `predict` hands back a path to a JSON file; load it for the caller.
    with open(result_path, encoding="utf-8") as f:
        return json.load(f)


def generate_raw_references(title, description="", bib_refs=None, tldr=False, max_kw_refs=10, save_to="ref.bib"):
    """Collect candidate references for ``title`` and dump them to ``save_to``.

    Args:
        title: Title of the paper to find references for.
        description: Optional description passed to keyword generation.
        bib_refs: Optional pre-provided references handed to ``References``.
        tldr: Forwarded to ``References.collect_papers``.
        max_kw_refs: Value associated with every search query (presumably the
            maximum number of references fetched per query -- confirm against
            ``References.collect_papers``).
        save_to: Path of the output file. The content is written as JSON
            regardless of the file extension.

    Returns:
        A ``(save_to, ref)`` tuple: the output path and the populated
        ``References`` object.
    """
    # Load pre-provided references.
    ref = References(title, bib_refs)

    # Generate multiple keywords for searching.
    input_dict = {"title": title, "description": description}
    keywords, _usage = keywords_generation(input_dict)
    keywords = list(keywords)

    # Every unordered pair of keywords is searched as an additional query.
    # The pairs are materialized before extending the list they came from.
    keyword_pairs = list(itertools.combinations(keywords, 2))
    keywords.extend(" ".join(pair) for pair in keyword_pairs)

    keywords = dict.fromkeys(keywords, max_kw_refs)
    print(f"keywords: {keywords}\n\n")

    ref.collect_papers(keywords, tldr=tldr)
    paper_json = ref.to_json()

    with open(save_to, "w", encoding="utf-8") as f:
        json.dump(paper_json, f)

    return save_to, ref


def generate_top_k_references(title, description="", bib_refs=None, tldr=False, max_kw_refs=10, save_to="ref.bib", top_k=5):
    """Collect references for ``title`` and return the ``top_k`` most relevant.

    The candidate papers are first gathered and written to ``save_to`` by
    ``generate_raw_references``; that file is then sent to the remote Space
    for ranking.

    Args:
        title: Title of the paper to find references for.
        description: Optional description passed to keyword generation.
        bib_refs: Optional pre-provided references.
        tldr: Forwarded to ``References.collect_papers``.
        max_kw_refs: Value associated with every search query.
        save_to: Path where the raw papers JSON is written.
        top_k: Number of most relevant papers to return.

    Returns:
        The parsed JSON result returned by the ranking Space.
    """
    json_path, _ref = generate_raw_references(title, description, bib_refs, tldr, max_kw_refs, save_to)
    return _query_top_k_papers(title, json_path, top_k)


if __name__ == "__main__":
    import openai

    openai.api_key = os.getenv("OPENAI_API_KEY")

    title = "Using interpretable boosting algorithms for modeling environmental and agricultural data"
    description = ""
    save_to = "paper.json"
    save_to, _ref = generate_raw_references(title, description, save_to=save_to)

    print("`paper.json` has been generated. Now evaluating its similarity...")

    k = 5
    result = _query_top_k_papers(title, save_to, k)
    print(result)

    # Persist the ranked subset next to the raw dump.
    save_to = "paper2.json"
    with open(save_to, "w", encoding="utf-8") as f:
        json.dump(result, f)