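"""Gradio app that recommends newly published arXiv papers based on a Zotero library.

Papers tagged in Zotero are fetched, their arXiv metadata is retrieved, embeddings are
computed through the Hugging Face API, and the vectors are upserted into a Pinecone
index. New arXiv submissions are then embedded and returned as recommendations when
their similarity to the library exceeds a user-chosen threshold.
"""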
import logging
import os
import gradio as gr
import pandas as pd
from pinecone import Pinecone
from utils import (
    get_zotero_ids,
    get_arxiv_papers,
    get_hf_embeddings,
    upload_to_pinecone,
    get_new_papers,
    recommend_papers,
)

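# API keys and Pinecone settings are read from environment variables; set them before launching.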
HF_API_KEY = os.getenv('HF_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
INDEX_NAME = os.getenv('INDEX_NAME')
NAMESPACE_NAME = os.getenv('NAMESPACE_NAME')

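# Work from the script's own directory so relative paths (arxiv-scrape.csv, logfile.log) resolve consistently.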
script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(script_dir) 

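# Map the human-readable radio choices to their arXiv category codes.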
def category_radio(cat):
    if cat == 'Computer Vision and Pattern Recognition':
        return 'cs.CV'
    elif cat == 'Computation and Language':
        return 'cs.CL'
    elif cat == 'Artificial Intelligence':
        return 'cs.AI'
    elif cat == 'Robotics':
        return 'cs.RO'

def comment_radio(com):
    if com == 'None':
        return None
    else:
        return com
    
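# Remove the local scrape cache and the Pinecone index so the next run starts from scratch.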
def reset_project():
    file_path = 'arxiv-scrape.csv'
    if os.path.exists(file_path):
        os.remove(file_path)
        logging.info(f"{file_path} has been deleted. Delete reset_project() if you want to persist recommended papers.")

    api_key = os.getenv('PINECONE_API_KEY')
    index = os.getenv('INDEX_NAME')
    pc = Pinecone(api_key = api_key)
    if index in pc.list_indexes().names():
        pc.delete_index(index)
        logging.info(f"{index} index has been deleted from the vectordb. Delete reset_project() if you want to persist recommended papers.")
    return f"{file_path} has been deleted.<br />{index} index has been deleted from the vectordb.<br />"

def reset_csv():
    file_path = 'arxiv-scrape.csv'
    if os.path.exists(file_path):
        os.remove(file_path)
        logging.info(f"{file_path} has been deleted. Delete reset_csv() if you want to persist recommended papers.")

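# Gradio UI: collect Zotero credentials, arXiv query options, and a similarity threshold.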
with gr.Blocks() as demo:

    zotero_api_key = gr.Textbox(label="Zotero API Key")

    zotero_library_id = gr.Textbox(label="Zotero Library ID")

    zotero_tag = gr.Textbox(label="Zotero Tag")

    arxiv_category_name = gr.State([])
    radio_arxiv_category_name = gr.Radio(['Computer Vision and Pattern Recognition', 'Computation and Language', 'Artificial Intelligence', 'Robotics'], label="ArXiv Category Query")
    radio_arxiv_category_name.change(fn = category_radio, inputs= radio_arxiv_category_name, outputs= arxiv_category_name)

    arxiv_comment_query = gr.State([])
    radio_arxiv_comment_query = gr.Radio(['CVPR', 'ACL', 'TACL', 'JAIR', 'IJRR', 'None'], label="ArXiv Comment Query")
    radio_arxiv_comment_query.change(fn = comment_radio, inputs= radio_arxiv_comment_query, outputs= arxiv_comment_query)

    threshold = gr.Slider(minimum= 0.70, maximum= 0.99, label="Similarity Score Threshold")

    init_output = gr.Textbox(label="Project Initialization Result")

    rec_output = gr.Markdown(label = "Recommended Papers")

    reset_output = gr.Markdown(label = "Reset Declaration")

    init_btn = gr.Button("Initialize")

    rec_btn = gr.Button("Recommend")

    reset_btn = gr.Button("Reset")

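    # Periodically reset the project state (every 600 seconds).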
    timer = gr.Timer(value=600)
    timer.tick(reset_project)

    reset_btn.click(fn = reset_project, inputs= [], outputs= [reset_output])

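    # Initialization: pull tagged papers from Zotero, fetch their arXiv metadata, embed them, and upsert the embeddings into Pinecone.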
    @init_btn.click(inputs= [zotero_api_key, zotero_library_id, zotero_tag], outputs= [init_output])
    def init(zotero_api_key, zotero_library_id, zotero_tag, hf_api_key = HF_API_KEY,  pinecone_api_key = PINECONE_API_KEY, index_name = INDEX_NAME,  namespace_name = NAMESPACE_NAME):

        logging.basicConfig(filename= 'logfile.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
        logging.info("Project Initialization Script Started (Serverless)")
        
        ids = get_zotero_ids(zotero_api_key, zotero_library_id, zotero_tag)

        df = get_arxiv_papers(ids)

        embeddings, dim = get_hf_embeddings(hf_api_key, df)

        feedback = upload_to_pinecone(pinecone_api_key, index_name, namespace_name, embeddings, dim, df)

        logging.info(feedback)
        if isinstance(feedback, dict):
            return f"Retrieved {len(ids)} papers from Zotero. Successfully upserted {feedback['upserted_count']} embeddings in {namespace_name} namespace."
        else:
            return feedback
    
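    # Recommendation: fetch recent arXiv papers for the chosen category/comment, embed the ones not seen before, and return matches above the threshold.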
    @rec_btn.click(inputs= [arxiv_category_name, arxiv_comment_query, threshold], outputs= [rec_output])
    def recs(arxiv_category_name, arxiv_comment_query, threshold, hf_api_key = HF_API_KEY,  pinecone_api_key = PINECONE_API_KEY, index_name = INDEX_NAME,  namespace_name = NAMESPACE_NAME):
        logging.info("Weekly Script Started (Serverless)")

        df = get_arxiv_papers(category= arxiv_category_name, comment= arxiv_comment_query)

        df = get_new_papers(df)

        if not isinstance(df, pd.DataFrame):
            return df
        
        embeddings, _ = get_hf_embeddings(hf_api_key, df)

        results = recommend_papers(pinecone_api_key, index_name, namespace_name, embeddings, df, threshold)

        return results

demo.launch(share = True)