Kexin2000 committed on
Commit
b9a0194
1 Parent(s): 0945cdf

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +129 -0
  2. requirements.in +3 -0
  3. requirements.txt +62 -0
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.request
2
+ import fitz
3
+ import re
4
+ import numpy as np
5
+ import tensorflow_hub as hub
6
+ import openai
7
+ import gradio as gr
8
+ import os
9
+ from sklearn.neighbors import NearestNeighbors
10
+ import requests
11
+ from cachetools import cached, TTLCache
12
+
13
# Lifetime, in seconds, of entries in the TTL caches below: six hours.
CACHE_TIME = 6 * 60 * 60

# Module-wide recommender object; starts out unset.
recommender = None
17
+
18
# Second feature: paper recommendations backed by the Semantic Scholar API.
@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_recommendations_from_semantic_scholar(semantic_scholar_id: str):
    """Fetch the papers Semantic Scholar recommends for one seed paper.

    The seed id is sent as the only entry of ``positivePaperIds``; the
    request asks for the ``externalIds``, ``title`` and ``year`` fields of
    at most 10 papers. Return values are memoised per id in a TTL cache
    of up to 500 entries that expire after ``CACHE_TIME`` seconds.

    Args:
        semantic_scholar_id: Paper identifier understood by the
            recommendations endpoint (the caller in this file passes
            ``"ArXiv:<id>"``).

    Returns:
        The value stored under ``"recommendedPapers"`` in the JSON reply.

    Raises:
        gr.Error: If the request fails or times out, the reply is not
            valid JSON, or it has no ``"recommendedPapers"`` key.
    """
    try:
        response = requests.post(
            "https://api.semanticscholar.org/recommendations/v1/papers/",
            json={"positivePaperIds": [semantic_scholar_id]},
            params={"fields": "externalIds,title,year", "limit": 10},
            # Without a timeout a stalled connection would block the
            # calling worker indefinitely.
            timeout=30,
        )
        return response.json()["recommendedPapers"]
    except (KeyError, ValueError, requests.RequestException) as e:
        # Network failures and malformed replies are surfaced the same way
        # as a missing key, so the UI shows a readable message instead of a
        # raw traceback.
        raise gr.Error(
            "获取推荐时出错,如果这是一篇新论文或尚未被Semantic Scholar索引,则可能尚未有推荐。"
        ) from e
34
+
35
+
36
def filter_recommendations(recommendations, max_paper_count=5):
    """Keep only recommendations that carry an arXiv id, up to a maximum.

    Args:
        recommendations: Iterable of dict records; each is inspected for
            an ``"ArXiv"`` entry inside its ``"externalIds"`` mapping.
        max_paper_count: Upper bound on the number of records returned.

    Returns:
        A new list with the first ``max_paper_count`` records that have a
        non-``None`` arXiv id, in their original order.
    """
    arxiv_papers = [
        rec
        for rec in recommendations
        # A record whose ``externalIds`` is missing or None simply has no
        # arXiv id; skip it instead of raising.
        if (rec.get("externalIds") or {}).get("ArXiv") is not None
    ]
    # Slicing already copes with lists shorter than the limit.
    return arxiv_papers[:max_paper_count]
43
+
44
+
45
@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_paper_title_from_arxiv_id(arxiv_id):
    """Look up a paper's title through the Hugging Face papers API.

    Return values are memoised per ``arxiv_id`` in a TTL cache of up to
    500 entries that expire after ``CACHE_TIME`` seconds.

    Args:
        arxiv_id: Identifier appended to
            ``https://huggingface.co/api/papers/``.

    Returns:
        The ``"title"`` field of the JSON reply.

    Raises:
        gr.Error: On any failure (network error, timeout, invalid JSON,
            missing ``"title"``); the original exception is chained.
    """
    try:
        response = requests.get(
            f"https://huggingface.co/api/papers/{arxiv_id}",
            # Without a timeout a stalled connection would block the
            # calling worker indefinitely.
            timeout=30,
        )
        return response.json()["title"]
    except Exception as e:
        # Log to stdout as well, so the failure is visible in the server
        # logs and not only in the UI.
        print(f"获取论文标题时出错 {arxiv_id}: {e}")
        raise gr.Error(f"获取论文标题时出错 {arxiv_id}: {e}") from e
54
+
55
+
56
def format_recommendation_into_markdown(arxiv_id, recommendations):
    """Render recommendations as a Markdown bullet list.

    Each bullet links the paper title to its Hugging Face papers page and
    is followed by the publication year in parentheses when one is known.

    Args:
        arxiv_id: Id of the seed paper. Unused here; kept so existing
            callers keep working.
        recommendations: Iterable of dict records with ``"title"``, an
            ``"externalIds"`` mapping containing ``"ArXiv"``, and
            optionally ``"year"``.

    Returns:
        The Markdown text: a header line, a blank line, then one bullet
        per recommendation.
    """
    parts = ["以下论文由Semantic Scholar API推荐\n\n"]
    for rec in recommendations:
        hub_paper_url = f"https://huggingface.co/papers/{rec['externalIds']['ArXiv']}"
        year = rec.get("year")
        # Leave the year out when it is unknown rather than printing
        # a literal "(None)".
        year_suffix = f" ({year})" if year is not None else ""
        parts.append(f"* [{rec['title']}]({hub_paper_url}){year_suffix}\n")
    return "".join(parts)
62
+
63
+
64
def _arxiv_id_from_paper_url(url):
    """Return the last non-empty path segment of *url*, used as the arXiv id."""
    # The original code called `parse_arxiv_id_from_paper_url`, which this
    # file never defines; this helper supplies the missing step.
    return url.strip().rstrip("/").rsplit("/", 1)[-1]


def return_recommendations(url):
    """Produce Markdown recommendations for the paper behind *url*.

    The arXiv id is taken from the end of the URL, looked up on Semantic
    Scholar as ``"ArXiv:<id>"``, narrowed to recommendations that have an
    arXiv id themselves, and rendered as Markdown.

    Args:
        url: Paper URL whose final path segment is the arXiv id, e.g. a
            Hugging Face papers URL.

    Returns:
        Markdown text listing the recommended papers.
    """
    arxiv_id = _arxiv_id_from_paper_url(url)
    recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
    filtered_recommendations = filter_recommendations(recommendations)
    return format_recommendation_into_markdown(arxiv_id, filtered_recommendations)
69
+
70
+
71
# Gradio interface: page heading and introductory text.
title = "PDF GPT Turbo"
description = """ PDF GPT Turbo允许您与PDF文件交流。它使用Google的Universal Sentence Encoder与Deep averaging network(DAN)来提供无幻觉的响应,通过提高OpenAI的嵌入质量。它在方括号([Page No.])中引用页码,显示信息的位置,增强了响应的可信度。"""

# Predefined questions offered as clickable examples; append more as needed.
questions = [
    "研究调查了什么?",
    "能否提供本文的摘要?",
    "这项研究使用了什么方法?",
]
82
+
83
# Build the Gradio UI: a heading, then two tabs (PDF question answering and
# paper recommendations). The nesting below is reconstructed from the
# statement order, since the source this was taken from had lost its
# indentation.
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200px; }""") as demo:
    gr.Markdown(f'<center><h3>{title}</h3></center>')
    gr.Markdown(description)

    # First tab: ask questions about a PDF given by URL or upload.
    with gr.Tab(title):
        with gr.Row():
            with gr.Group():
                gr.Markdown('<p style="text-align:center">在这里获取您的Open AI API密钥 <a href="https://platform.openai.com/account/api-keys">here</a></p>')
                with gr.Accordion("API Key"):
                    # type='password' masks the key as it is typed; Textbox
                    # has no `password` argument.
                    openAI_key = gr.Textbox(label='在此输入您的OpenAI API密钥', type='password')
                    url = gr.Textbox(label='在此输入PDF的URL (示例: https://arxiv.org/pdf/1706.03762.pdf )')
                    gr.Markdown("<center><h4>或</h4></center>")
                    file = gr.File(label='在此上传您的PDF/研究论文/书籍', file_types=['.pdf'])
                question = gr.Textbox(label='在此输入您的问题')
                gr.Examples(
                    [[q] for q in questions],
                    inputs=[question],
                    label="预定义问题:点击问题以自动填充输入框,然后按Enter键!",
                )
                # `value=` sets the preselected choice (`default=` is the
                # deprecated spelling).
                model = gr.Radio(
                    [
                        'gpt-3.5-turbo',
                        'gpt-3.5-turbo-16k',
                        'gpt-3.5-turbo-0613',
                        'gpt-3.5-turbo-16k-0613',
                        'text-davinci-003',
                        'gpt-4',
                        'gpt-4-32k',
                    ],
                    label='选择模型',
                    value='gpt-3.5-turbo',
                )
                btn = gr.Button(value='提交')
                btn.style(full_width=True)
            with gr.Group():
                chatbot = gr.Chatbot(placeholder="聊天历史", label="聊天历史", lines=50, elem_id="chatbot")

        # Bind the button's click event to the question_answer function.
        # NOTE(review): `question_answer` is not defined anywhere in this
        # file; it must be supplied before this module can be imported.
        btn.click(
            question_answer,
            inputs=[chatbot, url, file, question, openAI_key, model],
            outputs=[chatbot],
        )

    # Second tab: paper recommendations for a Hugging Face Papers URL.
    with gr.Tab("论文推荐"):
        paper_url = gr.Textbox(label="输入Hugging Face Papers的URL", lines=1)
        recommend_btn = gr.Button("获取推荐")
        recommendations_md = gr.Markdown()
        # Components take the callback through an event listener, not as a
        # constructor argument.
        recommend_btn.click(
            return_recommendations,
            inputs=[paper_url],
            outputs=[recommendations_md],
        )

demo.launch()
requirements.in ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ cachetools
2
+ gradio
3
+ requests
requirements.txt ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PyMuPDF
2
+ scikit-learn
3
+ tensorflow
4
+ tensorflow-hub
5
+ openai
6
+ aiofiles==23.2.1
7
+ altair==5.1.1
8
+ annotated-types==0.5.0
9
+ anyio==3.7.1
10
+ attrs==23.1.0
11
+ cachetools==5.3.1
12
+ certifi==2023.7.22
13
+ charset-normalizer==3.2.0
14
+ click==8.1.7
15
+ contourpy==1.1.1
16
+ cycler==0.11.0
17
+ fastapi==0.103.1
18
+ ffmpy==0.3.1
19
+ filelock==3.12.4
20
+ fonttools==4.42.1
21
+ fsspec==2023.9.2
22
+ gradio==3.45.1
23
+ gradio-client==0.5.2
24
+ h11==0.14.0
25
+ httpcore==0.18.0
26
+ httpx==0.25.0
27
+ huggingface-hub==0.17.3
28
+ idna==3.4
29
+ importlib-resources==6.1.0
30
+ jinja2==3.1.2
31
+ jsonschema==4.19.1
32
+ jsonschema-specifications==2023.7.1
33
+ kiwisolver==1.4.5
34
+ markupsafe==2.1.3
35
+ matplotlib==3.8.0
36
+ numpy==1.26.0
37
+ orjson==3.9.7
38
+ packaging==23.1
39
+ pandas==2.1.1
40
+ pillow==10.0.1
41
+ pydantic==2.4.1
42
+ pydantic-core==2.10.1
43
+ pydub==0.25.1
44
+ pyparsing==3.1.1
45
+ python-dateutil==2.8.2
46
+ python-multipart==0.0.6
47
+ pytz==2023.3.post1
48
+ pyyaml==6.0.1
49
+ referencing==0.30.2
50
+ requests==2.31.0
51
+ rpds-py==0.10.3
52
+ semantic-version==2.10.0
53
+ six==1.16.0
54
+ sniffio==1.3.0
55
+ starlette==0.27.0
56
+ toolz==0.12.0
57
+ tqdm==4.66.1
58
+ typing-extensions==4.8.0
59
+ tzdata==2023.3
60
+ urllib3==2.0.5
61
+ uvicorn==0.23.2
62
+ websockets==11.0.3