yuta_hayashi committed on
Commit cf71fc7
1 Parent(s): 86b56ff

Add application file

Files changed (2)
  1. app.py +76 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,76 @@
+ from langchain.chat_models import ChatAnthropic
+ import gradio as gr
+ import os
+ import requests
+ from bs4 import BeautifulSoup
+ from langchain.schema import AIMessage, HumanMessage, SystemMessage
+
+
+ def summarize_with_anthropic(article, api_key, summary_prompt):
+     # Build a claude-2 chat client and ask it to summarize the article.
+     chat = ChatAnthropic(
+         anthropic_api_key=api_key,
+         model="claude-2",
+         max_tokens_to_sample=1024,
+         temperature=0,
+     )
+     prompt_content = f"{summary_prompt}\n\n<article>{article}</article>\n\n"
+     messages = [HumanMessage(content=prompt_content)]
+     response = chat(messages)
+     return response.content
+
+
+ def modify_url_to_target(url):
+     # Map arXiv PDF links to the abstract page, and abstract links to the ar5iv HTML version.
+     if "arxiv.org/pdf" in url:
+         modified_url = url.replace("arxiv.org/pdf", "arxiv.org/abs")
+     elif "arxiv.org/abs" in url:
+         modified_url = url.replace("arxiv.org/abs", "ar5iv.labs.arxiv.org/html")
+     else:
+         modified_url = url
+     return modified_url
+
+
+ def scrape_article_from_url(url):
+     # Fetch the (possibly rewritten) URL and extract the text to summarize.
+     modified_url = modify_url_to_target(url)
+     response = requests.get(modified_url, allow_redirects=True)
+     soup = BeautifulSoup(response.content, 'html.parser')
+
+     # If the request ended up on an arXiv abstract page, return only the abstract.
+     if "arxiv.org/abs" in response.url:
+         abstract_section = soup.find('blockquote', {'class': 'abstract'})
+         if abstract_section:
+             abstract = abstract_section.text.strip().replace("Abstract: ", "")
+             return abstract, modified_url
+         else:
+             return "Abstract not found.", modified_url
+     return soup.get_text(), modified_url
+
+
+ def summarize_from_url(
+     article_url,
+     summary_prompt,
+     anthropic_key=None,
+ ):
+     # Fall back to the ANTHROPIC_API_KEY environment variable when no key is entered.
+     if not anthropic_key:
+         anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
+     article_content, summarized_url = scrape_article_from_url(article_url)
+     summary = summarize_with_anthropic(article_content, anthropic_key, summary_prompt)
+     return summary, summarized_url
+
+
+ def main_interface():
+     # Gradio UI: article URL, summary prompt, and API key in; summary and resolved URL out.
+     gr_interface = gr.Interface(
+         fn=summarize_from_url,
+         inputs=[
+             gr.Textbox(placeholder="Enter Article URL", label="arXiv URL"),
+             # Default prompt (Japanese): "Please explain in natural Japanese as a bulleted list."
+             gr.Textbox(value="自然な日本語で箇条書きで解説してください。", label="Summary Prompt"),
+             gr.Textbox(
+                 placeholder="Enter Anthropic API Key", label="Anthropic API Key"
+             ),
+         ],
+         outputs=[
+             gr.Textbox(label="Summary"),
+             gr.Textbox(label="Summarized URL")
+         ],
+         title="arXiv Summarizer",
+     )
+
+     gr_interface.launch(debug=True)
+
+
+ if __name__ == "__main__":
+     main_interface()
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ langchain
+ anthropic
+ gradio
+ requests
+ beautifulsoup4
+ pdfminer.six