wangrongsheng committed on
Commit
e2af90f
1 Parent(s): 8a972d8

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -290,7 +290,7 @@ class Reader:
290
  def __init__(self, key_word='', query='', filter_keys='',
291
  root_path='./',
292
  gitee_key='',
293
- sort=arxiv.SortCriterion.SubmittedDate, user_name='defualt', language='cn', key='', p=1.0, temperature=1.0):
294
  self.key = str(key) # OpenAI key
295
  self.user_name = user_name # 读者姓名
296
  self.key_word = key_word # 读者感兴趣的关键词
@@ -422,7 +422,7 @@ class Reader:
422
 
423
  return image_url
424
 
425
- def summary_with_chat(self, paper_list, key, p, temperature):
426
  htmls = []
427
  utoken = 0
428
  ctoken = 0
@@ -437,7 +437,7 @@ class Reader:
437
  text += list(paper.section_text_dict.values())[0]
438
  #max_token = 2500 * 4
439
  #text = text[:max_token]
440
- chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), p=p, temperature=temperature)
441
  htmls.append(chat_summary_text)
442
 
443
  # TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
@@ -455,7 +455,7 @@ class Reader:
455
  # methods
456
  method_text += paper.section_text_dict[method_key]
457
  text = summary_text + "\n<Methods>:\n" + method_text
458
- chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), p=p, temperature=temperature)
459
  htmls.append(chat_method_text)
460
  else:
461
  chat_method_text = ''
@@ -478,7 +478,7 @@ class Reader:
478
  text = summary_text + "\n <Conclusion>:\n" + conclusion_text
479
  else:
480
  text = summary_text
481
- chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), p=p, temperature=temperature)
482
  htmls.append(chat_conclusion_text)
483
  htmls.append("\n")
484
  # token统计
@@ -500,7 +500,7 @@ class Reader:
500
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
501
  stop=tenacity.stop_after_attempt(5),
502
  reraise=True)
503
- def chat_conclusion(self, text, key, p, temperature):
504
  openai.api_key = key
505
  conclusion_prompt_token = 650
506
  text_token = len(self.encoding.encode(text))
@@ -524,7 +524,7 @@ class Reader:
524
  """},
525
  ]
526
  response = openai.ChatCompletion.create(
527
- model="gpt-3.5-turbo",
528
  # prompt需要用英语替换,少占用token。
529
  messages=messages,
530
  temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
@@ -547,7 +547,7 @@ class Reader:
547
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
548
  stop=tenacity.stop_after_attempt(5),
549
  reraise=True)
550
- def chat_method(self, text, key, p, temperature):
551
  openai.api_key = key
552
  method_prompt_token = 650
553
  text_token = len(self.encoding.encode(text))
@@ -573,7 +573,7 @@ class Reader:
573
  """},
574
  ]
575
  response = openai.ChatCompletion.create(
576
- model="gpt-3.5-turbo",
577
  messages=messages,
578
  temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
579
  top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
@@ -596,7 +596,7 @@ class Reader:
596
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
597
  stop=tenacity.stop_after_attempt(5),
598
  reraise=True)
599
- def chat_summary(self, text, key, p, temperature):
600
  openai.api_key = key
601
  summary_prompt_token = 1000
602
  text_token = len(self.encoding.encode(text))
@@ -633,7 +633,7 @@ class Reader:
633
  ]
634
 
635
  response = openai.ChatCompletion.create(
636
- model="gpt-3.5-turbo",
637
  messages=messages,
638
  temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
639
  top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
@@ -667,7 +667,7 @@ class Reader:
667
  print(f"Query: {self.query}")
668
  print(f"Sort: {self.sort}")
669
 
670
- def upload_pdf(key, text, p, temperature, file):
671
  # 检查两个输入都不为空
672
  if not key or not text or not file:
673
  return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
@@ -679,7 +679,7 @@ def upload_pdf(key, text, p, temperature, file):
679
  paper_list = [Paper(path=file, sl=section_list)]
680
  # 创建一个Reader对象
681
  reader = Reader()
682
- sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, p=p, temperature=temperature)
683
  return cost, sum_info
684
 
685
  api_title = "api-key可用验证"
@@ -724,6 +724,7 @@ Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github
724
  ip = [
725
  gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
726
  gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
 
727
  gradio.inputs.Slider(minimum=-0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
728
  gradio.inputs.Slider(minimum=-0, maximum=5.0, default=1.0, step=0.1, label="Temperature"),
729
  gradio.inputs.File(label="请上传论文PDF(必填)")
@@ -733,4 +734,4 @@ chatpaper_gui = gradio.Interface(fn=upload_pdf, inputs=ip, outputs=["json", "htm
733
 
734
  # Start server
735
  gui = gradio.TabbedInterface(interface_list=[api_gui, chatpaper_gui], tab_names=["API-key", "ChatPaper"])
736
- gui.launch(quiet=True,show_api=False)
 
290
  def __init__(self, key_word='', query='', filter_keys='',
291
  root_path='./',
292
  gitee_key='',
293
+ sort=arxiv.SortCriterion.SubmittedDate, user_name='defualt', language='cn', key='', model_name="gpt-3.5-turbo", p=1.0, temperature=1.0):
294
  self.key = str(key) # OpenAI key
295
  self.user_name = user_name # 读者姓名
296
  self.key_word = key_word # 读者感兴趣的关键词
 
422
 
423
  return image_url
424
 
425
+ def summary_with_chat(self, paper_list, key, model_name, p, temperature):
426
  htmls = []
427
  utoken = 0
428
  ctoken = 0
 
437
  text += list(paper.section_text_dict.values())[0]
438
  #max_token = 2500 * 4
439
  #text = text[:max_token]
440
+ chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
441
  htmls.append(chat_summary_text)
442
 
443
  # TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
 
455
  # methods
456
  method_text += paper.section_text_dict[method_key]
457
  text = summary_text + "\n<Methods>:\n" + method_text
458
+ chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
459
  htmls.append(chat_method_text)
460
  else:
461
  chat_method_text = ''
 
478
  text = summary_text + "\n <Conclusion>:\n" + conclusion_text
479
  else:
480
  text = summary_text
481
+ chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), model_name=str(model_name), p=p, temperature=temperature)
482
  htmls.append(chat_conclusion_text)
483
  htmls.append("\n")
484
  # token统计
 
500
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
501
  stop=tenacity.stop_after_attempt(5),
502
  reraise=True)
503
+ def chat_conclusion(self, text, key, model_name, p, temperature):
504
  openai.api_key = key
505
  conclusion_prompt_token = 650
506
  text_token = len(self.encoding.encode(text))
 
524
  """},
525
  ]
526
  response = openai.ChatCompletion.create(
527
+ model=model_name,
528
  # prompt需要用英语替换,少占用token。
529
  messages=messages,
530
  temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
 
547
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
548
  stop=tenacity.stop_after_attempt(5),
549
  reraise=True)
550
+ def chat_method(self, text, key, model_name, p, temperature):
551
  openai.api_key = key
552
  method_prompt_token = 650
553
  text_token = len(self.encoding.encode(text))
 
573
  """},
574
  ]
575
  response = openai.ChatCompletion.create(
576
+ model=model_name,
577
  messages=messages,
578
  temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
579
  top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
 
596
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
597
  stop=tenacity.stop_after_attempt(5),
598
  reraise=True)
599
+ def chat_summary(self, text, key, model_name, p, temperature):
600
  openai.api_key = key
601
  summary_prompt_token = 1000
602
  text_token = len(self.encoding.encode(text))
 
633
  ]
634
 
635
  response = openai.ChatCompletion.create(
636
+ model=model_name,
637
  messages=messages,
638
  temperature=temperature, # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
639
  top_p=p # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
 
667
  print(f"Query: {self.query}")
668
  print(f"Sort: {self.sort}")
669
 
670
+ def upload_pdf(key, text, model_name, p, temperature, file):
671
  # 检查两个输入都不为空
672
  if not key or not text or not file:
673
  return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
 
679
  paper_list = [Paper(path=file, sl=section_list)]
680
  # 创建一个Reader对象
681
  reader = Reader()
682
+ sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, model_name=model_name, p=p, temperature=temperature)
683
  return cost, sum_info
684
 
685
  api_title = "api-key可用验证"
 
724
  ip = [
725
  gradio.inputs.Textbox(label="请输入你的api-key(必填)", default="", type='password'),
726
  gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
727
+ gradio.inputs.Radio(choices=["gpt-3.5-turbo", "gpt-3.5-turbo-0301"], default="gpt-3.5-turbo", label="设备"),
728
  gradio.inputs.Slider(minimum=-0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
729
  gradio.inputs.Slider(minimum=-0, maximum=5.0, default=1.0, step=0.1, label="Temperature"),
730
  gradio.inputs.File(label="请上传论文PDF(必填)")
 
734
 
735
  # Start server
736
  gui = gradio.TabbedInterface(interface_list=[api_gui, chatpaper_gui], tab_names=["API-key", "ChatPaper"])
737
+ gui.launch(quiet=True,show_api=False)