wangrongsheng committed on
Commit 2dea19e
1 Parent(s): f64978e

Optimization: add new model parameters

Files changed (1)
  1. app.py +17 -9
app.py CHANGED
@@ -422,7 +422,7 @@ class Reader:
 
         return image_url
 
-    def summary_with_chat(self, paper_list, key):
+    def summary_with_chat(self, paper_list, key, p, temperature):
         htmls = []
         utoken = 0
         ctoken = 0
@@ -437,7 +437,7 @@ class Reader:
             text += list(paper.section_text_dict.values())[0]
             #max_token = 2500 * 4
             #text = text[:max_token]
-            chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key))
+            chat_summary_text, utoken1, ctoken1, ttoken1 = self.chat_summary(text=text, key=str(key), p=p, temperature=temperature)
             htmls.append(chat_summary_text)
 
             # TODO: insert the largest (highest-pixel-count) image from the paper into the md document; this could be made smarter:
@@ -455,7 +455,7 @@ class Reader:
                 # methods
                 method_text += paper.section_text_dict[method_key]
                 text = summary_text + "\n<Methods>:\n" + method_text
-                chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key))
+                chat_method_text, utoken2, ctoken2, ttoken2 = self.chat_method(text=text, key=str(key), p=p, temperature=temperature)
                 htmls.append(chat_method_text)
             else:
                 chat_method_text = ''
@@ -478,7 +478,7 @@ class Reader:
                 text = summary_text + "\n <Conclusion>:\n" + conclusion_text
             else:
                 text = summary_text
-            chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key))
+            chat_conclusion_text, utoken3, ctoken3, ttoken3 = self.chat_conclusion(text=text, key=str(key), p=p, temperature=temperature)
             htmls.append(chat_conclusion_text)
             htmls.append("\n")
             # token statistics
@@ -500,7 +500,7 @@ class Reader:
     @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                     stop=tenacity.stop_after_attempt(5),
                     reraise=True)
-    def chat_conclusion(self, text, key):
+    def chat_conclusion(self, text, key, p, temperature):
         openai.api_key = key
         conclusion_prompt_token = 650
         text_token = len(self.encoding.encode(text))
@@ -527,6 +527,8 @@ class Reader:
             model="gpt-3.5-turbo",
             # The prompt should be written in English to use fewer tokens.
             messages=messages,
+            temperature=temperature,  # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+            top_p=p  # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
         )
 
         result = ''
@@ -545,7 +547,7 @@ class Reader:
     @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                     stop=tenacity.stop_after_attempt(5),
                     reraise=True)
-    def chat_method(self, text, key):
+    def chat_method(self, text, key, p, temperature):
         openai.api_key = key
         method_prompt_token = 650
         text_token = len(self.encoding.encode(text))
@@ -573,6 +575,8 @@ class Reader:
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
             messages=messages,
+            temperature=temperature,  # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+            top_p=p  # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
         )
 
         result = ''
@@ -592,7 +596,7 @@ class Reader:
     @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                     stop=tenacity.stop_after_attempt(5),
                     reraise=True)
-    def chat_summary(self, text, key):
+    def chat_summary(self, text, key, p, temperature):
         openai.api_key = key
         summary_prompt_token = 1000
         text_token = len(self.encoding.encode(text))
@@ -631,6 +635,8 @@ class Reader:
         response = openai.ChatCompletion.create(
             model="gpt-3.5-turbo",
             messages=messages,
+            temperature=temperature,  # What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+            top_p=p  # An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
         )
 
         result = ''
@@ -661,7 +667,7 @@ class Reader:
         print(f"Query: {self.query}")
         print(f"Sort: {self.sort}")
 
-def upload_pdf(key, text, file):
+def upload_pdf(key, text, p, temperature, file):
     # Check that the required inputs are not empty
     if not key or not text or not file:
         return "The inputs must not be empty: please fill in the text fields and upload a PDF file!"
@@ -673,7 +679,7 @@ def upload_pdf(key, text, file):
     paper_list = [Paper(path=file, sl=section_list)]
     # Create a Reader object
     reader = Reader()
-    sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key)
+    sum_info, cost = reader.summary_with_chat(paper_list=paper_list, key=key, p=p, temperature=temperature)
     return cost, sum_info
 
 api_title = "api-key availability check"
@@ -718,6 +724,8 @@ Use ChatGPT to summary the papers.Star our Github [🌟ChatPaper](https://github
 ip = [
     gradio.inputs.Textbox(label="Enter your api-key (required)", default="", type='password'),
     gradio.inputs.Textbox(label="Enter the paper section-heading index (comma-separated, required)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
+    gradio.inputs.Slider(minimum=0, maximum=1.0, default=1.0, step=0.05, label="Top-p (nucleus sampling)"),
+    gradio.inputs.Slider(minimum=0, maximum=5.0, default=1.0, step=0.1, label="Temperature"),
     gradio.inputs.File(label="Upload the paper PDF (required)")
 ]
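For context, here is a minimal sketch of how the two new slider values travel through the changed code into the OpenAI request. It is an illustration only: the helper name and prompt text below are placeholders, not the app's actual prompts; the call uses the legacy pre-1.0 openai.ChatCompletion interface that app.py already relies on; and, as the in-diff comment notes, the API accepts temperature values only between 0 and 2, so values above 2 from the Temperature slider would be rejected by the service.

# Sketch only: illustrates the parameter flow added by this commit, with placeholder prompts.
import openai

def chat_sketch(text, key, p, temperature):
    # Same pattern as chat_summary / chat_method / chat_conclusion in the diff:
    # set the key, then pass the sampling knobs straight through to the API call.
    openai.api_key = key
    messages = [
        {"role": "system", "content": "You are a research assistant that summarizes papers."},  # placeholder prompt
        {"role": "user", "content": text},
    ]
    response = openai.ChatCompletion.create(  # legacy (pre-1.0) openai-python interface, as used in app.py
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=temperature,  # 0-2: higher values are more random, lower values more deterministic
        top_p=p,                  # nucleus sampling: keep only the tokens in the top_p probability mass
    )
    return response.choices[0].message.content

# The Gradio inputs list is positional, so it must line up with upload_pdf(key, text, p, temperature, file):
# Textbox -> key, Textbox -> text, Slider -> p, Slider -> temperature, File -> file.

The essential point of the commit is simply that p and temperature are threaded as keyword arguments all the way from upload_pdf through summary_with_chat into each chat_* helper, which is why every signature and call site in the diff gains the same two parameters.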