QINGCHE committed on
Commit
02d932f
1 Parent(s): f04f1cd

disable baidu

Browse files
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🐨
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 3.32.0
8
  app_file: app.py
9
  timeout: 300
10
  pinned: false
 
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 3.11
8
  app_file: app.py
9
  timeout: 300
10
  pinned: false
__pycache__/outline.cpython-39.pyc CHANGED
Binary files a/__pycache__/outline.cpython-39.pyc and b/__pycache__/outline.cpython-39.pyc differ
 
__pycache__/run.cpython-39.pyc CHANGED
Binary files a/__pycache__/run.cpython-39.pyc and b/__pycache__/run.cpython-39.pyc differ
 
__pycache__/util.cpython-39.pyc CHANGED
Binary files a/__pycache__/util.cpython-39.pyc and b/__pycache__/util.cpython-39.pyc differ
 
app.py CHANGED
@@ -4,10 +4,6 @@ import textInput
4
  from BERT_inference import BertClassificationModel
5
 
6
 
7
-
8
-
9
-
10
-
11
  output = []
12
  keys = []
13
 
@@ -16,7 +12,7 @@ keys = []
16
 
17
  with gr.Blocks(css = ".output {min-height: 500px}") as demo:
18
  #用markdown语法编辑输出一段话
19
- gr.Markdown("# 文本分类系统")
20
  gr.Markdown("请选择要输入的文件或填入文本")
21
  topic_num = gr.Number(label="主题个数")
22
  max_length = gr.Number(label="摘要最大长度")
@@ -42,12 +38,9 @@ with gr.Blocks(css = ".output {min-height: 500px}") as demo:
42
  file_txt_output = gr.File(label="txt格式")
43
  file_docx_output = gr.File(label="docx格式")
44
  file_pdf_output = gr.File(label="pdf格式")
45
- # with gr.Accordion("Open for More!"):
46
- # gr.Markdown("Look at me...")
47
  text_button.click(textInput.text_dump_to_lines, inputs=[text_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
48
  file_button.click(textInput.file_dump_to_lines,inputs=[file_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
49
 
50
- try:
51
- demo.queue().launch()
52
- except Exception as e:
53
- print("error",e)
 
4
  from BERT_inference import BertClassificationModel
5
 
6
 
 
 
 
 
7
  output = []
8
  keys = []
9
 
 
12
 
13
  with gr.Blocks(css = ".output {min-height: 500px}") as demo:
14
  #用markdown语法编辑输出一段话
15
+ gr.Markdown("# TSA - 文本整理助手")
16
  gr.Markdown("请选择要输入的文件或填入文本")
17
  topic_num = gr.Number(label="主题个数")
18
  max_length = gr.Number(label="摘要最大长度")
 
38
  file_txt_output = gr.File(label="txt格式")
39
  file_docx_output = gr.File(label="docx格式")
40
  file_pdf_output = gr.File(label="pdf格式")
41
+
 
42
  text_button.click(textInput.text_dump_to_lines, inputs=[text_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
43
  file_button.click(textInput.file_dump_to_lines,inputs=[file_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
44
 
45
+
46
+ demo.queue().launch()
 
 
outline.py CHANGED
@@ -65,15 +65,15 @@ def passage_outline(matrix,sentences):
65
  structure = {}
66
  for each in result.keys():
67
  structure[each] =[sentences[i] for i in result[each]]
68
- outline = ""
69
  outline_list = []
70
  for key in sorted(structure.keys()):
71
  outline_list.append(f"主题:")
72
- outline = outline+f"主题:\n"
73
  for sentence in structure[key]:
74
  outline_list.append(sentence)
75
- outline = outline+f"- {sentence}\n"
76
- return outline,outline_list
77
  if __name__ == "__main__":
78
  matrix = np.array([[0.0 ,0.02124888, 0.10647043 ,0.09494194 ,0.0689209 ],
79
  [0.01600688 ,0.0 ,0.05879448 ,0.0331325 , 0.0155093 ],
 
65
  structure = {}
66
  for each in result.keys():
67
  structure[each] =[sentences[i] for i in result[each]]
68
+ outl = []
69
  outline_list = []
70
  for key in sorted(structure.keys()):
71
  outline_list.append(f"主题:")
72
+ outl.append(f"主题:\n")
73
  for sentence in structure[key]:
74
  outline_list.append(sentence)
75
+ outl.append(f"- {sentence}\n")
76
+ return outl,outline_list
77
  if __name__ == "__main__":
78
  matrix = np.array([[0.0 ,0.02124888, 0.10647043 ,0.09494194 ,0.0689209 ],
79
  [0.01600688 ,0.0 ,0.05879448 ,0.0331325 , 0.0155093 ],
run.py CHANGED
@@ -8,21 +8,6 @@ from inference import BertClassificationModel
8
  # output:file/text/topic_sentence
9
 
10
 
11
- # file_process:
12
- # in util
13
- # read file code
14
- # file to json_text
15
-
16
- # convert:
17
- # in util
18
- # convert code
19
- # json_text to text
20
-
21
- # process:
22
- # in util
23
- # text process code
24
- # del stop seg
25
-
26
  def texClear(article):
27
  sentencesCleared = [util.clean_text(sentence) for sentence in article]
28
  sentencesCleared = [string for string in sentencesCleared if string != '' ]
@@ -36,7 +21,7 @@ def textToAb(sentences, article, topic_num, max_length):
36
  title_dict,title = util.generation(groups, max_length)
37
  # ans:
38
  # {Ai_abstruct:(main_sentence,paragraph)}
39
- print(title)
40
  matrix = inference.inference_matrix(title)
41
 
42
  outl,outline_list = outline.passage_outline(matrix,title)
 
8
  # output:file/text/topic_sentence
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def texClear(article):
12
  sentencesCleared = [util.clean_text(sentence) for sentence in article]
13
  sentencesCleared = [string for string in sentencesCleared if string != '' ]
 
21
  title_dict,title = util.generation(groups, max_length)
22
  # ans:
23
  # {Ai_abstruct:(main_sentence,paragraph)}
24
+ # print(title)
25
  matrix = inference.inference_matrix(title)
26
 
27
  outl,outline_list = outline.passage_outline(matrix,title)
util.py CHANGED
@@ -15,7 +15,7 @@ def post_url(url, headers, payload):
15
 
16
  def seg(text):
17
  text = text.replace('\n', " ")
18
- sentences = re.split(r'(?<=[。!?.!?:])\s*', text)
19
  sentences = [string for string in sentences if string != '']
20
  return sentences
21
 
@@ -72,16 +72,18 @@ def generation(para, max_length):
72
  'Accept': 'application/json'
73
  }
74
 
75
- response = post_url(url, headers, payload)
76
- text_dict = json.loads(response.text)
77
  # print(text_dict)
78
- while('summary' not in text_dict.keys()):
79
- response = post_url(url, headers, payload)
80
- text_dict = json.loads(response.text)
81
- print("ReTrying")
82
-
83
- topic[text_dict['summary']] = (j, k)
84
- Ai_abstract.append(text_dict['summary'])
 
 
85
  return topic,Ai_abstract
86
  def formate_text(title_dict,outline_list):
87
  formated = []
 
15
 
16
  def seg(text):
17
  text = text.replace('\n', " ")
18
+ sentences = re.split(r'(?<=[。!?.!?: ])\s*', text)
19
  sentences = [string for string in sentences if string != '']
20
  return sentences
21
 
 
72
  'Accept': 'application/json'
73
  }
74
 
75
+ # response = post_url(url, headers, payload)
76
+ # text_dict = json.loads(response.text)
77
  # print(text_dict)
78
+ # while('summary' not in text_dict.keys()):
79
+ # response = post_url(url, headers, payload)
80
+ # text_dict = json.loads(response.text)
81
+ # print("ReTrying")
82
+
83
+ # topic[text_dict['summary']] = (j, k)
84
+ # Ai_abstract.append(text_dict['summary'])
85
+ topic[j] = (j, k)
86
+ Ai_abstract.append(j)
87
  return topic,Ai_abstract
88
  def formate_text(title_dict,outline_list):
89
  formated = []