Spaces:

QINGCHE
/

TSA

Sleeping

QINGCHE committed on Jun 19, 2023

Commit

02d932f

•

1 Parent(s): f04f1cd

disable baidu

Files changed (8) hide show

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🐨
 colorFrom: blue
 colorTo: green
 sdk: gradio
-sdk_version: 3.32.0
 app_file: app.py
 timeout: 300
 pinned: false

 colorFrom: blue
 colorTo: green
 sdk: gradio
+sdk_version: 3.11
 app_file: app.py
 timeout: 300
 pinned: false

__pycache__/outline.cpython-39.pyc CHANGED Viewed

Binary files a/__pycache__/outline.cpython-39.pyc and b/__pycache__/outline.cpython-39.pyc differ

__pycache__/run.cpython-39.pyc CHANGED Viewed

Binary files a/__pycache__/run.cpython-39.pyc and b/__pycache__/run.cpython-39.pyc differ

__pycache__/util.cpython-39.pyc CHANGED Viewed

Binary files a/__pycache__/util.cpython-39.pyc and b/__pycache__/util.cpython-39.pyc differ

app.py CHANGED Viewed

@@ -4,10 +4,6 @@ import textInput
 from BERT_inference import BertClassificationModel
 output = []
 keys = []
@@ -16,7 +12,7 @@ keys = []
 with gr.Blocks(css = ".output {min-height: 500px}") as demo:
     #用markdown语法编辑输出一段话
-    gr.Markdown("# 文本分类系统")
     gr.Markdown("请选择要输入的文件或填入文本")
     topic_num = gr.Number(label="主题个数")
     max_length = gr.Number(label="摘要最大长度")
@@ -42,12 +38,9 @@ with gr.Blocks(css = ".output {min-height: 500px}") as demo:
             file_txt_output = gr.File(label="txt格式")
             file_docx_output = gr.File(label="docx格式")
             file_pdf_output = gr.File(label="pdf格式")
-    # with gr.Accordion("Open for More!"):
-    #     gr.Markdown("Look at me...")
     text_button.click(textInput.text_dump_to_lines, inputs=[text_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
     file_button.click(textInput.file_dump_to_lines,inputs=[file_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
-try:
-    demo.queue().launch()
-except Exception as e:
-    print("error",e)

 from BERT_inference import BertClassificationModel
 output = []
 keys = []
 with gr.Blocks(css = ".output {min-height: 500px}") as demo:
     #用markdown语法编辑输出一段话
+    gr.Markdown("# TSA - 文本整理助手")
     gr.Markdown("请选择要输入的文件或填入文本")
     topic_num = gr.Number(label="主题个数")
     max_length = gr.Number(label="摘要最大长度")
             file_txt_output = gr.File(label="txt格式")
             file_docx_output = gr.File(label="docx格式")
             file_pdf_output = gr.File(label="pdf格式")
     text_button.click(textInput.text_dump_to_lines, inputs=[text_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
     file_button.click(textInput.file_dump_to_lines,inputs=[file_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
+demo.queue().launch()

outline.py CHANGED Viewed

@@ -65,15 +65,15 @@ def passage_outline(matrix,sentences):
     structure = {}
     for each in result.keys():
         structure[each] =[sentences[i] for i in result[each]]
-    outline = ""
     outline_list = []
     for key in sorted(structure.keys()):
         outline_list.append(f"主题：")
-        outline = outline+f"主题：\n"
         for sentence in structure[key]:
             outline_list.append(sentence)
-            outline = outline+f"- {sentence}\n"
-    return outline,outline_list
 if __name__ == "__main__":
     matrix = np.array([[0.0 ,0.02124888, 0.10647043 ,0.09494194 ,0.0689209 ],
                         [0.01600688 ,0.0 ,0.05879448 ,0.0331325 , 0.0155093 ],

     structure = {}
     for each in result.keys():
         structure[each] =[sentences[i] for i in result[each]]
+    outl = []
     outline_list = []
     for key in sorted(structure.keys()):
         outline_list.append(f"主题：")
+        outl.append(f"主题：\n")
         for sentence in structure[key]:
             outline_list.append(sentence)
+            outl.append(f"- {sentence}\n")
+    return outl,outline_list
 if __name__ == "__main__":
     matrix = np.array([[0.0 ,0.02124888, 0.10647043 ,0.09494194 ,0.0689209 ],
                         [0.01600688 ,0.0 ,0.05879448 ,0.0331325 , 0.0155093 ],

run.py CHANGED Viewed

@@ -8,21 +8,6 @@ from inference import BertClassificationModel
 # output:file/text/topic_sentence
-# file_process:
-# in util
-# read file code
-# file to json_text
-# convert:
-# in util
-# convert code
-# json_text to text
-# process:
-# in util
-# text process code
-# del stop seg
 def texClear(article):
     sentencesCleared = [util.clean_text(sentence) for sentence in article]
     sentencesCleared = [string for string in sentencesCleared if string != '' ]
@@ -36,7 +21,7 @@ def textToAb(sentences, article, topic_num, max_length):
     title_dict,title = util.generation(groups, max_length)
     # ans:
     # {Ai_abstruct:(main_sentence,paragraph)}
-    print(title)
     matrix = inference.inference_matrix(title)
     outl,outline_list = outline.passage_outline(matrix,title)

 # output:file/text/topic_sentence
 def texClear(article):
     sentencesCleared = [util.clean_text(sentence) for sentence in article]
     sentencesCleared = [string for string in sentencesCleared if string != '' ]
     title_dict,title = util.generation(groups, max_length)
     # ans:
     # {Ai_abstruct:(main_sentence,paragraph)}
+    # print(title)
     matrix = inference.inference_matrix(title)
     outl,outline_list = outline.passage_outline(matrix,title)

util.py CHANGED Viewed

@@ -15,7 +15,7 @@ def post_url(url, headers, payload):
 def seg(text):
     text = text.replace('\n', " ")
-    sentences = re.split(r'(?<=[。！？.!?:])\s*', text)
     sentences  = [string for string in sentences if string != '']
     return sentences
@@ -72,16 +72,18 @@ def generation(para, max_length):
             'Accept': 'application/json'
         }
-        response = post_url(url, headers, payload)
-        text_dict = json.loads(response.text)
         # print(text_dict)
-        while('summary' not in text_dict.keys()):
-            response = post_url(url, headers, payload)
-            text_dict = json.loads(response.text)
-            print("ReTrying")
-        topic[text_dict['summary']] = (j, k)
-        Ai_abstract.append(text_dict['summary'])
     return topic,Ai_abstract
 def formate_text(title_dict,outline_list):
     formated = []

 def seg(text):
     text = text.replace('\n', " ")
+    sentences = re.split(r'(?<=[。！？.!?: ])\s*', text)
     sentences  = [string for string in sentences if string != '']
     return sentences
             'Accept': 'application/json'
         }
+        # response = post_url(url, headers, payload)
+        # text_dict = json.loads(response.text)
         # print(text_dict)
+        # while('summary' not in text_dict.keys()):
+        #     response = post_url(url, headers, payload)
+        #     text_dict = json.loads(response.text)
+        #     print("ReTrying")
+        # topic[text_dict['summary']] = (j, k)
+        # Ai_abstract.append(text_dict['summary'])
+        topic[j] = (j, k)
+        Ai_abstract.append(j)
     return topic,Ai_abstract
 def formate_text(title_dict,outline_list):
     formated = []