StevenChen16 committed on
Commit
8f48aeb
1 Parent(s): dd91d0c

Update app.py to use multiple threads

Browse files
Files changed (1)
  1. app.py +34 -23
app.py CHANGED
@@ -3,41 +3,53 @@ from llamafactory.chat import ChatModel
3
  from llamafactory.extras.misc import torch_gc
4
  import re
5
  import spaces
 
6
 
7
  def split_into_sentences(text):
8
  sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
9
  sentences = sentence_endings.split(text)
10
  return [sentence.strip() for sentence in sentences if sentence]
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  @spaces.GPU(duration=120)
13
  def process_paragraph(paragraph, progress=gr.Progress()):
14
  sentences = split_into_sentences(paragraph)
15
- results = []
16
  total_sentences = len(sentences)
 
 
17
  for i, sentence in enumerate(sentences):
18
- progress((i + 1) / total_sentences)
19
- messages.append({"role": "user", "content": sentence})
20
- sentence_response = ""
21
- for new_text in chat_model.stream_chat(messages, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=300):
22
- sentence_response += new_text.strip()
23
- category = sentence_response.strip().lower().replace(' ', '_')
24
- if category != "fair":
25
- results.append((sentence, category))
26
- else:
27
- results.append((sentence, "fair"))
28
- messages.append({"role": "assistant", "content": sentence_response})
29
- torch_gc()
30
- return results
31
 
 
32
 
33
  args = dict(
34
- model_name_or_path="princeton-nlp/Llama-3-Instruct-8B-SimPO", # 使用量化的 Llama-3-8B-Instruct 模型
35
- # model_name_or_path="StevenChen16/llama3-8b-compliance-review",
36
- # adapter_name_or_path="StevenChen16/llama3-8b-compliance-review-adapter", # 加载保存的 LoRA 适配器
37
- template="llama3", # 与训练时使用的模板相同
38
- finetuning_type="lora", # 与训练时使用的微调类型相同
39
- quantization_bit=8, # 加载 4-bit 量化模型
40
- use_unsloth=True, # 使用 UnslothAI 的 LoRA 优化以加速生成
41
  )
42
  chat_model = ChatModel(args)
43
  messages = []
@@ -56,7 +68,6 @@ label_to_color = {
56
  }
57
 
58
  with gr.Blocks() as demo:
59
-
60
  with gr.Row(equal_height=True):
61
  with gr.Column():
62
  input_text = gr.Textbox(label="Input Paragraph", lines=10, placeholder="Enter the paragraph here...")
@@ -71,4 +82,4 @@ with gr.Blocks() as demo:
71
 
72
  btn.click(on_click, inputs=input_text, outputs=[output])
73
 
74
- demo.launch(share=True)
 
3
  from llamafactory.extras.misc import torch_gc
4
  import re
5
  import spaces
6
+ from threading import Thread
7
 
8
  def split_into_sentences(text):
9
  sentence_endings = re.compile(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s')
10
  sentences = sentence_endings.split(text)
11
  return [sentence.strip() for sentence in sentences if sentence]
12
 
13
+ @spaces.GPU(duration=120)
14
+ def process_sentence(sentence, index, results, messages, progress, total_sentences):
15
+ messages.append({"role": "user", "content": sentence})
16
+ sentence_response = ""
17
+ for new_text in chat_model.stream_chat(messages, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=300):
18
+ sentence_response += new_text.strip()
19
+ category = sentence_response.strip().lower().replace(' ', '_')
20
+ if category != "fair":
21
+ results[index] = (sentence, category)
22
+ else:
23
+ results[index] = (sentence, "fair")
24
+ messages.append({"role": "assistant", "content": sentence_response})
25
+ torch_gc()
26
+ progress((index + 1) / total_sentences)
27
+
28
  @spaces.GPU(duration=120)
29
  def process_paragraph(paragraph, progress=gr.Progress()):
30
  sentences = split_into_sentences(paragraph)
31
+ results = [None] * len(sentences)
32
  total_sentences = len(sentences)
33
+ threads = []
34
+
35
  for i, sentence in enumerate(sentences):
36
+ thread = Thread(target=process_sentence, args=(sentence, i, results, messages.copy(), progress, total_sentences))
37
+ threads.append(thread)
38
+ thread.start()
39
+
40
+ for thread in threads:
41
+ thread.join()
 
 
 
 
 
 
 
42
 
43
+ return results
44
 
45
  args = dict(
46
+ model_name_or_path="princeton-nlp/Llama-3-Instruct-8B-SimPO", # 使用量化的 Llama-3-8B-Instruct 模型
47
+ # model_name_or_path="StevenChen16/llama3-8b-compliance-review",
48
+ # adapter_name_or_path="StevenChen16/llama3-8b-compliance-review-adapter", # 加载保存的 LoRA 适配器
49
+ template="llama3", # 与训练时使用的模板相同
50
+ finetuning_type="lora", # 与训练时使用的微调类型相同
51
+ quantization_bit=8, # 加载 8-bit 量化模型
52
+ use_unsloth=True, # 使用 UnslothAI 的 LoRA 优化以加速生成
53
  )
54
  chat_model = ChatModel(args)
55
  messages = []
 
68
  }
69
 
70
  with gr.Blocks() as demo:
 
71
  with gr.Row(equal_height=True):
72
  with gr.Column():
73
  input_text = gr.Textbox(label="Input Paragraph", lines=10, placeholder="Enter the paragraph here...")
 
82
 
83
  btn.click(on_click, inputs=input_text, outputs=[output])
84
 
85
+ demo.launch(share=True)