Spaces:
Running
Running
StevenChen16
committed on
Commit
•
a9547b0
1
Parent(s):
6226cf7
Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
from threading import Thread
|
4 |
-
from
|
5 |
-
from llamafactory.extras.misc import torch_gc
|
6 |
-
|
7 |
|
8 |
DESCRIPTION = '''
|
9 |
<div>
|
@@ -37,14 +35,9 @@ h1 {
|
|
37 |
}
|
38 |
"""
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
finetuning_type="lora",
|
44 |
-
quantization_bit=8,
|
45 |
-
use_unsloth=True,
|
46 |
-
)
|
47 |
-
chat_model = ChatModel(args)
|
48 |
|
49 |
background_prompt = """
|
50 |
As an AI legal assistant, you are a highly trained expert in U.S. and Canadian law. Your purpose is to provide accurate, comprehensive, and professional legal information to assist users with a wide range of legal questions. When answering questions, you should actively ask questions to obtain more information, analyze from different perspectives, and explain your reasoning process to the user. Please adhere to the following guidelines:
|
@@ -81,15 +74,44 @@ As an AI legal assistant, you are a highly trained expert in U.S. and Canadian l
|
|
81 |
Please remember that your role is to provide general legal information and analysis, but also to actively guide and interact with the user during the conversation in a personalized and professional manner. If you feel that necessary information is missing to provide targeted analysis and advice, take the initiative to ask until you believe you have sufficient details. However, also be mindful to avoid over-inquiring or disregarding the user's needs and concerns. Now, please guide me step by step to describe the legal issues I am facing, according to the above requirements.
|
82 |
"""
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
def query_model(user_input, history):
|
85 |
combined_query = background_prompt + user_input
|
86 |
-
|
87 |
-
|
88 |
-
response = ""
|
89 |
-
for new_text in chat_model.stream_chat(messages, temperature=0.9):
|
90 |
-
response += new_text
|
91 |
-
yield response
|
92 |
-
|
93 |
# Gradio block
|
94 |
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
|
95 |
|
@@ -110,4 +132,4 @@ with gr.Blocks(css=css) as demo:
|
|
110 |
gr.Markdown(LICENSE)
|
111 |
|
112 |
if __name__ == "__main__":
|
113 |
-
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
from threading import Thread
|
4 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
|
|
|
|
5 |
|
6 |
DESCRIPTION = '''
|
7 |
<div>
|
|
|
35 |
}
|
36 |
"""
|
37 |
|
38 |
+
# Load the tokenizer and model once at module import time.
# device_map="auto" lets accelerate place the weights across the
# available GPU(s)/CPU instead of pinning to a single device.
tokenizer = AutoTokenizer.from_pretrained("StevenChen16/llama3-8b-Lawyer")
model = AutoModelForCausalLM.from_pretrained("StevenChen16/llama3-8b-Lawyer", device_map="auto")  # to("cuda:0")
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
background_prompt = """
|
43 |
As an AI legal assistant, you are a highly trained expert in U.S. and Canadian law. Your purpose is to provide accurate, comprehensive, and professional legal information to assist users with a wide range of legal questions. When answering questions, you should actively ask questions to obtain more information, analyze from different perspectives, and explain your reasoning process to the user. Please adhere to the following guidelines:
|
|
|
74 |
Please remember that your role is to provide general legal information and analysis, but also to actively guide and interact with the user during the conversation in a personalized and professional manner. If you feel that necessary information is missing to provide targeted analysis and advice, take the initiative to ask until you believe you have sufficient details. However, also be mindful to avoid over-inquiring or disregarding the user's needs and concerns. Now, please guide me step by step to describe the legal issues I am facing, according to the above requirements.
|
75 |
"""
|
76 |
|
77 |
+
terminators = [
|
78 |
+
tokenizer.eos_token_id,
|
79 |
+
tokenizer.convert_tokens_to_ids("")
|
80 |
+
]
|
81 |
+
|
82 |
+
def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
    """Stream a chat completion from the Llama-3 lawyer model.

    Args:
        message: The latest user message.
        history: Prior turns as a list of (user, assistant) string pairs.
        temperature: Sampling temperature; 0 selects greedy decoding.
        max_new_tokens: Cap on the number of generated tokens.

    Yields:
        The accumulated response text after each newly streamed chunk.
    """
    # Rebuild the full conversation in the chat-template message format.
    conversation = []
    for user, assistant in history:
        conversation.extend([
            {"role": "user", "content": user},
            {"role": "assistant", "content": assistant},
        ])
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)

    # skip_prompt=True so the echoed prompt is not streamed back to the UI.
    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        eos_token_id=terminators,
    )
    if temperature == 0:
        # Greedy decoding: disable sampling AND drop the temperature kwarg —
        # passing temperature=0.0 to generate() triggers validation
        # warnings/errors in recent transformers releases.
        generate_kwargs["do_sample"] = False
        del generate_kwargs["temperature"]

    # Run generation on a worker thread so this generator can consume the
    # streamer concurrently and yield partial output to the UI.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
|
110 |
+
|
111 |
def query_model(user_input, history):
    """Prepend the legal-assistant system prompt and stream the model reply.

    Gradio only streams incrementally when the callback itself is a
    generator function, so delegate with ``yield from`` — the previous
    ``return chat_llama3_8b(...)`` handed Gradio a raw generator object
    from a plain function, which breaks token-by-token streaming.

    Args:
        user_input: The raw user message from the chat box.
        history: Prior (user, assistant) turns supplied by Gradio.

    Yields:
        Progressively longer partial responses.
    """
    combined_query = background_prompt + user_input
    yield from chat_llama3_8b(combined_query, history, temperature=0.9, max_new_tokens=512)
|
114 |
+
|
|
|
|
|
|
|
|
|
|
|
115 |
# Gradio block
|
116 |
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
|
117 |
|
|
|
132 |
gr.Markdown(LICENSE)
|
133 |
|
134 |
if __name__ == "__main__":
|
135 |
+
demo.launch(share=True)
|