thliang01 committed on
Commit
8e7da8f
1 Parent(s): 3bdc527

feat: add LLM model

Browse files
Files changed (2) hide show
  1. app.py +106 -6
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,14 +1,114 @@
1
  import gradio as gr
2
  import spaces
 
 
3
  import torch
 
 
 
 
 
 
4
 
5
  zero = torch.Tensor([0]).cuda()
6
  print(zero.device) # <-- 'cpu' 🤔
7
 
8
- @spaces.GPU
9
- def greet(n):
10
- print(zero.device) # <-- 'cuda:0' 🤗
11
- return f"Hello {zero + n} Tensor"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
14
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import spaces
3
+ import os
4
+ import spaces
5
  import torch
6
+ from transformers import GemmaTokenizer, AutoModelForCausalLM
7
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
8
+ from threading import Thread
9
+
10
+ # Read the Hugging Face access token from the environment (None if unset)
11
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
12
 
13
  zero = torch.Tensor([0]).cuda()
14
  print(zero.device) # <-- 'cpu' 🤔
15
 
16
+ LICENSE = """
17
+ <p/>
18
+ ---
19
+ Built with Meta Llama 3
20
+ """
21
+
22
+ # Load the tokenizer and model
23
+ tokenizer = AutoTokenizer.from_pretrained("taide/Llama3-TAIDE-LX-8B-Chat-Alpha1")
24
+ model = AutoModelForCausalLM.from_pretrained("taide/Llama3-TAIDE-LX-8B-Chat-Alpha1") # to("cuda:0")
25
+ terminators = [
26
+ tokenizer.eos_token_id,
27
+ tokenizer.convert_tokens_to_ids("<|eot_id|>")
28
+ ]
29
+
30
@spaces.GPU(duration=120)
def chat_taide_8b(message: str,
                  history: list,
                  temperature: float,
                  max_new_tokens: int
                  ):
    """Stream a chat reply from the module-level model.

    This is a generator: it yields the reply accumulated so far each time
    the streamer delivers a new text chunk.

    Args:
        message: The new user message.
        history: Previous turns as ``(user, assistant)`` pairs.
        temperature: Sampling temperature; a value of 0 (or below) selects
            greedy decoding instead of sampling.
        max_new_tokens: Upper bound on the number of generated tokens.

    Yields:
        The cumulative response text generated so far.
    """
    # Rebuild the conversation in the role/content format the chat template expects.
    conversation = []
    for user, assistant in history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    # add_generation_prompt=True appends the assistant header so the model
    # answers the user instead of continuing the user's turn.
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        # UI sliders may deliver a float; generate() needs an integer count.
        max_new_tokens=int(max_new_tokens),
        eos_token_id=terminators,
    )
    if temperature > 0:
        generate_kwargs.update(do_sample=True, temperature=temperature)
    else:
        # Sampling with temperature 0 is rejected by generate(); decode
        # greedily and do not pass a temperature at all.
        generate_kwargs["do_sample"] = False

    # generate() blocks until completion, so run it in a worker thread and
    # consume the streamer from this one.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    partial = ""
    for text in streamer:
        partial += text
        yield partial

    # The streamer is exhausted, so generation has finished; reap the thread.
    worker.join()
75
+
76
+
77
+ # Gradio block
78
+ chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
79
 
80
+ with gr.Blocks(fill_height=True, css=css) as demo:
81
+
82
+ gr.Markdown(DESCRIPTION)
83
+ gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
84
+ gr.ChatInterface(
85
+ fn=chat_taide_8b,
86
+ chatbot=chatbot,
87
+ fill_height=True,
88
+ additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
89
+ additional_inputs=[
90
+ gr.Slider(minimum=0,
91
+ maximum=1,
92
+ step=0.1,
93
+ value=0.95,
94
+ label="Temperature",
95
+ render=False),
96
+ gr.Slider(minimum=128,
97
+ maximum=4096,
98
+ step=1,
99
+ value=512,
100
+ label="Max new tokens",
101
+ render=False ),
102
+ ],
103
+ examples=[
104
+ ['請以以下內容為基礎,寫一篇文章:撰寫一篇作文,題目為《一張舊照片》,內容要求為:選擇一張令你印象深刻的照片,說明令你印象深刻的原因,並描述照片中的影像及背後的故事。記錄成長的過程、與他人的情景、環境變遷和美麗的景色。'],
105
+ ['請以品牌經理的身份,給廣告公司的創意總監寫一封信,提出對於新產品廣告宣傳活動的創意建議。'],
106
+ ['以下提供英文內容,請幫我翻譯成中文。Dongshan coffee is famous for its unique position, and the constant refinement of production methods. The flavor is admired by many caffeine afficionados.'],
107
+ ],
108
+ cache_examples=False,
109
+ )
110
+
111
+ gr.Markdown(LICENSE)
112
+
113
+ if __name__ == "__main__":
114
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ SentencePiece