harpreetsahota committed
Commit 7a7f507
1 Parent(s): cb446a2

Create app.py

Files changed (1): app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
+ import os
+ import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+
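+ # Access token used to download the early-access checkpoint, read from
+ # the environment (e.g. a Space secret).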
+ token = os.environ["HUGGINGFACEHUB_API_TOKEN"]
+
+ model_id = 'Deci-early-access/DeciLM-7B-instruct-early'
+
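+ # Prompt template wrapped around every user instruction before generation.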
+ SYSTEM_PROMPT_TEMPLATE = """### System: You are an AI assistant that follows instruction extremely well. Help as much as you can.
+ ### User:
+
+ {instruction}
+
+ ### Assistant:
+ """
+
+ DESCRIPTION = """
+ # <p style="text-align: center; color: #292b47;"> 🤖 <span style='color: #3264ff;'>DeciLM-7B-Instruct:</span> A Fast Instruction-Tuned Model 💨 </p>
+ <span style='color: #292b47;'>Welcome to <a href="https://huggingface.co/Deci/DeciLM-7B-instruct" style="color: #3264ff;">DeciLM-7B-Instruct</a>! DeciLM-7B-Instruct is a 7B-parameter instruction-tuned language model released under the Llama license. It is instruction-tuned rather than chat-tuned: prompt it with an instruction that describes a task, and it will respond appropriately to complete that task.</span>
+ <p><span style='color: #292b47;'>Learn more about the base model <a href="" style="color: #3264ff;">DeciLM-7B</a>.</span></p>
+ """
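+
+ # 4-bit weight loading cuts memory to roughly a quarter of the fp16
+ # footprint, so the 7B model fits on a single modest GPU; bfloat16 is
+ # used as the compute dtype.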
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )
+
+ if not torch.cuda.is_available():
+     DESCRIPTION += 'You need a GPU for this example. Try using Colab: '
+
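+ # Load the quantized model on GPU; on CPU-only hosts fall back to None so
+ # the UI still renders alongside the warning above.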
+ if torch.cuda.is_available():
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id,
+         device_map="auto",
+         trust_remote_code=True,
+         quantization_config=bnb_config,
+         # use_flash_attention_2=True,  # DeciLM doesn't use flash_attention_2
+         token=token,  # this token will be deleted
+     )
+ else:
+     model = None
+
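+ # Use the EOS token for padding, since no dedicated pad token is set.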
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
+ tokenizer.pad_token = tokenizer.eos_token
+
+ # Wrap the user's message in the instruction prompt template.
+ def get_prompt_with_template(message: str) -> str:
+     return SYSTEM_PROMPT_TEMPLATE.format(instruction=message)
+
+ # Generate the model's full response (prompt text included) for a message.
+ def generate_model_response(message: str) -> str:
+     prompt = get_prompt_with_template(message)
+     inputs = tokenizer(prompt, return_tensors='pt')
+     if torch.cuda.is_available():
+         inputs = inputs.to('cuda')
+     output = model.generate(
+         **inputs,
+         max_new_tokens=4096,
+         do_sample=True,
+         temperature=0.1,
+     )
+     return tokenizer.decode(output[0], skip_special_tokens=True)
+
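+ # Keep only the text after the "### Assistant:" marker; fall back to the
+ # full decoded string if the marker is missing.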
+ def extract_response_content(full_response: str) -> str:
+     response_start_index = full_response.find("### Assistant:")
+     if response_start_index != -1:
+         return full_response[response_start_index + len("### Assistant:"):].strip()
+     else:
+         return full_response
+
+ def get_response_with_template(message: str) -> str:
+     full_response = generate_model_response(message)
+     return extract_response_content(full_response)
+
+
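+ # Build the Gradio UI: description, output box, instruction input, and
+ # Submit / Clear buttons wired to the generation functions above.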
+ with gr.Blocks(css="style.css") as demo:
+     gr.Markdown(DESCRIPTION)
+     gr.DuplicateButton(value='Duplicate Space for private use',
+                        elem_id='duplicate-button')
+     with gr.Group():
+         chatbot = gr.Textbox(label='DeciLM-7B-Instruct Output:')
+         with gr.Row():
+             textbox = gr.Textbox(
+                 container=False,
+                 show_label=False,
+                 placeholder='Type an instruction...',
+                 scale=10,
+                 elem_id="textbox"
+             )
+             submit_button = gr.Button(
+                 '💬 Submit',
+                 variant='primary',
+                 scale=1,
+                 min_width=0,
+                 elem_id="submit_button"
+             )
+
+     # Clear button to reset the input and output boxes
+     clear_button = gr.Button(
+         '🗑️ Clear',
+         variant='secondary',
+     )
+
+     clear_button.click(
+         fn=lambda: ('', ''),
+         outputs=[textbox, chatbot],
+         queue=False,
+         api_name=False,
+     )
+
+     submit_button.click(
+         fn=get_response_with_template,
+         inputs=textbox,
+         outputs=chatbot,
+         queue=False,
+         api_name=False,
+     )
+
+     gr.Examples(
+         examples=[
+             'Write detailed instructions for making chocolate chip pancakes.',
+             'Write a 250-word article about your love of pancakes.',
+             'Explain the plot of Back to the Future in three sentences.',
+             'How do I make a trap beat?',
+             'A step-by-step guide to learning Python in one month.',
+         ],
+         inputs=textbox,
+         outputs=chatbot,
+         fn=get_response_with_template,
+         cache_examples=True,
+         elem_id="examples"
+     )
+
+     gr.HTML(label="Keep in touch", value="<img src='https://huggingface.co/spaces/Deci/DeciLM-6b-instruct/resolve/main/deci-coder-banner.png' alt='Keep in touch' style='display: block; color: #292b47; margin: auto; max-width: 800px;'>")
+
+ demo.launch()