# Hugging Face Spaces page header: "Running on Zero".
# File size: 7,875 bytes; revision 7b28e1c.
import gradio
import re
import spaces
import torch
import transformers
# Maps each model repository id to the display name used for it in this app.
# Insertion order is the order in which the names are listed.
MODEL_DICT = {
    "NousResearch/Llama-2-7b-chat-hf": "Llama 2 7B Chat",
    "xqt/llama_2_7b_chat_mbpp_base": "Llama 2 7B Chat fine tuned with Base MBPP",
    "xqt/llama_2_7b_chat_mbpp_synthetic": "Llama 2 7B Chat fine tuned with Synthetic MBPP",
    "xqt/llama_2_7b_chat_mbpp_mixed": "Llama 2 7B Chat fine tuned with Base and Synthetic MBPP",
}
def generate_prompt(sample):
    """Wrap a question in the Llama 2 chat prompt used by this app.

    The system section instructs the model to put its code answer between
    [PYTHON] and [/PYTHON] tags. The question is followed by an empty
    [TEST][/TEST] pair, and the prompt ends with an opening [PYTHON] tag
    and a newline.

    Args:
        sample: The question text inserted after the system section.

    Returns:
        The complete prompt string.
    """
    prompt_lines = (
        "<s>[INST] <<SYS>>",
        "You are a python programming assistant that obeys the constraints and passes the example test case.",
        "You wrap the code answer without any comments between [PYTHON] and [/PYTHON] tags.",
        "In case a test case is available, it is written inside [TEST] and [/TEST] tags.",
        "<</SYS>>",
        f"{sample}",
        "[TEST][/TEST]",
        "[/INST]",
        "[PYTHON]",
        "",  # yields the newline that follows the opening [PYTHON] tag
    )
    return "\n".join(prompt_lines)
def extract_text_between_tags(input_string, tag1, tag2):
    """Return every piece of text found between tag1 and tag2.

    Both tags are inserted into the regular expression unchanged, so the
    caller must escape any regex metacharacters in them. The text between
    the tags is matched non-greedily and may span multiple lines.

    Args:
        input_string: The text to search.
        tag1: Regex source for the opening tag.
        tag2: Regex source for the closing tag.

    Returns:
        A list of the captured strings, in order of appearance; empty when
        nothing matches.
    """
    between_tags = re.compile("".join((tag1, "(.*?)", tag2)), re.DOTALL)
    return between_tags.findall(input_string)
def load_model(name):
    """Load the 4-bit quantized model selected by display name, plus a tokenizer.

    The tokenizer is always taken from "NousResearch/Llama-2-7b-chat-hf",
    whichever model repository is loaded.

    Args:
        name: Display name of the model, i.e. one of the values of MODEL_DICT.

    Returns:
        A (model, tokenizer) tuple.

    Raises:
        gradio.Error: If name matches no value in MODEL_DICT, or if loading
            the model or the tokenizer fails for any reason.
    """
    gradio.Info(f"Loading Model {name} π€", duration = 5)

    # MODEL_DICT maps repository id -> display name; the caller passes the
    # display name, so the repository id has to be looked up in reverse.
    current_key = next(
        (repo_id for repo_id, label in MODEL_DICT.items() if label == name),
        None,
    )
    if current_key is None:
        raise gradio.Error(f"Model {name} could not be found π", duration = 5)

    try:
        nf4_config = transformers.BitsAndBytesConfig(
            load_in_4bit = True,
            bnb_4bit_quant_type = "nf4",
            bnb_4bit_use_double_quant = True,
            bnb_4bit_compute_dtype = torch.bfloat16
        )
        model = transformers.AutoModelForCausalLM.from_pretrained(
            current_key,
            quantization_config = nf4_config,
            device_map = {"": 0},
            use_cache = True
        )
        model.config.pretraining_tp = 1

        # NOTE(review): trust_remote_code=True allows code shipped with the
        # tokenizer repository to run; kept as in the original.
        tokenizer = transformers.AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf", trust_remote_code=True)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"
    except Exception as e:
        # Surface any loading failure in the UI, keeping the original
        # exception attached as the cause for debugging.
        raise gradio.Error(f"Encountered a problem π₯Ί: {e}") from e
    else:
        gradio.Info(f"Loaded Model {name} from {current_key} successfully π₯.", duration = 5)
        return model, tokenizer
@spaces.GPU(duration = 120)
def respond(model, message, chat_history):
    """Generate Python code for a question and append the exchange to the chat.

    The prompt is sent to the selected model. If the decoded output does not
    yet contain a closed [PYTHON]...[/PYTHON] answer, the whole output is fed
    back as the next prompt and generation continues, for a bounded number
    of attempts.

    Args:
        model: Display name of the model to use (a value of MODEL_DICT).
        message: The user's question.
        chat_history: List of (message, response) tuples; a new tuple is
            appended to it in place.

    Returns:
        A tuple ("", chat_history): an empty string for the message box and
        the updated history.

    Raises:
        gradio.Error: If the model or tokenizer could not be loaded.
    """
    # Do not spend a model load on a question that contains no text.
    if not message or not message.strip():
        gradio.Warning("Please enter a Python programming question first.", duration = 5)
        return "", chat_history

    loaded_model, tokenizer = load_model(model)
    if loaded_model is None or tokenizer is None:
        raise gradio.Error("Could not load model π", duration = 5)

    # The original loop counted 20, 40, ..., 500 but never passed that counter
    # to generate(): it only bounded the loop to 25 attempts, each of which
    # requested up to 500 new tokens. That behavior is kept, made explicit.
    # NOTE(review): 25 attempts of 500 tokens may not fit the duration given
    # to spaces.GPU above - confirm the intended budget.
    max_attempts = 25
    tokens_per_attempt = 500

    text = generate_prompt(message)
    code = ""
    for _ in range(max_attempts):
        encoded = tokenizer([text], return_tensors = "pt", padding = True).to(loaded_model.device)
        output_sequences = loaded_model.generate(**encoded, max_new_tokens = tokens_per_attempt, do_sample = True, top_p = 0.9)
        # The decoded output (prompt included) becomes the next prompt if the
        # answer is still incomplete.
        text = tokenizer.batch_decode(output_sequences, skip_special_tokens = True)[0]

        # The system prompt itself mentions "[PYTHON] and [/PYTHON]", so the
        # first match is that sentence fragment; the answer is the second.
        matches = extract_text_between_tags(text, r"\[PYTHON\]", r"\[/PYTHON\]")
        if len(matches) > 1:
            code = matches[1]
            break

    if code:
        response = f"Here is what I could write π\n```python\n{code}\n```\n"
    else:
        response = "Could not generate the code with the following configurations π¦."

    chat_history.append((message, response))
    return "", chat_history
# Markdown shown at the top of the page.
_HEADER_MARKDOWN = """
# π§βπ» Python Code Generation Assistant
Welcome to the **Python Code Generation Assistant** powered by **Llama 2** models! This application helps generate Python code solutions by leveraging fine-tuned large language models (LLMs) on benchmark and synthetic datasets. Whether you need help solving basic Python problems or want to explore code generation from AI, this app has you covered. Use the documentation below for help.
"""

# Markdown shown inside the collapsible "Documentation" section.
_DOCUMENTATION_MARKDOWN = """
## π Features
- **Model Selection**: Choose from multiple fine-tuned models:
- **Llama 2 7B Chat**: Standard pre-trained model.
- **Llama 2 7B Chat (Base MBPP)**: Fine-tuned on the MBPP benchmark dataset.
- **Llama 2 7B Chat (Synthetic MBPP)**: Fine-tuned on synthetic data generated from MBPP.
- **Llama 2 7B Chat (Mixed)**: Fine-tuned on both base and synthetic MBPP data.
- **Automatic Python Code Generation**: Generate Python code for your problem.
## π― How to Use the App
1. **Select a Model**:
- Use the dropdown menu to choose the model you'd like to use for code generation.
- By default, the app selects **Llama 2 7B Chat fine-tuned with Base MBPP**.
2. **Ask a Python Question**:
- Type a Python-related question or problem in the text box.
- Example: `"Write a function to find sequences of lowercase letters joined with an underscore."`
3. **Generate Python Code**:
- Press the **Send** button or hit **Enter** to generate the code.
- The model will respond with Python code wrapped in ```python blocks.
4. **Explore Example Questions**:
- You can try out some preloaded examples from the **Examples Dataset** at the bottom. Just click on one to automatically populate the input.
5. **Clear the Chat**:
- Use the **Clear** button to reset the chat and start fresh.
## π Models
| Model Name | Description |
|------------|-------------|
| **Llama 2 7B Chat** | A pre-trained model for general Python code generation. |
| **Base MBPP** | Fine-tuned on the **MBPP** (Mostly Basic Python Problems) dataset. |
| **Synthetic MBPP** | Fine-tuned on a synthetic dataset generated from MBPP. |
| **Mixed MBPP** | Fine-tuned on both base and synthetic MBPP datasets. |
## π οΈ Troubleshooting
If you encounter issues:
- Ensure you're selecting the correct model.
- If the code isn't generating as expected, try reformulating the question.
- For further debugging, error messages will be displayed if something goes wrong.
## π Example Prompts
- `"Write a Python function to count hexadecimal numbers for a given range."`
- `"Write a function to perform the concatenation of two string tuples."`
- `"Generate a Python program to reverse a string."`
## π₯οΈ About the Technology
This app uses a **4-bit quantized version of Llama 2 7B** models to enhance performance while minimizing resource consumption. These models have been fine-tuned on **MBPP** and **synthetic datasets** to provide optimized code generation for Python programming tasks.
---
Happy Coding! πβ¨
"""

# Page layout and event wiring.
# NOTE(review): the nesting of the Row/Column contents below is reconstructed;
# confirm it against the intended layout.
with gradio.Blocks() as base_app:
    header = gradio.Markdown(_HEADER_MARKDOWN)

    model_choice_dropdown = gradio.Dropdown(
        # Pass a real list rather than a dict view.
        choices = list(MODEL_DICT.values()),
        value = "Llama 2 7B Chat fine tuned with Base MBPP",
        interactive = True
    )
    chatbot = gradio.Chatbot()

    with gradio.Row():
        with gradio.Column():
            message_box = gradio.Textbox(placeholder = "Write a python programming question you need the code for.")
        with gradio.Column():
            # Labelled "Send" so the button matches the in-app documentation.
            send_button = gradio.Button(value = "Send")
            clear_button = gradio.ClearButton([message_box, chatbot])

    # Clicking the button and pressing Enter in the text box do the same
    # thing: run respond, then update the text box and the chat.
    send_button.click(respond, [model_choice_dropdown, message_box, chatbot], [message_box, chatbot])
    message_box.submit(respond, [model_choice_dropdown, message_box, chatbot], [message_box, chatbot])

    example_dataset = gradio.Dataset(components = [message_box], samples = [
        ["Write a function to find sequences of lowercase letters joined with an underscore."],
        ["Write a python function to count hexadecimal numbers for a given range."],
        ["Write a function to perform the concatenation of two string tuples."]
    ])
    # Selecting a sample copies its first entry into the message box.
    example_dataset.select(lambda x: x[0], [example_dataset], [message_box])

    with gradio.Accordion("Documentation", open = False):
        documentation = gradio.Markdown(_DOCUMENTATION_MARKDOWN)
# Start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    base_app.launch()