Upload 210 files
This view is limited to 50 files because it contains too many changes; see the raw diff for the complete upload.
- .gitattributes +3 -0
- .gitignore +48 -0
- Llama2-Code-Interpreter/.gitignore +48 -0
- Llama2-Code-Interpreter/README.md +131 -0
- Llama2-Code-Interpreter/assets/TSLA_90days.png +0 -0
- Llama2-Code-Interpreter/assets/logo.png +0 -0
- Llama2-Code-Interpreter/assets/logo2.png +0 -0
- Llama2-Code-Interpreter/assets/president_code.gif +0 -0
- Llama2-Code-Interpreter/assets/president_code.png +0 -0
- Llama2-Code-Interpreter/assets/result_nvidia_chart.gif +3 -0
- Llama2-Code-Interpreter/assets/tok_hist.png +0 -0
- Llama2-Code-Interpreter/chatbot.py +238 -0
- Llama2-Code-Interpreter/code_interpreter/BaseCodeInterpreter.py +59 -0
- Llama2-Code-Interpreter/code_interpreter/GPTCodeInterpreter.py +234 -0
- Llama2-Code-Interpreter/code_interpreter/GPTCodeInterpreterDataCollect.py +271 -0
- Llama2-Code-Interpreter/code_interpreter/JuypyterClient.py +75 -0
- Llama2-Code-Interpreter/code_interpreter/LlamaCodeInterpreter.py +286 -0
- Llama2-Code-Interpreter/code_interpreter/RetrospectiveGPTCodeInterpreter.py +472 -0
- Llama2-Code-Interpreter/code_interpreter/__pycache__/JuypyterClient.cpython-311.pyc +0 -0
- Llama2-Code-Interpreter/code_interpreter/__pycache__/LlamaCodeInterpreter.cpython-311.pyc +0 -0
- Llama2-Code-Interpreter/code_interpreter/llama_hf.py +101 -0
- Llama2-Code-Interpreter/eval/eval.md +0 -0
- Llama2-Code-Interpreter/eval/gsm8k.py +115 -0
- Llama2-Code-Interpreter/eval/human_eval.py +289 -0
- Llama2-Code-Interpreter/eval/inference.py +204 -0
- Llama2-Code-Interpreter/finetuning/__pycache__/conversation_template.cpython-311.pyc +0 -0
- Llama2-Code-Interpreter/finetuning/codellama_wrapper.py +21 -0
- Llama2-Code-Interpreter/finetuning/conversation_template.py +80 -0
- Llama2-Code-Interpreter/finetuning/train.py +336 -0
- Llama2-Code-Interpreter/prompt/__init__.py +0 -0
- Llama2-Code-Interpreter/prompt/gpt4_prompt.py +277 -0
- Llama2-Code-Interpreter/requirements.txt +32 -0
- Llama2-Code-Interpreter/utils/__pycache__/special_tok_llama2.cpython-311.pyc +0 -0
- Llama2-Code-Interpreter/utils/check_nb_out.py +20 -0
- Llama2-Code-Interpreter/utils/check_nb_plot_img_out.py +81 -0
- Llama2-Code-Interpreter/utils/cleaner.py +28 -0
- Llama2-Code-Interpreter/utils/const.py +314 -0
- Llama2-Code-Interpreter/utils/convert_llama_weights_to_hf.py +375 -0
- Llama2-Code-Interpreter/utils/special_tok_llama2.py +14 -0
- OpenCodeInterpreter/LICENSE +201 -0
- OpenCodeInterpreter/README.md +83 -0
- OpenCodeInterpreter/data_collection/Local-Code-Interpreter/LICENSE +201 -0
- OpenCodeInterpreter/data_collection/Local-Code-Interpreter/README.md +143 -0
- OpenCodeInterpreter/data_collection/Local-Code-Interpreter/README_CN.md +140 -0
- OpenCodeInterpreter/data_collection/Local-Code-Interpreter/config_example/config.azure.example.json +24 -0
- OpenCodeInterpreter/data_collection/Local-Code-Interpreter/config_example/config.example.json +32 -0
- OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/1.jpg +0 -0
- OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/2.jpg +0 -0
- OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/3.jpg +0 -0
- OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/4.jpg +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/result_nvidia_chart.gif filter=lfs diff=lfs merge=lfs -text
+Llama2-Code-Interpreter/assets/result_nvidia_chart.gif filter=lfs diff=lfs merge=lfs -text
+OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/save_to_notebook_demo.gif filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,48 @@
# Ignore .ckpt files
ckpt

# Ignore Python compiled files
__pycache__/
*.py[cod]

# Ignore Python virtual environment
venv/

# Ignore Jupyter notebook checkpoints
.ipynb_checkpoints/
.git/
.vscode/

# Ignore .DS_Store on MacOS
.DS_Store

rilab_key.txt
gpt4_custom_code_interpreter/rilab_key.txt
openai_api_key.txt

gpt4_custom_code_interpreter/
tmp/
output/
wandb/

utils/const.py
utils/hf_model_upload.py
gpt_data_gen/
*.json
*.txt
*.sh
*.pt
*.pth
*.ckpt
*.tokenizer

# eval data
eval/ds1000_data
eval/grade-school-math

# gradio features
chatbot_feat.py
chatbot_feat2.py
gradio_test.py
Llama2-Code-Interpreter/.gitignore
ADDED
@@ -0,0 +1,48 @@
# Ignore .ckpt files
ckpt

# Ignore Python compiled files
__pycache__/
*.py[cod]

# Ignore Python virtual environment
venv/

# Ignore Jupyter notebook checkpoints
.ipynb_checkpoints/
.git/
.vscode/

# Ignore .DS_Store on MacOS
.DS_Store

rilab_key.txt
gpt4_custom_code_interpreter/rilab_key.txt
openai_api_key.txt

gpt4_custom_code_interpreter/
tmp/
output/
wandb/

utils/const.py
utils/hf_model_upload.py
gpt_data_gen/
*.json
*.txt
*.sh
*.pt
*.pth
*.ckpt
*.tokenizer

# eval data
eval/ds1000_data
eval/grade-school-math

# gradio features
chatbot_feat.py
chatbot_feat2.py
gradio_test.py
Llama2-Code-Interpreter/README.md
ADDED
@@ -0,0 +1,131 @@
<p align="center" width="100%">
<img src="/assets/logo2.png" alt="llama2 code interpreter icon" style="width: 200px; height:200px; display: block; margin: auto; border-radius: 50%;">
</p>


# Llama2 Code Interpreter

<p align="center">
🤗 <a href="https://huggingface.co/Seungyoun/codellama-7b-instruct-pad" target="_blank">CodeLlama 7B Finetuned Model (HF)</a>
</p>


[![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

This project allows an LLM to generate code, execute it, receive feedback, debug, and answer questions based on the whole process. It is designed to be intuitive and versatile, capable of dealing with multiple languages and frameworks.

[The purpose and direction of the project](https://github.com/SeungyounShin/Llama2-Code-Interpreter/wiki)

## Quick Start

**Run the Gradio App**:
```bash
python3 chatbot.py --path Seungyoun/codellama-7b-instruct-pad
```

## News

- 🔥🔥🔥 [2023/08/27] We're thrilled to announce that our **[🤗 Llama2 Code Interpreter-7B](https://huggingface.co/Seungyoun/codellama-7b-instruct-pad)** model (finetuned from [CodeLlama-7B-Instruct](https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf)) achieved a remarkable **70.12 pass@1** on the [HumanEval Benchmarks](https://github.com/openai/human-eval).


**HumanEval**

| Model | Score (pass@1) |
|------------------------------------|--------|
| CodeLlama instruct 7B              | 34.8%  |
| CodeLlama instruct 7B - finetuned  | 70.12% |

**GSM8K**

| Model | Score |
|------------------------------------|--------|
| Code Llama 7B                      | 13%    |
| Code Llama 13B                     | 20.8%  |
| CodeLlama instruct 7B - finetuned  | 28%    |


## 🌟 Key Features

- [x] 🚀 **Code Generation and Execution**: Llama2 generates code, which it then automatically identifies and executes within its generated code blocks.
- [x] Monitors and retains Python variables that were used in previously executed code blocks (see the sketch after this list).
- [x] 🌟 At the moment, my focus is on "Data development for GPT-4 code interpretation" and "Enhancing the model using this data". For more details, check out the [feat/finetuning branch](https://github.com/SeungyounShin/Llama2-Code-Interpreter/tree/feat/finetuning) in our repository.
- [x] 🌟 CodeLlama support: [CodeLlama2](https://github.com/facebookresearch/codellama)

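The loop behind these two features is sketched below. This is an editor's illustration of the flow implemented in `chatbot.py` and `code_interpreter/LlamaCodeInterpreter.py`, not code copied from them: the model's output is scanned for a code block, the block runs in one persistent Jupyter kernel (so variables survive between blocks), and the execution result is appended to the prompt before generation continues.

```python
def run_dialog_turn(interpreter, max_try: int = 5) -> str:
    """Sketch of one assistant turn: generate, run code, feed the result back."""
    prompt = interpreter.dialog_to_prompt(dialog=interpreter.dialog)
    generated = interpreter.generate(prompt)
    has_code, code_block = interpreter.extract_code_blocks(generated)

    attempt = 0
    while has_code and attempt < max_try:
        # Run the block in the shared Jupyter kernel; variables from earlier
        # blocks are still defined because the kernel is never restarted.
        result, _error = interpreter.execute_code_and_return_output(code_block)

        # Append the result (the real code wraps it in B_RESULT / E_RESULT
        # special tokens) so the model can debug or draw a conclusion.
        generated = f"{generated}\n[RESULT]\n{result}\n[/RESULT]\n"
        prompt = f"{prompt}{generated}"

        generated = interpreter.generate(prompt)
        has_code, code_block = interpreter.extract_code_blocks(generated)
        attempt += 1

    return generated
```
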
## Examples

---
<div align="center">

***Llama2 in Action***

<p align="center" width="100%">
<img src="assets/result_nvidia_chart.gif" alt="example1_president_search_with_code" style="width: 600px; display: block; margin: auto; border-radius: 50%;">
</p>

</div>

In the GIF, Llama2 is seen in action. A user types in the request: `Plot Nvidia 90 days chart.` Llama2, an advanced code interpreter fine-tuned on a select dataset, swiftly queries `Yahoo Finance`. Moments later, it fetches the latest Nvidia stock prices from the past 90 days. Using `Matplotlib`, Llama2 then generates a clear and detailed stock price chart for Nvidia, showcasing its performance over the given period.
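The snippet below is a plausible reconstruction of the kind of code the model emits for this request, assuming the `yfinance` and `matplotlib` packages are available; it is not the model's actual output from the GIF.

```python
import datetime as dt

import matplotlib.pyplot as plt
import yfinance as yf

# Fetch roughly the last 90 days of NVDA prices from Yahoo Finance.
end = dt.date.today()
start = end - dt.timedelta(days=90)
nvda = yf.download("NVDA", start=start, end=end)

# Plot the daily closing price.
plt.figure(figsize=(10, 5))
plt.plot(nvda.index, nvda["Close"])
plt.title("NVDA closing price, last 90 days")
plt.xlabel("Date")
plt.ylabel("Close (USD)")
plt.tight_layout()
plt.savefig("nvda_90days.png")
```
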
## Installation

1. **Clone the Repository (if you haven't already)**:
```bash
git clone https://github.com/SeungyounShin/Llama2-Code-Interpreter.git
cd Llama2-Code-Interpreter
```

2. **Install the required dependencies:**
```bash
pip install -r requirements.txt
```

---

### Run App with the GPT4-Finetuned Llama Model

To start interacting with Llama2 via the Gradio UI using `codellama-7b-instruct-pad`, follow the steps below:

3. **Run the Gradio App**:
```bash
python3 chatbot.py --path Seungyoun/codellama-7b-instruct-pad
```

For those who want to use other models:

### General Instructions to Run App

To start interacting with Llama2 via the Gradio UI using other models:

1. **Run the Command**:
```bash
python3 chatbot.py --model_path <your-model-path>
```

Replace `<your-model-path>` with the path to the model file you wish to use. A recommended model for chat interactions is `meta-llama/Llama-2-13b-chat`.

## Contributions

Contributions, issues, and feature requests are welcome! Feel free to check the [issues page](https://github.com/SeungyounShin/Llama2-Code-Interpreter/issues).

## License

Distributed under the MIT License. See `LICENSE` for more information.

## Contact

Seungyoun, Shin - 2022021568@korea.ac.kr

## Acknowledgement

Here are some relevant and related projects that have contributed to the development of this work:

1. **llama2** : [GitHub Repository](https://github.com/facebookresearch/llama)
2. **yet-another-gpt-tutorial** : [GitHub Repository](https://github.com/sjchoi86/yet-another-gpt-tutorial/tree/main)

These projects have been instrumental in providing valuable insights and resources, and their contributions are highly appreciated.

---
Llama2-Code-Interpreter/assets/TSLA_90days.png
ADDED
Llama2-Code-Interpreter/assets/logo.png
ADDED
Llama2-Code-Interpreter/assets/logo2.png
ADDED
Llama2-Code-Interpreter/assets/president_code.gif
ADDED
Llama2-Code-Interpreter/assets/president_code.png
ADDED
Llama2-Code-Interpreter/assets/result_nvidia_chart.gif
ADDED
Llama2-Code-Interpreter/assets/tok_hist.png
ADDED
Llama2-Code-Interpreter/chatbot.py
ADDED
@@ -0,0 +1,238 @@
import gradio as gr
import random
import time, os
import copy
import re

import torch
from rich.console import Console
from rich.table import Table
from datetime import datetime

from threading import Thread
from typing import Optional
from transformers import TextIteratorStreamer

from utils.special_tok_llama2 import (
    B_CODE,
    E_CODE,
    B_RESULT,
    E_RESULT,
    B_INST,
    E_INST,
    B_SYS,
    E_SYS,
    DEFAULT_PAD_TOKEN,
    DEFAULT_BOS_TOKEN,
    DEFAULT_EOS_TOKEN,
    DEFAULT_UNK_TOKEN,
    IGNORE_INDEX,
)

from finetuning.conversation_template import (
    json_to_code_result_tok_temp,
    msg_to_code_result_tok_temp,
)

import warnings

warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"


from code_interpreter.LlamaCodeInterpreter import LlamaCodeInterpreter


class StreamingLlamaCodeInterpreter(LlamaCodeInterpreter):
    streamer: Optional[TextIteratorStreamer] = None

    # overwrite generate function
    @torch.inference_mode()
    def generate(
        self,
        prompt: str = "[INST]\n###User : hi\n###Assistant :",
        max_new_tokens=512,
        do_sample: bool = True,
        use_cache: bool = True,
        top_p: float = 0.95,
        temperature: float = 0.1,
        top_k: int = 50,
        repetition_penalty: float = 1.0,
    ) -> str:
        # Get the model and tokenizer, and tokenize the user text.

        self.streamer = TextIteratorStreamer(
            self.tokenizer, skip_prompt=True, Timeout=5
        )

        input_prompt = copy.deepcopy(prompt)
        inputs = self.tokenizer([prompt], return_tensors="pt")
        input_tokens_shape = inputs["input_ids"].shape[-1]

        eos_token_id = self.tokenizer.convert_tokens_to_ids(DEFAULT_EOS_TOKEN)
        e_code_token_id = self.tokenizer.convert_tokens_to_ids(E_CODE)

        kwargs = dict(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
            top_p=top_p,
            temperature=temperature,
            use_cache=use_cache,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
            eos_token_id=[
                eos_token_id,
                e_code_token_id,
            ],  # Stop generation at either EOS or E_CODE token
            streamer=self.streamer,
        )

        thread = Thread(target=self.model.generate, kwargs=kwargs)
        thread.start()

        return ""


def change_markdown_image(text: str):
    modified_text = re.sub(r"!\[(.*?)\]\(\'(.*?)\'\)", r"![\1](/file=\2)", text)
    return modified_text


def gradio_launch(model_path: str, load_in_4bit: bool = True, MAX_TRY: int = 5):
    with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
        chatbot = gr.Chatbot(height=820, avatar_images="./assets/logo2.png")
        msg = gr.Textbox()
        clear = gr.Button("Clear")

        interpreter = StreamingLlamaCodeInterpreter(
            model_path=model_path, load_in_4bit=load_in_4bit
        )

        def bot(history):
            user_message = history[-1][0]

            interpreter.dialog.append({"role": "user", "content": user_message})

            print(f"###User : [bold]{user_message}[bold]")
            # print(f"###Assistant : ")

            # setup
            HAS_CODE = False  # For now
            INST_END_TOK_FLAG = False
            full_generated_text = ""
            prompt = interpreter.dialog_to_prompt(dialog=interpreter.dialog)
            start_prompt = copy.deepcopy(prompt)
            prompt = f"{prompt} {E_INST}"

            _ = interpreter.generate(prompt)
            history[-1][1] = ""
            generated_text = ""
            for character in interpreter.streamer:
                history[-1][1] += character
                generated_text += character
                yield history

            full_generated_text += generated_text
            HAS_CODE, generated_code_block = interpreter.extract_code_blocks(
                generated_text
            )

            attempt = 1
            while HAS_CODE:
                if attempt > MAX_TRY:
                    break
                # if no code then doesn't have to execute it

                # refine code block for history
                history[-1][1] = (
                    history[-1][1]
                    .replace(f"{B_CODE}", "\n```python\n")
                    .replace(f"{E_CODE}", "\n```\n")
                )
                history[-1][1] = change_markdown_image(history[-1][1])
                yield history

                # replace unknown tokens with ''
                generated_code_block = generated_code_block.replace(
                    "<unk>_", ""
                ).replace("<unk>", "")

                (
                    code_block_output,
                    error_flag,
                ) = interpreter.execute_code_and_return_output(
                    f"{generated_code_block}"
                )
                code_block_output = interpreter.clean_code_output(code_block_output)
                generated_text = (
                    f"{generated_text}\n{B_RESULT}\n{code_block_output}\n{E_RESULT}\n"
                )
                full_generated_text += (
                    f"\n{B_RESULT}\n{code_block_output}\n{E_RESULT}\n"
                )

                # append code output
                history[-1][1] += f"\n```RESULT\n{code_block_output}\n```\n"
                history[-1][1] = change_markdown_image(history[-1][1])
                yield history

                prompt = f"{prompt} {generated_text}"

                _ = interpreter.generate(prompt)
                for character in interpreter.streamer:
                    history[-1][1] += character
                    generated_text += character
                    history[-1][1] = change_markdown_image(history[-1][1])
                    yield history

                HAS_CODE, generated_code_block = interpreter.extract_code_blocks(
                    generated_text
                )

                if generated_text.endswith("</s>"):
                    break

                attempt += 1

            interpreter.dialog.append(
                {
                    "role": "assistant",
                    "content": generated_text.replace("<unk>_", "")
                    .replace("<unk>", "")
                    .replace("</s>", ""),
                }
            )

            print("----------\n" * 2)
            print(interpreter.dialog)
            print("----------\n" * 2)

            return history[-1][1]

        def user(user_message, history):
            return "", history + [[user_message, None]]

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )
        clear.click(lambda: None, None, chatbot, queue=False)

    demo.queue()
    demo.launch()


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Process path for LLAMA2_FINETUNED.")
    parser.add_argument(
        "--path",
        type=str,
        required=True,
        help="Path to the finetuned LLAMA2 model.",
        default="./output/llama-2-7b-codellama-ci",
    )
    args = parser.parse_args()

    gradio_launch(model_path=args.path, load_in_4bit=True)
Llama2-Code-Interpreter/code_interpreter/BaseCodeInterpreter.py
ADDED
@@ -0,0 +1,59 @@
import json
import os
import sys
import time
import re
from pathlib import Path
from typing import List, Literal, Optional, Tuple, TypedDict, Dict

prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(prj_root_path)

import torch
import transformers
from transformers import LlamaForCausalLM, LlamaTokenizer

import nbformat

# from nbconvert.preprocessors import ExecutePreprocessor
# from nbconvert.preprocessors.execute import CellExecutionError

from utils.const import *
from utils.cleaner import clean_error_msg
from colorama import init, Fore, Style
from rich.markdown import Markdown
import base64

import openai
from retrying import retry
import logging
from termcolor import colored
from code_interpreter.JuypyterClient import JupyterNotebook


class BaseCodeInterpreter:
    def __init__(self):
        self.dialog = [
            {
                "role": "system",
                "content": CODE_INTERPRETER_SYSTEM_PROMPT,
            },
            # {"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
            # {"role": "assistant", "content": "I think I need to use beatifulsoup to find current korean president,"}
        ]

        self.nb = JupyterNotebook()

    @staticmethod
    def extract_code_blocks(text: str):
        pattern = r"```(?:python\n)?(.*?)```"  # Match optional 'python\n' but don't capture it
        code_blocks = re.findall(pattern, text, re.DOTALL)
        return [block.strip() for block in code_blocks]

    @staticmethod
    def parse_last_answer(text: str) -> str:
        return text.split(E_INST)[-1]

    def execute_code_and_return_output(self, code_str: str):
        outputs, error_flag = self.nb.add_and_run(code_str)
        return outputs, error_flag
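For reference, a quick illustration of what the `extract_code_blocks` helper above returns. This snippet is an editor's sketch (the sample reply string is made up), not one of the uploaded files.

```python
import re

# Same pattern as BaseCodeInterpreter.extract_code_blocks; the fence is built
# programmatically so this example renders cleanly in markdown.
fence = "`" * 3
pattern = fence + r"(?:python\n)?(.*?)" + fence

reply = f"Sure, here is the code:\n{fence}python\nprint(2 + 2)\n{fence}\nDone."
blocks = [m.strip() for m in re.findall(pattern, reply, re.DOTALL)]
print(blocks)  # ['print(2 + 2)']
```
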
Llama2-Code-Interpreter/code_interpreter/GPTCodeInterpreter.py
ADDED
@@ -0,0 +1,234 @@
import json
import os
import sys
import time
import re
from pathlib import Path
from typing import List, Literal, Optional, Tuple, TypedDict, Dict

# Get the path from environment variable
prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(prj_root_path)
from code_interpreter.JuypyterClient import JupyterNotebook
from code_interpreter.BaseCodeInterpreter import BaseCodeInterpreter
from utils.const import *
from prompt.gpt4_prompt import CODE_INTERPRETER_SYSTEM_PROMPT

# from prompt.gpt4_prompt import CODE_INTERPRETER_SYSTEM_PROMPT
from colorama import init, Fore, Style
from rich.markdown import Markdown
import base64

import openai
from retrying import retry
import logging
from termcolor import colored

# load from key file
with open("./openai_api_key.txt") as f:
    OPENAI_API_KEY = key = f.read()
openai.api_key = OPENAI_API_KEY
from utils.cleaner import clean_error_msg


def remove_string(s):
    pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}:.*LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n"
    return re.sub(pattern, "", s)


def clean_the_dialog(dialog, question):
    question_idx = 0
    for idx, item in enumerate(dialog):
        if item["content"] == question:
            question_idx = idx

    filtered_dialog = dialog[question_idx:]

    user_qinit_dict = filtered_dialog[0]
    answer_fuse_str = "\n".join([i["content"].strip() for i in filtered_dialog[1::2]])

    final_dialog_dict = [
        {"role": "user", "content": user_qinit_dict["content"]},
        {"role": "assistant", "content": answer_fuse_str},
    ]

    return final_dialog_dict


class GPTCodeInterpreter(BaseCodeInterpreter):
    def __init__(self, model="gpt-4"):
        self.model = model
        self.dialog = [
            # {"role": "system", "content": CODE_INTERPRETER_SYSTEM_PROMPT },
            {
                "role": "system",
                "content": CODE_INTERPRETER_SYSTEM_PROMPT,
            },
            # {"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
            # {"role": "assistant", "content": "I think I need to use beatifulsoup to find current korean president,"}
        ]

        # self.dialog += few_shot_4
        self.response = None

        assert os.path.isfile(
            "./openai_api_key.txt"
        ), "The openai_api_key.txt file could not be found. Please make sure it is in the same directory as this script, and that it contains your OpenAI API key."

        # load from key file
        with open("./openai_api_key.txt") as f:
            OPENAI_API_KEY = f.read()
        openai.api_key = OPENAI_API_KEY

        self.nb = JupyterNotebook()
        out = self.nb.add_and_run(TOOLS_CODE)  # tool import

    def get_response_content(self):
        if self.response:
            return self.response["choices"][0]["message"]["content"]
        else:
            return None

    @retry(
        stop_max_attempt_number=7,
        wait_exponential_multiplier=1000,
        wait_exponential_max=10000,
    )
    def ChatCompletion(self):
        try:
            self.response = openai.ChatCompletion.create(
                model=self.model, messages=self.dialog, temperature=0.2, top_p=0.9
            )
        except Exception as e:
            print(f"error while OPENAI api call {e}")

    def close(self):
        """
        close jupyter notebook, and this class instance
        """
        self.nb.close()

    def save_dialog(self, path: str = "./output/dialog.json"):
        with open(path, "w") as f:
            json.dump(self.dialog, f)
            print(f" ++Dialog saved to [{path}]")

    def chat(
        self,
        user_message: str,
        VERBOSE: bool = False,
        MAX_TRY: int = 6,
        code_exec_prefix: str = "",
        feedback_prompt: str = "",
        append_result: bool = True,
    ):
        self.dialog.append({"role": "user", "content": user_message})

        code_block_output = ""
        attempt = 0
        img_data = None

        if VERBOSE:
            print(
                "###User : " + Fore.BLUE + Style.BRIGHT + user_message + Style.RESET_ALL
            )
            print("\n###Assistant : ")

        for i in range(MAX_TRY):
            # GPT response
            self.ChatCompletion()

            # Get code block
            generated_text = self.get_response_content()
            generated_code_blocks = self.extract_code_blocks(generated_text)
            # execute code
            if len(generated_code_blocks) > 0:
                # Find the position of the first code block in the last answer
                first_code_block_pos = (
                    generated_text.find(generated_code_blocks[0])
                    if generated_code_blocks
                    else -1
                )
                text_before_first_code_block = (
                    generated_text
                    if first_code_block_pos == -1
                    else generated_text[:first_code_block_pos]
                )
                if VERBOSE:
                    print(Fore.GREEN + text_before_first_code_block + Style.RESET_ALL)
                if VERBOSE:
                    print(
                        Fore.YELLOW
                        + generated_code_blocks[0]
                        + "\n```\n"
                        + Style.RESET_ALL
                    )
                code_block_output, error_flag = self.execute_code_and_return_output(
                    generated_code_blocks[0]
                )

                code_block_output = f"{code_block_output}"

                if code_block_output is not None:
                    code_block_output = code_block_output.strip()

                code_block_output = remove_string(code_block_output)
                if len(code_block_output) > 500:
                    code_block_output = (
                        code_block_output[:200] + "⋯(skip)⋯" + code_block_output[-200:]
                    )
                code_block_output_str = f"\n```RESULT\n{code_block_output}\n```\n"
                if append_result:
                    gen_final = f"{text_before_first_code_block}{generated_code_blocks[0]}\n```{code_block_output_str}"
                    if VERBOSE:
                        print(
                            Fore.LIGHTBLACK_EX + code_block_output_str + Style.RESET_ALL
                        )
                else:
                    gen_final = (
                        f"{text_before_first_code_block}{generated_code_blocks[0]}\n```"
                    )

                self.dialog.append(
                    {
                        "role": "assistant",
                        "content": gen_final,
                    }
                )

                if len(feedback_prompt) < 5:
                    feedback_dict = {
                        "role": "user",
                        "content": "Keep going. if you think debugging tell me where you got wrong and better code.\nNeed conclusion to question only text (Do not leave result part alone).\nif doesn't need to generated anything then just say <done>",
                    }
                else:
                    feedback_dict = {
                        "role": "user",
                        "content": f"{feedback_prompt}",
                    }

                self.dialog.append(feedback_dict)

            else:
                if "<done>" in generated_text:
                    generated_text = generated_text.split("<done>")[0].strip()

                if len(generated_text) <= 0:
                    break

                if VERBOSE:
                    print(Fore.GREEN + generated_text + Style.RESET_ALL)

                self.dialog.append(
                    {
                        "role": "assistant",
                        "content": f"{generated_text}",
                    }
                )
                break

        self.dialog = [self.dialog[0]] + clean_the_dialog(
            self.dialog, question=user_message
        )  # delete retrospections after generation step

        return self.dialog[-1]
Llama2-Code-Interpreter/code_interpreter/GPTCodeInterpreterDataCollect.py
ADDED
@@ -0,0 +1,271 @@
import json
import os, sys
import time
import re
from pathlib import Path
from typing import List, Literal, Optional, Tuple, TypedDict, Dict

# Get the path from environment variable
prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(prj_root_path)
from code_interpreter.JuypyterClient import JupyterNotebook
from code_interpreter.BaseCodeInterpreter import BaseCodeInterpreter
from utils.const import *
from colorama import init, Fore, Style
from rich.markdown import Markdown
import base64

import openai
from retrying import retry
import logging
from termcolor import colored

# load from key file
with open("./openai_api_key.txt") as f:
    OPENAI_API_KEY = key = f.read()
openai.api_key = OPENAI_API_KEY
from utils.cleaner import clean_error_msg
from prompt.gpt4_prompt import *


def remove_string(s):
    pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}:.*LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n"
    return re.sub(pattern, "", s)


def gen_questions(prefix="What is 55th fibonacci number?"):
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {
                "role": "system",
                "content": "You are teacherGPT, You need to generate only questions(to student not the explanation and solution) based on student history. \n\nGive him only one question.\n\nAlso remember that student can use code. ",
            },
            {
                "role": "user",
                "content": f"{prefix}\nmore harder one but not the similar domain of above.",
            },
        ],
        temperature=0.1,
        max_tokens=300,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response["choices"][0]["message"]["content"]


def save_dialog(dialog, base_path: str = f"{prj_root_path}/gpt_data_gen"):
    file_number = 0
    while True:
        # Construct the path
        file_name = f"{file_number}.json"
        full_path = os.path.join(base_path, file_name)

        # Check if the file already exists
        if not os.path.exists(full_path):
            # If not, save the file
            with open(full_path, "w") as f:
                json.dump(dialog, f)
            print(f"Dialog saved to {full_path}")
            break
        else:
            # If the file does exist, increment the file number and try again
            file_number += 1


def clean_the_dialog(dialog, question):
    question_idx = 0
    for idx, item in enumerate(dialog):
        if item["content"] == question:
            question_idx = idx

    filtered_dialog = dialog[question_idx:]

    user_qinit_dict = filtered_dialog[0]
    answer_fuse_str = "\n".join([i["content"].strip() for i in filtered_dialog[1::2]])

    final_dialog_dict = [
        {"role": "user", "content": user_qinit_dict["content"]},
        {"role": "assistant", "content": answer_fuse_str},
    ]

    return final_dialog_dict


class GPTCodeInterpreter(BaseCodeInterpreter):
    def __init__(self, model="gpt-4"):
        self.model = model
        self.dialog = [
            # {"role": "system", "content": CODE_INTERPRETER_SYSTEM_PROMPT },
            {
                "role": "system",
                "content": CODE_INTERPRETER_SYSTEM_PROMPT + "\n" + extra_prompt,
            },
            # {"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
            # {"role": "assistant", "content": "I think I need to use beatifulsoup to find current korean president,"}
        ]

        self.dialog += few_shot_1
        # self.dialog += few_shot_4
        self.response = None

        assert os.path.isfile(
            "./openai_api_key.txt"
        ), "The openai_api_key.txt file could not be found. Please make sure it is in the same directory as this script, and that it contains your OpenAI API key."

        # load from key file
        with open("./openai_api_key.txt") as f:
            OPENAI_API_KEY = f.read()
        openai.api_key = OPENAI_API_KEY

        self.nb = JupyterNotebook()
        out = self.nb.add_and_run(TOOLS_CODE)  # tool import

    def get_response_content(self):
        if self.response:
            return self.response["choices"][0]["message"]["content"]
        else:
            return None

    @retry(
        stop_max_attempt_number=7,
        wait_exponential_multiplier=1000,
        wait_exponential_max=10000,
    )
    def ChatCompletion(self):
        try:
            self.response = openai.ChatCompletion.create(
                model=self.model, messages=self.dialog, temperature=0.1, top_p=1.0
            )
        except Exception as e:
            print(f"error while OPENAI api call {e}")

    def chat(self, user_message: str, VERBOSE: bool = False, MAX_RETRY: int = 6):
        self.dialog.append({"role": "user", "content": user_message})

        code_block_output = ""
        attempt = 0
        img_data = None

        if VERBOSE:
            print(
                "###User : " + Fore.BLUE + Style.BRIGHT + user_message + Style.RESET_ALL
            )
            print("\n###Assistant : ")

        for i in range(MAX_RETRY):
            # GPT response
            self.ChatCompletion()

            # Get code block
            generated_text = self.get_response_content()
            generated_code_blocks = self.extract_code_blocks(generated_text)
            # execute code
            if len(generated_code_blocks) > 0:
                # Find the position of the first code block in the last answer
                first_code_block_pos = (
                    generated_text.find(generated_code_blocks[0])
                    if generated_code_blocks
                    else -1
                )
                text_before_first_code_block = (
                    generated_text
                    if first_code_block_pos == -1
                    else generated_text[:first_code_block_pos]
                )
                if VERBOSE:
                    print(Fore.GREEN + text_before_first_code_block + Style.RESET_ALL)
                if VERBOSE:
                    print(
                        Fore.YELLOW
                        + generated_code_blocks[0]
                        + "\n```\n"
                        + Style.RESET_ALL
                    )
                code_block_output, error_flag = self.execute_code_and_return_output(
                    generated_code_blocks[0]
                )

                code_block_output = f"{code_block_output}"

                if code_block_output is not None:
                    code_block_output = code_block_output.strip()

                code_block_output = remove_string(code_block_output)
                if len(code_block_output) > 500:
                    code_block_output = (
                        code_block_output[:200] + "⋯(skip)⋯" + code_block_output[-200:]
                    )
                code_block_output_str = f"\n```RESULT\n{code_block_output}\n```\n"
                if VERBOSE:
                    print(Fore.LIGHTBLACK_EX + code_block_output_str + Style.RESET_ALL)
                    # markdown = Markdown(code_block_output_str)print(markdown)

                gen_final = f"{text_before_first_code_block}{generated_code_blocks[0]}\n```{code_block_output_str}"

                self.dialog.append(
                    {
                        "role": "assistant",
                        "content": f"{text_before_first_code_block}{generated_code_blocks[0]}\n```{code_block_output_str}",
                    }
                )

                self.dialog.append(
                    {
                        "role": "user",
                        "content": "Keep going. if you think debugging generate code. need conclusion to question only text (Do not leave result part alone). Doesn't need to generated anything then just say <done>",
                    }
                )

            else:
                if "<done>" in generated_text:
                    generated_text = generated_text.split("<done>")[0].strip()

                if len(generated_text) <= 0:
                    break

                if VERBOSE:
                    print(Fore.GREEN + generated_text + Style.RESET_ALL)

                self.dialog.append(
                    {
                        "role": "assistant",
                        "content": f"{generated_text}",
                    }
                )
                break

        return self.dialog[-1]


if __name__ == "__main__":
    import random

    SEED_TASK = [
        # "Resize this image to 512x512\nUser Uploaded File : './tmp/img.png'",
        "Write a Python script that retrieves Google Trends data for a given keyword and stock price data for a specific company over the same timeframe, normalizes both datasets to the same scale, and then plots them on the same graph to analyze potential correlations.",
        "Could you conduct a frequency analysis on Apple's stock price to determine any cyclic patterns that occur on a weekly, monthly, or quarterly basis?",
    ]

    questions = SEED_TASK

    from tqdm import tqdm

    for i in tqdm(range(150000)):
        interpreter = GPTCodeInterpreter()

        question = questions[i]
        output = interpreter.chat(user_message=question, VERBOSE=True, MAX_RETRY=5)

        sample = clean_the_dialog(interpreter.dialog, question)

        save_dialog(sample)

        # q1,q2,q3 = random.sample(questions, k=3)
        # question = gen_questions(prefix = f'{q1}\n{q2}\n{q3}')
        # questions.append(question)

        del interpreter

        print(f"new question :: {question}")
Llama2-Code-Interpreter/code_interpreter/JuypyterClient.py
ADDED
@@ -0,0 +1,75 @@
from jupyter_client import KernelManager
import threading
import re


class JupyterNotebook:
    def __init__(self):
        self.km = KernelManager()
        self.km.start_kernel()
        self.kc = self.km.client()

    def clean_output(self, outputs):
        outputs_only_str = list()
        for i in outputs:
            if type(i) == dict:
                if "text/plain" in list(i.keys()):
                    outputs_only_str.append(i["text/plain"])
            elif type(i) == str:
                outputs_only_str.append(i)
            elif type(i) == list:
                error_msg = "\n".join(i)
                error_msg = re.sub(r"\x1b\[.*?m", "", error_msg)
                outputs_only_str.append(error_msg)

        return "\n".join(outputs_only_str).strip()

    def add_and_run(self, code_string):
        # This inner function will be executed in a separate thread
        def run_code_in_thread():
            nonlocal outputs, error_flag

            # Execute the code and get the execution count
            msg_id = self.kc.execute(code_string)

            while True:
                try:
                    msg = self.kc.get_iopub_msg(timeout=20)

                    msg_type = msg["header"]["msg_type"]
                    content = msg["content"]

                    if msg_type == "execute_result":
                        outputs.append(content["data"])
                    elif msg_type == "stream":
                        outputs.append(content["text"])
                    elif msg_type == "error":
                        error_flag = True
                        outputs.append(content["traceback"])

                    # If the execution state of the kernel is idle, it means the cell finished executing
                    if msg_type == "status" and content["execution_state"] == "idle":
                        break
                except:
                    break

        outputs = []
        error_flag = False

        # Start the thread to run the code
        thread = threading.Thread(target=run_code_in_thread)
        thread.start()

        # Wait for 10 seconds for the thread to finish
        thread.join(timeout=10)

        # If the thread is still alive after 10 seconds, it's a timeout
        if thread.is_alive():
            outputs = ["Timeout after 10 seconds"]
            error_flag = True

        return self.clean_output(outputs), error_flag

    def close(self):
        """Shutdown the kernel."""
        self.km.shutdown_kernel()
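A brief usage sketch for the `JupyterNotebook` wrapper above (an editor's illustration, not one of the uploaded files): code strings run in a persistent kernel, output is flattened to text, and executions that outlast the 10-second join are reported as timeouts.

```python
from code_interpreter.JuypyterClient import JupyterNotebook

nb = JupyterNotebook()

out, err = nb.add_and_run("x = 21\nx * 2")
print(out, err)   # 42 False

out, err = nb.add_and_run("import time; time.sleep(60)")
print(out, err)   # Timeout after 10 seconds True

nb.close()        # shuts the kernel down
```
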
Llama2-Code-Interpreter/code_interpreter/LlamaCodeInterpreter.py
ADDED
@@ -0,0 +1,286 @@
import sys
import os

prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(prj_root_path)

from code_interpreter.JuypyterClient import JupyterNotebook
from code_interpreter.BaseCodeInterpreter import BaseCodeInterpreter
from utils.const import *

from typing import List, Literal, Optional, Tuple, TypedDict, Dict
from colorama import init, Fore, Style
import copy
import re

import torch
import transformers
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import PeftModel


sys.path.append(os.path.dirname(__file__))
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from finetuning.conversation_template import msg_to_code_result_tok_temp
from utils.special_tok_llama2 import (
    B_CODE,
    E_CODE,
    B_RESULT,
    E_RESULT,
    B_INST,
    E_INST,
    B_SYS,
    E_SYS,
    DEFAULT_PAD_TOKEN,
    DEFAULT_BOS_TOKEN,
    DEFAULT_EOS_TOKEN,
    DEFAULT_UNK_TOKEN,
    IGNORE_INDEX,
)

import warnings

warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"


class LlamaCodeInterpreter(BaseCodeInterpreter):
    def __init__(
        self,
        model_path: str,
        load_in_8bit: bool = False,
        load_in_4bit: bool = False,
        peft_model: Optional[str] = None,
    ):
        # build tokenizer
        self.tokenizer = LlamaTokenizer.from_pretrained(
            model_path,
            padding_side="right",
            use_fast=False,
        )

        # Handle special tokens
        special_tokens_dict = dict()
        if self.tokenizer.pad_token is None:
            special_tokens_dict["pad_token"] = DEFAULT_PAD_TOKEN  # 32000
        if self.tokenizer.eos_token is None:
            special_tokens_dict["eos_token"] = DEFAULT_EOS_TOKEN  # 2
        if self.tokenizer.bos_token is None:
            special_tokens_dict["bos_token"] = DEFAULT_BOS_TOKEN  # 1
        if self.tokenizer.unk_token is None:
            special_tokens_dict["unk_token"] = DEFAULT_UNK_TOKEN

        self.tokenizer.add_special_tokens(special_tokens_dict)
        self.tokenizer.add_tokens(
            [B_CODE, E_CODE, B_RESULT, E_RESULT, B_INST, E_INST, B_SYS, E_SYS],
            special_tokens=True,
        )

        self.model = LlamaForCausalLM.from_pretrained(
            model_path,
            device_map="auto",
            load_in_4bit=load_in_4bit,
            load_in_8bit=load_in_8bit,
            torch_dtype=torch.float16,
        )

        self.model.resize_token_embeddings(len(self.tokenizer))

        if peft_model is not None:
            peft_model = PeftModel.from_pretrained(self.model, peft_model)

        self.model = self.model.eval()

        self.dialog = [
            {
                "role": "system",
                "content": CODE_INTERPRETER_SYSTEM_PROMPT + "\nUse code to answer",
            },
            # {"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
            # {"role": "assistant", "content": "I think I need to use beatifulsoup to find current korean president,"}
        ]

        self.nb = JupyterNotebook()
        self.MAX_CODE_OUTPUT_LENGTH = 3000
        out = self.nb.add_and_run(TOOLS_CODE)  # tool import
        print(out)

    def dialog_to_prompt(self, dialog: List[Dict]) -> str:
        full_str = msg_to_code_result_tok_temp(dialog)

        return full_str

    @torch.inference_mode()
    def generate(
        self,
        prompt: str = "[INST]\n###User : hi\n###Assistant :",
        max_new_tokens=512,
        do_sample: bool = True,
        use_cache: bool = True,
        top_p: float = 0.95,
        temperature: float = 0.1,
        top_k: int = 50,
        repetition_penalty: float = 1.0,
    ) -> str:
        # Get the model and tokenizer, and tokenize the user text.

        input_prompt = copy.deepcopy(prompt)
        inputs = self.tokenizer([prompt], return_tensors="pt")
        input_tokens_shape = inputs["input_ids"].shape[-1]

        eos_token_id = self.tokenizer.convert_tokens_to_ids(DEFAULT_EOS_TOKEN)
        e_code_token_id = self.tokenizer.convert_tokens_to_ids(E_CODE)

        output = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=do_sample,
            top_p=top_p,
            temperature=temperature,
            use_cache=use_cache,
            top_k=top_k,
            repetition_penalty=repetition_penalty,
            eos_token_id=[
                eos_token_id,
                e_code_token_id,
            ],  # Stop generation at either EOS or E_CODE token
        )[0]

        generated_tokens = output[input_tokens_shape:]
        generated_text = self.tokenizer.decode(generated_tokens)

        return generated_text

    def extract_code_blocks(self, prompt: str) -> Tuple[bool, str]:
        pattern = re.escape(B_CODE) + r"(.*?)" + re.escape(E_CODE)
        matches = re.findall(pattern, prompt, re.DOTALL)

        if matches:
            # Return the last matched code block
            return True, matches[-1].strip()
        else:
            return False, ""

    def clean_code_output(self, output: str) -> str:
        if self.MAX_CODE_OUTPUT_LENGTH < len(output):
            return (
                output[: self.MAX_CODE_OUTPUT_LENGTH // 5]
                + "...(skip)..."
                + output[-self.MAX_CODE_OUTPUT_LENGTH // 5 :]
            )

        return output

    def chat(self, user_message: str, VERBOSE: bool = False, MAX_TRY=5):
        self.dialog.append({"role": "user", "content": user_message})
        if VERBOSE:
            print(
                "###User : " + Fore.BLUE + Style.BRIGHT + user_message + Style.RESET_ALL
            )
            print("\n###Assistant : ")

        # setup
        HAS_CODE = False  # For now
        INST_END_TOK_FLAG = False
        full_generated_text = ""
        prompt = self.dialog_to_prompt(dialog=self.dialog)
        start_prompt = copy.deepcopy(prompt)
        prompt = f"{prompt} {E_INST}"

        generated_text = self.generate(prompt)
        full_generated_text += generated_text
        HAS_CODE, generated_code_block = self.extract_code_blocks(generated_text)

        attempt = 1
        while HAS_CODE:
            if attempt > MAX_TRY:
                break
            # if no code then doesn't have to execute it

            # replace unknown tokens with ''
            generated_code_block = generated_code_block.replace("<unk>_", "").replace(
                "<unk>", ""
            )

            code_block_output, error_flag = self.execute_code_and_return_output(
                f"{generated_code_block}"
            )
            code_block_output = self.clean_code_output(code_block_output)
            generated_text = (
                f"{generated_text}\n{B_RESULT}\n{code_block_output}\n{E_RESULT}\n"
            )
            full_generated_text += f"\n{B_RESULT}\n{code_block_output}\n{E_RESULT}\n"

            first_code_block_pos = (
                generated_text.find(generated_code_block)
                if generated_code_block
                else -1
            )
            text_before_first_code_block = (
                generated_text
                if first_code_block_pos == -1
                else generated_text[:first_code_block_pos]
            )
            if VERBOSE:
                print(Fore.GREEN + text_before_first_code_block + Style.RESET_ALL)
                print(Fore.GREEN + generated_code_block + Style.RESET_ALL)
                print(
                    Fore.YELLOW
                    + f"\n{B_RESULT}\n{code_block_output}\n{E_RESULT}\n"
                    + Style.RESET_ALL
                )

            # prompt = f"{prompt} {E_INST}{generated_text}"
            prompt = f"{prompt}{generated_text}"
            generated_text = self.generate(prompt)
            HAS_CODE, generated_code_block = self.extract_code_blocks(generated_text)

            full_generated_text += generated_text

            attempt += 1

        if VERBOSE:
            print(Fore.GREEN + generated_text + Style.RESET_ALL)

        self.dialog.append(
            {
                "role": "assistant",
                "content": full_generated_text.replace("<unk>_", "")
                .replace("<unk>", "")
                .replace("</s>", ""),
            }
        )

        return self.dialog[-1]


if __name__ == "__main__":
    import random

    LLAMA2_MODEL_PATH = "./ckpt/llama-2-13b-chat"
    LLAMA2_MODEL_PATH = "meta-llama/Llama-2-70b-chat-hf"
    LLAMA2_FINETUNEED_PATH = "./output/llama-2-7b-chat-ci"

    interpreter = LlamaCodeInterpreter(
        model_path=LLAMA2_FINETUNEED_PATH, load_in_4bit=True
    )
    output = interpreter.chat(
        user_message=random.choice(
            [
                # "In a circle with center \( O \), \( AB \) is a chord such that the midpoint of \( AB \) is \( M \). A tangent at \( A \) intersects the extended segment \( OB \) at \( P \). If \( AM = 12 \) cm and \( MB = 12 \) cm, find the length of \( AP \)."
                # "A triangle \( ABC \) is inscribed in a circle (circumscribed). The sides \( AB \), \( BC \), and \( AC \) are tangent to the circle at points \( P \), \( Q \), and \( R \) respectively. If \( AP = 10 \) cm, \( BQ = 15 \) cm, and \( CR = 20 \) cm, find the radius of the circle.",
                # "Given an integer array nums, return the total number of contiguous subarrays that have a sum equal to 0.",
                "what is second largest city in japan?",
                # "Can you show me 120days chart of tesla from today to before 120?"
            ]
        ),
        VERBOSE=True,
    )

    while True:
        input_char = input("Press 'q' to quit the dialog: ")
        if input_char.lower() == "q":
            break

        else:
            output = interpreter.chat(user_message=input_char, VERBOSE=True)
Llama2-Code-Interpreter/code_interpreter/RetrospectiveGPTCodeInterpreter.py
ADDED
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import time
|
5 |
+
import copy
|
6 |
+
import re
|
7 |
+
from pathlib import Path
|
8 |
+
from typing import List, Literal, Optional, Tuple, TypedDict, Dict
|
9 |
+
import numpy as np
|
10 |
+
from tqdm import tqdm
|
11 |
+
|
12 |
+
# Get the path from environment variable
|
13 |
+
prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
14 |
+
sys.path.append(prj_root_path)
|
15 |
+
from code_interpreter.JuypyterClient import JupyterNotebook
|
16 |
+
from code_interpreter.BaseCodeInterpreter import BaseCodeInterpreter
|
17 |
+
from utils.const import *
|
18 |
+
from prompt.gpt4_prompt import CODE_INTERPRETER_SYSTEM_PROMPT
|
19 |
+
|
20 |
+
# from prompt.gpt4_prompt import CODE_INTERPRETER_SYSTEM_PROMPT
|
21 |
+
from colorama import init, Fore, Style, Back
|
22 |
+
from rich.markdown import Markdown
|
23 |
+
import base64
|
24 |
+
|
25 |
+
import openai
|
26 |
+
from retrying import retry
|
27 |
+
import requests
|
28 |
+
import logging
|
29 |
+
from termcolor import colored
|
30 |
+
|
31 |
+
# load from key file
|
32 |
+
with open("./openai_api_key.txt") as f:
|
33 |
+
OPENAI_API_KEY = key = f.read()
|
34 |
+
openai.api_key = OPENAI_API_KEY
|
35 |
+
from utils.cleaner import clean_error_msg
|
36 |
+
|
37 |
+
|
38 |
+
def remove_string(s):
|
39 |
+
pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}:.*LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n"
|
40 |
+
return re.sub(pattern, "", s)
|
41 |
+
|
42 |
+
|
43 |
+
def clean_the_dialog(dialog, question):
|
44 |
+
question_idx = 0
|
45 |
+
for idx, item in enumerate(dialog):
|
46 |
+
if item["content"] == question:
|
47 |
+
question_idx = idx
|
48 |
+
|
49 |
+
filtered_dialog = dialog[question_idx:]
|
50 |
+
|
51 |
+
user_qinit_dict = filtered_dialog[0]
|
52 |
+
answer_fuse_str = "\n".join([i["content"].strip() for i in filtered_dialog[1::2]])
|
53 |
+
|
54 |
+
final_dialog_dict = [
|
55 |
+
{"role": "user", "content": user_qinit_dict["content"]},
|
56 |
+
{"role": "assistant", "content": answer_fuse_str},
|
57 |
+
]
|
58 |
+
|
59 |
+
return final_dialog_dict
|
60 |
+
|
61 |
+
|
62 |
+
@retry(
|
63 |
+
stop_max_attempt_number=7,
|
64 |
+
wait_exponential_multiplier=1000,
|
65 |
+
wait_exponential_max=10000,
|
66 |
+
)
|
67 |
+
def get_embedding(text, model="text-embedding-ada-002"):
|
68 |
+
global counter
|
69 |
+
headers = {
|
70 |
+
"Authorization": f"Bearer {OPENAI_API_KEY}", # Make sure to replace with your OpenAI API key
|
71 |
+
"Content-Type": "application/json",
|
72 |
+
}
|
73 |
+
payload = {"input": text, "model": model}
|
74 |
+
|
75 |
+
response = requests.post(
|
76 |
+
"https://api.openai.com/v1/embeddings", headers=headers, json=payload
|
77 |
+
)
|
78 |
+
|
79 |
+
if response.status_code != 200:
|
80 |
+
raise Exception(f"Request failed with status {response.status_code}")
|
81 |
+
|
82 |
+
return np.array(response.json()["data"][0]["embedding"])
|
83 |
+
|
84 |
+
|
85 |
+
class QueryRetrospect:
|
86 |
+
def __init__(
|
87 |
+
self,
|
88 |
+
data_directory="./gpt_data_gen_retrospect/",
|
89 |
+
embeddings_path="./gpt_data_gen_retrospect/embeddings.npy",
|
90 |
+
):
|
91 |
+
self.data_directory = data_directory
|
92 |
+
self.embeddings_path = embeddings_path
|
93 |
+
self.data = []
|
94 |
+
self.embeddings = []
|
95 |
+
|
96 |
+
if os.path.exists(embeddings_path):
|
97 |
+
print("++ Embedding Exists!")
|
98 |
+
self.embeddings = np.load(embeddings_path)
|
99 |
+
for fname in [i for i in os.listdir(data_directory) if i.endswith(".json")]:
|
100 |
+
with open(
|
101 |
+
os.path.join(data_directory, fname),
|
102 |
+
"r",
|
103 |
+
encoding="utf-8",
|
104 |
+
errors="replace",
|
105 |
+
) as f:
|
106 |
+
self.data.append(json.load(f))
|
107 |
+
else:
|
108 |
+
only_files = [
|
109 |
+
f
|
110 |
+
for f in os.listdir(data_directory)
|
111 |
+
if os.path.isfile(os.path.join(data_directory, f))
|
112 |
+
and f.endswith(".json")
|
113 |
+
]
|
114 |
+
|
115 |
+
for fname in tqdm(only_files):
|
116 |
+
with open(
|
117 |
+
os.path.join(data_directory, fname), "r", encoding="cp1252"
|
118 |
+
) as f:
|
119 |
+
data_point = json.load(f)
|
120 |
+
self.data.append(data_point)
|
121 |
+
self.embeddings.append(
|
122 |
+
get_embedding(data_point["execution_result"])
|
123 |
+
)
|
124 |
+
self.embeddings = np.array(self.embeddings)
|
125 |
+
self.save_embeddings()
|
126 |
+
print(f"++ Embedding Saved! {self.embeddings.shape}")
|
127 |
+
|
128 |
+
def save_embeddings(self):
|
129 |
+
np.save(self.embeddings_path, self.embeddings)
|
130 |
+
|
131 |
+
def __call__(self, query, top_k=3, VERBOSE: bool = False):
|
132 |
+
query_embedding = get_embedding(query)
|
133 |
+
similarities = np.dot(self.embeddings, query_embedding)
|
134 |
+
top_indices = similarities.argsort()[-top_k:][::-1]
|
135 |
+
return [self.data[i]["retrospection"] for i in top_indices]
|
136 |
+
|
137 |
+
|
138 |
+
class QueryRetrospectPrefix:
|
139 |
+
def __init__(
|
140 |
+
self,
|
141 |
+
model="gpt-4",
|
142 |
+
data_directory="./eval/gpt_mbpp_output",
|
143 |
+
embeddings_path="./eval/gpt_mbpp_output/embeddings.npy",
|
144 |
+
):
|
145 |
+
self.data_directory = data_directory
|
146 |
+
self.embeddings_path = embeddings_path
|
147 |
+
self.data = []
|
148 |
+
self.embeddings = []
|
149 |
+
|
150 |
+
if os.path.exists(embeddings_path):
|
151 |
+
print("++ Embedding Exists!")
|
152 |
+
self.embeddings = np.load(embeddings_path)
|
153 |
+
for fname in [i for i in os.listdir(data_directory) if i.endswith(".json")]:
|
154 |
+
with open(
|
155 |
+
os.path.join(data_directory, fname),
|
156 |
+
"r",
|
157 |
+
encoding="utf-8",
|
158 |
+
errors="replace",
|
159 |
+
) as f:
|
160 |
+
self.data.append(json.load(f))
|
161 |
+
else:
|
162 |
+
only_files = [
|
163 |
+
f
|
164 |
+
for f in os.listdir(data_directory)
|
165 |
+
if os.path.isfile(os.path.join(data_directory, f))
|
166 |
+
and f.endswith(".json")
|
167 |
+
]
|
168 |
+
|
169 |
+
for fname in tqdm(only_files):
|
170 |
+
with open(
|
171 |
+
os.path.join(data_directory, fname), "r", encoding="cp1252"
|
172 |
+
) as f:
|
173 |
+
data_point = json.load(f)
|
174 |
+
|
175 |
+
print(f'Processing "{data_point[1]["content"]}" ...')
|
176 |
+
self.data.append(data_point)
|
177 |
+
self.embeddings.append(get_embedding(data_point[1]["content"]))
|
178 |
+
|
179 |
+
self.embeddings = np.array(self.embeddings)
|
180 |
+
self.save_embeddings()
|
181 |
+
print(f"++ Embedding Saved! {self.embeddings.shape}")
|
182 |
+
|
183 |
+
self.model = model
|
184 |
+
self.dialog = [
|
185 |
+
{
|
186 |
+
"role": "system",
|
187 |
+
"content": "You are retrospection GPT. retrospect from the given data.",
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"role": "user",
|
191 |
+
"content": 'Current Question:\n\nWrite a Python function to solve the following task:\n\nfrom typing import List\n\ndef cum_sum(numbers: List[int]) -> List[int]:\n """\n From a given list of integers, generate a list representing the cumulative sum of elements at each index.\n >>> cum_sum([1, 2, 3, 4])\n [1, 3, 6, 10]\n """\n\nRetrieved Trajectories : \nIn a past interaction, a function named running_average was provided to calculate the running average of a list of numbers.\n\n```python\ndef running_average(numbers: List[int]) -> List[float]:\n total = 0\n averages = []\n for i, num in enumerate(numbers):\n total += num\n averages.append(total / (i+1))\n return averages\n\nprint(running_average([1,2,3,4])) # expected [1.0, 1.5, 2.0, 2.5]\n```\n```RESULT\n[1.0, 1.5, 2.0, 2.5]\n```\nThe output is expected. \n\n',
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"role": "assistant",
|
195 |
+
"content": "From previous similar questions :\nThe `running_average` function highlights an important concept of maintaining a running or cumulative value (total) as one iterates over the list. This is directly applicable to the cum_sum problem.\n\nApplication to the Question:\nFor the cum_sum function, one needs to maintain a cumulative total of the elements as we traverse through the list. The running_average function is most closely related since it involves accumulating a total and storing intermediate results. By adapting this logic (i.e., excluding the division operation to compute the average), one can easily derive the cumulative sum solution.",
|
196 |
+
},
|
197 |
+
]
|
198 |
+
self.response = ""
|
199 |
+
|
200 |
+
@retry(
|
201 |
+
stop_max_attempt_number=7,
|
202 |
+
wait_exponential_multiplier=1000,
|
203 |
+
wait_exponential_max=10000,
|
204 |
+
)
|
205 |
+
def ChatCompletion(self):
|
206 |
+
try:
|
207 |
+
self.response = openai.ChatCompletion.create(
|
208 |
+
model=self.model, messages=self.dialog, temperature=0.2, top_p=0.9
|
209 |
+
)
|
210 |
+
except Exception as e:
|
211 |
+
print(f"error while OPENAI api call {e} {self.response}")
|
212 |
+
|
213 |
+
def save_embeddings(self):
|
214 |
+
np.save(self.embeddings_path, self.embeddings)
|
215 |
+
|
216 |
+
def __call__(self, query, top_k=3, VERBOSE: bool = False):
|
217 |
+
query_embedding = get_embedding(query)
|
218 |
+
similarities = np.dot(self.embeddings, query_embedding)
|
219 |
+
top_indices = similarities.argsort()[-top_k:][::-1]
|
220 |
+
top_i = top_indices[0]
|
221 |
+
prior_traj = self.data[top_i][-1]["content"]
|
222 |
+
|
223 |
+
ask_dict = {
|
224 |
+
"role": "user",
|
225 |
+
"content": f"Current Question:\n\n{query}\n\nRetrieved Trajectories :\n{prior_traj}",
|
226 |
+
}
|
227 |
+
|
228 |
+
# print(f"From prior experience:\n{prior_traj}\n\nCurrent Question:\n{query}\n")
|
229 |
+
self.dialog.append(ask_dict)
|
230 |
+
self.ChatCompletion()
|
231 |
+
|
232 |
+
return self.response["choices"][0]["message"]["content"]
|
233 |
+
|
234 |
+
|
235 |
+
class RetrospectiveGPTCodeInterpreter(BaseCodeInterpreter):
|
236 |
+
def __init__(self, model="gpt-4"):
|
237 |
+
self.model = model
|
238 |
+
self.dialog = [
|
239 |
+
# {"role": "system", "content": CODE_INTERPRETER_SYSTEM_PROMPT },
|
240 |
+
{
|
241 |
+
"role": "system",
|
242 |
+
"content": CODE_INTERPRETER_SYSTEM_PROMPT,
|
243 |
+
},
|
244 |
+
# {"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
|
245 |
+
# {"role": "assistant", "content": "I think I need to use beatifulsoup to find current korean president,"}
|
246 |
+
]
|
247 |
+
|
248 |
+
# self.dialog += few_shot_4
|
249 |
+
self.response = None
|
250 |
+
|
251 |
+
assert os.path.isfile(
|
252 |
+
"./openai_api_key.txt"
|
253 |
+
), "The openai_api_key.txt file could not be found. Please make sure it is in the same directory as this script, and that it contains your OpenAI API key."
|
254 |
+
|
255 |
+
# load from key file
|
256 |
+
with open("./openai_api_key.txt") as f:
|
257 |
+
OPENAI_API_KEY = f.read()
|
258 |
+
openai.api_key = OPENAI_API_KEY
|
259 |
+
|
260 |
+
self.nb = JupyterNotebook()
|
261 |
+
out = self.nb.add_and_run(TOOLS_CODE) # tool import
|
262 |
+
|
263 |
+
# retrospections
|
264 |
+
self.retrospector = QueryRetrospectPrefix()
|
265 |
+
|
266 |
+
def get_response_content(self):
|
267 |
+
if self.response:
|
268 |
+
return self.response["choices"][0]["message"]["content"]
|
269 |
+
else:
|
270 |
+
return None
|
271 |
+
|
272 |
+
@retry(
|
273 |
+
stop_max_attempt_number=7,
|
274 |
+
wait_exponential_multiplier=1000,
|
275 |
+
wait_exponential_max=10000,
|
276 |
+
)
|
277 |
+
def ChatCompletion(self):
|
278 |
+
try:
|
279 |
+
self.response = openai.ChatCompletion.create(
|
280 |
+
model=self.model, messages=self.dialog, temperature=0.2, top_p=0.9
|
281 |
+
)
|
282 |
+
except Exception as e:
|
283 |
+
print(f"error while OPENAI api call {e}")
|
284 |
+
|
285 |
+
def save_dialog(self, path: str = "./output/dialog.json"):
|
286 |
+
with open(path, "w") as f:
|
287 |
+
json.dump(self.dialog, f)
|
288 |
+
print(f" ++Dialog saved to [{path}]")
|
289 |
+
|
290 |
+
def close(self):
|
291 |
+
"""
|
292 |
+
close jupyter notebook, and this class instance
|
293 |
+
"""
|
294 |
+
self.nb.close()
|
295 |
+
|
296 |
+
def chat(
|
297 |
+
self,
|
298 |
+
user_message: str,
|
299 |
+
VERBOSE: bool = False,
|
300 |
+
MAX_TRY: int = 6,
|
301 |
+
code_exec_prefix: str = "",
|
302 |
+
feedback_prompt: str = "",
|
303 |
+
append_result: bool = True,
|
304 |
+
use_retrospect: bool = True,
|
305 |
+
):
|
306 |
+
prefix_retrospection = self.retrospector(query=user_message)
|
307 |
+
self.dialog.append(
|
308 |
+
{"role": "user", "content": f"{prefix_retrospection}\n\n{user_message}"}
|
309 |
+
)
|
310 |
+
init_feedback = copy.deepcopy(feedback_prompt)
|
311 |
+
|
312 |
+
code_block_output = ""
|
313 |
+
attempt = 0
|
314 |
+
img_data = None
|
315 |
+
|
316 |
+
if VERBOSE:
|
317 |
+
print(
|
318 |
+
"###Retrospection : "
|
319 |
+
+ Fore.BLUE
|
320 |
+
+ Back.WHITE
|
321 |
+
+ Style.BRIGHT
|
322 |
+
+ prefix_retrospection
|
323 |
+
+ Style.RESET_ALL
|
324 |
+
)
|
325 |
+
print(
|
326 |
+
"###User : " + Fore.BLUE + Style.BRIGHT + user_message + Style.RESET_ALL
|
327 |
+
)
|
328 |
+
print("\n###Assistant : ")
|
329 |
+
|
330 |
+
for i in range(MAX_TRY):
|
331 |
+
# GPT response
|
332 |
+
self.ChatCompletion()
|
333 |
+
|
334 |
+
# Get code block
|
335 |
+
generated_text = self.get_response_content()
|
336 |
+
generated_code_blocks = self.extract_code_blocks(generated_text)
|
337 |
+
# execute code
|
338 |
+
if len(generated_code_blocks) > 0:
|
339 |
+
# Find the position of the first code block in the last answer
|
340 |
+
first_code_block_pos = (
|
341 |
+
generated_text.find(generated_code_blocks[0])
|
342 |
+
if generated_code_blocks
|
343 |
+
else -1
|
344 |
+
)
|
345 |
+
text_before_first_code_block = (
|
346 |
+
generated_text
|
347 |
+
if first_code_block_pos == -1
|
348 |
+
else generated_text[:first_code_block_pos]
|
349 |
+
)
|
350 |
+
if VERBOSE:
|
351 |
+
print(Fore.GREEN + text_before_first_code_block + Style.RESET_ALL)
|
352 |
+
if VERBOSE:
|
353 |
+
print(
|
354 |
+
Fore.YELLOW
|
355 |
+
+ generated_code_blocks[0]
|
356 |
+
+ "\n```\n"
|
357 |
+
+ Style.RESET_ALL
|
358 |
+
)
|
359 |
+
code_block_output, error_flag = self.execute_code_and_return_output(
|
360 |
+
generated_code_blocks[0]
|
361 |
+
)
|
362 |
+
|
363 |
+
code_block_output = f"{code_block_output}"
|
364 |
+
|
365 |
+
if code_block_output is not None:
|
366 |
+
code_block_output = code_block_output.strip()
|
367 |
+
|
368 |
+
code_block_output = remove_string(code_block_output)
|
369 |
+
if len(code_block_output) > 500:
|
370 |
+
code_block_output = (
|
371 |
+
code_block_output[:200] + "⋯(skip)⋯" + code_block_output[-200:]
|
372 |
+
)
|
373 |
+
code_block_output_str = f"\n```RESULT\n{code_block_output}\n```\n"
|
374 |
+
if append_result:
|
375 |
+
gen_final = f"{text_before_first_code_block}{generated_code_blocks[0]}\n```{code_block_output_str}"
|
376 |
+
if VERBOSE:
|
377 |
+
print(
|
378 |
+
Fore.LIGHTBLACK_EX + code_block_output_str + Style.RESET_ALL
|
379 |
+
)
|
380 |
+
else:
|
381 |
+
gen_final = (
|
382 |
+
f"{text_before_first_code_block}{generated_code_blocks[0]}\n```"
|
383 |
+
)
|
384 |
+
|
385 |
+
self.dialog.append(
|
386 |
+
{
|
387 |
+
"role": "assistant",
|
388 |
+
"content": gen_final,
|
389 |
+
}
|
390 |
+
)
|
391 |
+
|
392 |
+
feedback_prompt = f"{init_feedback}\nif you accomplish the instruction just say <done>\nIf not keep going."
|
393 |
+
if VERBOSE:
|
394 |
+
print(Fore.MAGENTA + feedback_prompt + Style.RESET_ALL)
|
395 |
+
|
396 |
+
feedback_dict = {
|
397 |
+
"role": "user",
|
398 |
+
"content": feedback_prompt,
|
399 |
+
}
|
400 |
+
|
401 |
+
self.dialog.append(feedback_dict)
|
402 |
+
|
403 |
+
else:
|
404 |
+
if "<done>" in generated_text:
|
405 |
+
generated_text = generated_text.split("<done>")[0].strip()
|
406 |
+
|
407 |
+
if len(generated_text) <= 0:
|
408 |
+
break
|
409 |
+
|
410 |
+
if VERBOSE:
|
411 |
+
print(Fore.GREEN + generated_text + Style.RESET_ALL)
|
412 |
+
|
413 |
+
self.dialog.append(
|
414 |
+
{
|
415 |
+
"role": "assistant",
|
416 |
+
"content": f"{generated_text}",
|
417 |
+
}
|
418 |
+
)
|
419 |
+
break
|
420 |
+
|
421 |
+
self.dialog = [self.dialog[0]] + clean_the_dialog(
|
422 |
+
self.dialog, question=f"{prefix_retrospection}\n\n{user_message}"
|
423 |
+
) # delete retrospections after generation step
|
424 |
+
|
425 |
+
return self.dialog[-1]
|
426 |
+
|
427 |
+
|
428 |
+
if __name__ == "__main__":
|
429 |
+
import pickle
|
430 |
+
import random
|
431 |
+
from tqdm import tqdm
|
432 |
+
|
433 |
+
# python3 -m code_interpreter.RetrospectiveGPTCodeInterpreter
|
434 |
+
|
435 |
+
retro_interpreter = RetrospectiveGPTCodeInterpreter(model="gpt-4")
|
436 |
+
|
437 |
+
instruction = """
|
438 |
+
Write a Python script to solve the following problem:
|
439 |
+
|
440 |
+
def get_row(lst, x):
|
441 |
+
\"\"\"
|
442 |
+
You are given a 2 dimensional data, as a nested lists,
|
443 |
+
which is similar to matrix, however, unlike matrices,
|
444 |
+
each row may contain a different number of columns.
|
445 |
+
Given lst, and integer x, find integers x in the list,
|
446 |
+
and return list of tuples, [(x1, y1), (x2, y2) ...] such that
|
447 |
+
each tuple is a coordinate - (row, columns), starting with 0.
|
448 |
+
Sort coordinates initially by rows in ascending order.
|
449 |
+
Also, sort coordinates of the row by columns in descending order.
|
450 |
+
|
451 |
+
Examples:
|
452 |
+
get_row([
|
453 |
+
[1,2,3,4,5,6],
|
454 |
+
[1,2,3,4,1,6],
|
455 |
+
[1,2,3,4,5,1]
|
456 |
+
], 1) == [(0, 0), (1, 4), (1, 0), (2, 5), (2, 0)]
|
457 |
+
get_row([], 1) == []
|
458 |
+
get_row([[], [1], [1, 2, 3]], 3) == [(2, 2)]
|
459 |
+
\"\"\"
|
460 |
+
|
461 |
+
Ensure the solution is verified by printing the expected output.
|
462 |
+
"""
|
463 |
+
# instruction = "Can you make a image of astraunaut in the garden?"
|
464 |
+
|
465 |
+
# example
|
466 |
+
retro_interpreter.chat(
|
467 |
+
user_message=instruction,
|
468 |
+
MAX_TRY=5,
|
469 |
+
use_retrospect=True,
|
470 |
+
feedback_prompt="Ensure the output matches the expected result, taking into account any corner cases. If discrepancies arise, pinpoint where you went wrong. Then, refine the code to achieve the desired outcome.",
|
471 |
+
VERBOSE=True,
|
472 |
+
)
|
Llama2-Code-Interpreter/code_interpreter/__pycache__/JuypyterClient.cpython-311.pyc
ADDED
Binary file (4.02 kB). View file
|
|
Llama2-Code-Interpreter/code_interpreter/__pycache__/LlamaCodeInterpreter.cpython-311.pyc
ADDED
Binary file (11.8 kB). View file
|
|
Llama2-Code-Interpreter/code_interpreter/llama_hf.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Optional
|
2 |
+
import os, sys
|
3 |
+
|
4 |
+
from transformers import LlamaForCausalLM, LlamaTokenizer
|
5 |
+
|
6 |
+
import torch
|
7 |
+
from datetime import datetime
|
8 |
+
|
9 |
+
sys.path.append(os.path.dirname(__file__))
|
10 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
11 |
+
from utils.special_tok_llama2 import (
|
12 |
+
B_CODE,
|
13 |
+
E_CODE,
|
14 |
+
B_RESULT,
|
15 |
+
E_RESULT,
|
16 |
+
B_INST,
|
17 |
+
E_INST,
|
18 |
+
B_SYS,
|
19 |
+
E_SYS,
|
20 |
+
DEFAULT_PAD_TOKEN,
|
21 |
+
DEFAULT_BOS_TOKEN,
|
22 |
+
DEFAULT_EOS_TOKEN,
|
23 |
+
DEFAULT_UNK_TOKEN,
|
24 |
+
IGNORE_INDEX,
|
25 |
+
)
|
26 |
+
|
27 |
+
|
28 |
+
def create_peft_config(model):
|
29 |
+
from peft import (
|
30 |
+
get_peft_model,
|
31 |
+
LoraConfig,
|
32 |
+
TaskType,
|
33 |
+
prepare_model_for_int8_training,
|
34 |
+
)
|
35 |
+
|
36 |
+
peft_config = LoraConfig(
|
37 |
+
task_type=TaskType.CAUSAL_LM,
|
38 |
+
inference_mode=False,
|
39 |
+
r=8,
|
40 |
+
lora_alpha=32,
|
41 |
+
lora_dropout=0.05,
|
42 |
+
target_modules=["q_proj", "v_proj"],
|
43 |
+
)
|
44 |
+
|
45 |
+
# prepare int-8 model for training
|
46 |
+
model = prepare_model_for_int8_training(model)
|
47 |
+
model = get_peft_model(model, peft_config)
|
48 |
+
model.print_trainable_parameters()
|
49 |
+
return model, peft_config
|
50 |
+
|
51 |
+
|
52 |
+
def build_model_from_hf_path(
|
53 |
+
hf_base_model_path: str = "./ckpt/llama-2-13b-chat",
|
54 |
+
load_peft: Optional[bool] = False,
|
55 |
+
peft_model_path: Optional[str] = None,
|
56 |
+
load_in_4bit: bool = True,
|
57 |
+
):
|
58 |
+
start_time = datetime.now()
|
59 |
+
|
60 |
+
# build tokenizer
|
61 |
+
tokenizer = LlamaTokenizer.from_pretrained(
|
62 |
+
hf_base_model_path,
|
63 |
+
padding_side="right",
|
64 |
+
use_fast=False,
|
65 |
+
)
|
66 |
+
|
67 |
+
# Handle special tokens
|
68 |
+
special_tokens_dict = dict()
|
69 |
+
if tokenizer.pad_token is None:
|
70 |
+
special_tokens_dict["pad_token"] = DEFAULT_PAD_TOKEN # 32000
|
71 |
+
if tokenizer.eos_token is None:
|
72 |
+
special_tokens_dict["eos_token"] = DEFAULT_EOS_TOKEN # 2
|
73 |
+
if tokenizer.bos_token is None:
|
74 |
+
special_tokens_dict["bos_token"] = DEFAULT_BOS_TOKEN # 1
|
75 |
+
if tokenizer.unk_token is None:
|
76 |
+
special_tokens_dict["unk_token"] = DEFAULT_UNK_TOKEN
|
77 |
+
|
78 |
+
tokenizer.add_special_tokens(special_tokens_dict)
|
79 |
+
tokenizer.add_tokens(
|
80 |
+
[B_CODE, E_CODE, B_RESULT, E_RESULT, B_INST, E_INST, B_SYS, E_SYS],
|
81 |
+
special_tokens=True,
|
82 |
+
)
|
83 |
+
|
84 |
+
# build model
|
85 |
+
model = LlamaForCausalLM.from_pretrained(
|
86 |
+
hf_base_model_path,
|
87 |
+
load_in_4bit=load_in_4bit,
|
88 |
+
device_map="auto",
|
89 |
+
)
|
90 |
+
|
91 |
+
model.resize_token_embeddings(len(tokenizer))
|
92 |
+
|
93 |
+
if load_peft and (peft_model_path is not None):
|
94 |
+
from peft import PeftModel
|
95 |
+
|
96 |
+
model = PeftModel.from_pretrained(model, peft_model_path)
|
97 |
+
|
98 |
+
end_time = datetime.now()
|
99 |
+
elapsed_time = end_time - start_time
|
100 |
+
|
101 |
+
return {"tokenizer": tokenizer, "model": model}
|
Llama2-Code-Interpreter/eval/eval.md
ADDED
File without changes
|
Llama2-Code-Interpreter/eval/gsm8k.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
JSON_PATH = "/home/seungyoun/llama_related/llama_code_interpreter/eval/grade-school-math/grade_school_math/data/test.jsonl"
|
2 |
+
|
3 |
+
import os, sys
|
4 |
+
import random
|
5 |
+
import json
|
6 |
+
import re
|
7 |
+
from tqdm import tqdm
|
8 |
+
|
9 |
+
|
10 |
+
def load_jsonl(file_path, num_lines_to_display=5):
|
11 |
+
data = []
|
12 |
+
with open(file_path, "r") as f:
|
13 |
+
for line in f:
|
14 |
+
out_dict = json.loads(line)
|
15 |
+
answer_raw = out_dict["answer"]
|
16 |
+
answer_refined = answer_raw.split("####")[-1].strip()
|
17 |
+
out_dict["answer_refined"] = answer_refined
|
18 |
+
data.append(out_dict) # dict_keys(['question', 'answer'])
|
19 |
+
return data
|
20 |
+
|
21 |
+
|
22 |
+
few_shot_1 = [
|
23 |
+
{
|
24 |
+
"role": "user",
|
25 |
+
"content": "Sandra wants to buy some sweets. She saved $10 for this purpose. Her mother gave her an additional $4, and her father twice as much as her mother. One candy costs $0.5, and one jelly bean $0.2. She wants to buy 14 candies and 20 jelly beans. How much money will she be left with after the purchase?",
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"role": "assistant",
|
29 |
+
"content": "```python\n# Saved Money\nsandra_money = 10\n# mother gave her\nmother_money = 4\n# father gave twice as much as her mother\nfather_money = 2 * mother_money\n# Cost\ncandy_cost = 0.5\njelly_bean_cost = 0.2\n\n# Calculations\ntotal_money = sandra_money + mother_money + father_money\n#Desired to buy\nShe wants to buy 14 candies and 20 jelly beans so the total cost would be\ntotal_cost = 14 * candy_cost + 20 * jelly_bean_cost\n# After the purchase\nremaining_money = total_money - total_cost\n\nprint(remaining_money)\n```\n```RESULT\n11.0\n```\nAfter buying 14 candies and 20 jelly beans, Sandra will be left with $11.",
|
30 |
+
},
|
31 |
+
]
|
32 |
+
|
33 |
+
few_shot_2 = [
|
34 |
+
{
|
35 |
+
"role": "user",
|
36 |
+
"content": "Keanu bought some fish from the store for his pets. He gave his cat half as many fish as he gave to his dog. If he gave his dog 40 fish, and each fish cost him $4, calculate the total amount of money he spent at the store to buy all the fish.",
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"role": "assistant",
|
40 |
+
"content": "```python\ndef calculate_total_cost(dog_fish, fish_price):\n cat_fish = dog_fish / 2\n total_fish = dog_fish + cat_fish\n total_cost = total_fish * fish_price\n return total_cost\n\ntotal_cost = calculate_total_cost(40, 4)\nprint(total_cost)\n```\n```RESULT\n240.0\n```\nKeanu spent a total of $240 to buy all the fish from the store.\n### Answer : 240",
|
41 |
+
},
|
42 |
+
]
|
43 |
+
|
44 |
+
few_shot_3 = [
|
45 |
+
{
|
46 |
+
"role": "user",
|
47 |
+
"content": "Mike is feeding the birds at the park. He counts out the seeds as he throws them. He throws 20 seeds to the birds on the left. He throws twice as much to the bigger group of birds on the right. Some more birds flutter over to join the others for lunch and Mike throws 30 more seeds for them. If Mike has 30 seeds left to feed the last of the birds, how many seeds did Mike start with?",
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"role": "assistant",
|
51 |
+
"content": "Let's break down the problem:\n\n- Mike throws 20 seeds to the birds on the left.\n- He throws twice as much to the bigger group of birds on the right, which is 2 * 20 = 40 seeds.\n- Some more birds join and Mike throws 30 more seeds for them.\n- Mike has 30 seeds left to feed the last of the birds.\n\nSo, the total number of seeds Mike started with is the sum of all these seeds.\n\n```python\nseeds_left = 20\nseeds_right = 2 * seeds_left\nseeds_more_birds = 30\nseeds_last = 30\n\ntotal_seeds = seeds_left + seeds_right + seeds_more_birds + seeds_last\nprint(total_seeds)\n```\n```RESULT\n120\n```\nFrom the calculations, we can conclude that Mike started with 120 seeds to feed the birds at the park.\n### Answer : 120",
|
52 |
+
},
|
53 |
+
]
|
54 |
+
|
55 |
+
if __name__ == "__main__":
|
56 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
57 |
+
from code_interpreter.LlamaCodeInterpreter import LlamaCodeInterpreter
|
58 |
+
import argparse
|
59 |
+
|
60 |
+
parser = argparse.ArgumentParser(description="Process path for LLAMA2_FINETUNEED.")
|
61 |
+
parser.add_argument(
|
62 |
+
"--path",
|
63 |
+
type=str,
|
64 |
+
required=True,
|
65 |
+
help="Path to the finetuned LLAMA2 model.",
|
66 |
+
default='"./output/llama-2-7b-chat-ci"',
|
67 |
+
)
|
68 |
+
args = parser.parse_args()
|
69 |
+
LLAMA2_FINETUNEED_PATH = args.path
|
70 |
+
|
71 |
+
interpreter = LlamaCodeInterpreter(
|
72 |
+
model_path=LLAMA2_FINETUNEED_PATH,
|
73 |
+
# load_in_4bit=True
|
74 |
+
)
|
75 |
+
|
76 |
+
sample_data = load_jsonl(JSON_PATH)
|
77 |
+
correct = 0
|
78 |
+
|
79 |
+
for idx, data_dict in enumerate(tqdm(sample_data)):
|
80 |
+
question = data_dict["question"]
|
81 |
+
answer = data_dict["answer"]
|
82 |
+
answer_only = data_dict["answer_refined"]
|
83 |
+
|
84 |
+
# reset dilag : fewshot to follow answering format
|
85 |
+
interpreter.dialog = [
|
86 |
+
{
|
87 |
+
"role": "system",
|
88 |
+
"content": "You are helpful robot that can generate code , excute it and debug then answer",
|
89 |
+
}
|
90 |
+
] # this will replaced in template conversion
|
91 |
+
# interpreter.dialog += few_shot_1
|
92 |
+
# interpreter.dialog += few_shot_2
|
93 |
+
# interpreter.dialog += few_shot_3
|
94 |
+
|
95 |
+
output = interpreter.chat(
|
96 |
+
user_message=f"{question}",
|
97 |
+
VERBOSE=True,
|
98 |
+
)
|
99 |
+
|
100 |
+
pattern = r"\[RESULT_TOK\]\s*(\d+(\.\d+)?)\s*\[/RESULT_TOK\]"
|
101 |
+
pred = -9212323 # for no code output (which is wrong answer)
|
102 |
+
if re.search(pattern, output["content"]):
|
103 |
+
pred = re.search(pattern, output["content"]).group(1)
|
104 |
+
|
105 |
+
pred = str(pred)
|
106 |
+
answer_only = str(answer_only)
|
107 |
+
if float(pred.replace(",", "")) == float(answer_only.replace(",", "")):
|
108 |
+
correct += 1
|
109 |
+
|
110 |
+
print("-" * 30)
|
111 |
+
print(f"\tThe question was : {question}")
|
112 |
+
print(f"\tThe answer was : [{answer_only}]")
|
113 |
+
print(f"\tModel pred is : [{pred}]")
|
114 |
+
print(f"\t Accuracy : [{correct/(idx+1)}]")
|
115 |
+
print("-" * 30)
|
Llama2-Code-Interpreter/eval/human_eval.py
ADDED
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, sys
|
2 |
+
import traceback
|
3 |
+
|
4 |
+
HUMAN_EVAL_PATH = os.path.join(
|
5 |
+
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
|
6 |
+
"human-eval",
|
7 |
+
)
|
8 |
+
|
9 |
+
sys.path.append(HUMAN_EVAL_PATH)
|
10 |
+
from human_eval.data import write_jsonl, read_problems
|
11 |
+
from finetuning.conversation_template import msg_to_code_result_tok_temp
|
12 |
+
from code_interpreter.llama_hf import build_model_from_hf_path
|
13 |
+
from code_interpreter.LlamaCodeInterpreter import LlamaCodeInterpreter
|
14 |
+
from code_interpreter.GPTCodeInterpreter import GPTCodeInterpreter
|
15 |
+
from code_interpreter.RetrospectiveGPTCodeInterpreter import (
|
16 |
+
RetrospectiveGPTCodeInterpreter,
|
17 |
+
)
|
18 |
+
|
19 |
+
import re
|
20 |
+
|
21 |
+
from rich import print
|
22 |
+
from rich.panel import Panel
|
23 |
+
from rich.syntax import Syntax
|
24 |
+
from rich.text import Text
|
25 |
+
|
26 |
+
from timeout_decorator import timeout
|
27 |
+
|
28 |
+
wrong = 0
|
29 |
+
|
30 |
+
|
31 |
+
def extract_text(prompt, remove_lines=True):
|
32 |
+
token = '"""'
|
33 |
+
start = token
|
34 |
+
end = ">>>"
|
35 |
+
# end = '"""'
|
36 |
+
|
37 |
+
start_idx = prompt.find(start) + len(start)
|
38 |
+
end_idx = prompt.find(end)
|
39 |
+
|
40 |
+
output = prompt[start_idx:end_idx]
|
41 |
+
if remove_lines:
|
42 |
+
output = output.replace("\n", " ")
|
43 |
+
output = re.sub(r"\s+", " ", output).strip()
|
44 |
+
|
45 |
+
return output
|
46 |
+
|
47 |
+
|
48 |
+
def extract_all_code_block(input_str: str) -> str:
|
49 |
+
pattern = r"\[CODE_START_TOK\](.*?)\[/CODE_END_TOK\]"
|
50 |
+
matches = re.findall(pattern, input_str, re.DOTALL)
|
51 |
+
return "\n".join([match.strip() for match in matches]) if matches else None
|
52 |
+
|
53 |
+
|
54 |
+
def extract_all_code_block_gpt(input_str: str) -> str:
|
55 |
+
pattern = r"```python(.*?)```"
|
56 |
+
matches = re.findall(pattern, input_str, re.DOTALL)
|
57 |
+
|
58 |
+
return "\n".join([match.strip() for match in matches]) if matches else None
|
59 |
+
|
60 |
+
|
61 |
+
def delete_print_asser(code_text: str):
|
62 |
+
lines = code_text.split("\n")
|
63 |
+
new_lines = list()
|
64 |
+
for i in lines:
|
65 |
+
if i.strip().startswith("print("):
|
66 |
+
continue
|
67 |
+
new_lines.append(i)
|
68 |
+
|
69 |
+
new_code_text = "\n".join(new_lines)
|
70 |
+
return new_code_text
|
71 |
+
|
72 |
+
|
73 |
+
def extract_function_from_code_block(code_block: str) -> str:
|
74 |
+
lines = code_block.split("\n")
|
75 |
+
function_lines = []
|
76 |
+
|
77 |
+
inside_function = False
|
78 |
+
for line in lines:
|
79 |
+
# Start extracting from function definition
|
80 |
+
if line.startswith("def "):
|
81 |
+
inside_function = True
|
82 |
+
|
83 |
+
# If we are inside the function, append lines
|
84 |
+
if inside_function:
|
85 |
+
function_lines.append(line)
|
86 |
+
|
87 |
+
# If we encounter an unindented line that isn't a comment and isn't the start of another function, stop.
|
88 |
+
if (
|
89 |
+
not line.startswith(" ")
|
90 |
+
and not line.startswith("#")
|
91 |
+
and not line.startswith("def ")
|
92 |
+
):
|
93 |
+
break
|
94 |
+
|
95 |
+
# Remove trailing comments or blank lines and the last line which caused the exit from the loop
|
96 |
+
while function_lines and (
|
97 |
+
function_lines[-1].strip() == ""
|
98 |
+
or function_lines[-1].strip().startswith("#")
|
99 |
+
or not function_lines[-1].startswith(" ")
|
100 |
+
):
|
101 |
+
function_lines.pop()
|
102 |
+
|
103 |
+
return "\n".join(function_lines)
|
104 |
+
|
105 |
+
|
106 |
+
def get_last_outermost_function_name(function_str):
|
107 |
+
matches = re.findall(r"^def (\w+)", function_str, re.MULTILINE)
|
108 |
+
if matches:
|
109 |
+
return matches[-1] # Return the last (outermost) function name
|
110 |
+
return ""
|
111 |
+
|
112 |
+
|
113 |
+
def get_last_function_name(function_str):
|
114 |
+
# Regular expression to match a function definition
|
115 |
+
matches = re.findall(r"def (\w+)", function_str)
|
116 |
+
if matches:
|
117 |
+
return matches[-1] # Return the last function name
|
118 |
+
return ""
|
119 |
+
|
120 |
+
|
121 |
+
def get_outermost_function_name(function_str):
|
122 |
+
matches = re.findall(r"^def (\w+)", function_str, re.MULTILINE)
|
123 |
+
if matches:
|
124 |
+
return matches[0] # Return the first (outermost) function name
|
125 |
+
return ""
|
126 |
+
|
127 |
+
|
128 |
+
def get_function_name(function_str):
|
129 |
+
# Regular expression to match a function definition
|
130 |
+
match = re.search(r"def (\w+)", function_str)
|
131 |
+
if match:
|
132 |
+
return match.group(0)
|
133 |
+
return ""
|
134 |
+
|
135 |
+
|
136 |
+
def extract_test_assertion(test_func: str):
|
137 |
+
test_cases = list()
|
138 |
+
for i in test_func.split("\n"):
|
139 |
+
if "assert" in i:
|
140 |
+
test_cases.append(i.strip())
|
141 |
+
|
142 |
+
return ("\n".join(test_cases)).strip()
|
143 |
+
|
144 |
+
|
145 |
+
import_str = """
|
146 |
+
import re
|
147 |
+
import math
|
148 |
+
from typing import List, Tuple, Optional
|
149 |
+
"""
|
150 |
+
|
151 |
+
|
152 |
+
@timeout(100, timeout_exception=TimeoutError)
|
153 |
+
def exec_with_timeout(import_str, full_test_code):
|
154 |
+
env = {**locals()}
|
155 |
+
code_to_exec = f"{import_str}\n{full_test_code}"
|
156 |
+
try:
|
157 |
+
exec(code_to_exec, env)
|
158 |
+
except Exception as e:
|
159 |
+
print(f"Error Type: {type(e).__name__}, Error Message: {e}")
|
160 |
+
return False # Return False if there's an error during execution
|
161 |
+
return True # Return True if executed without errors
|
162 |
+
|
163 |
+
|
164 |
+
if __name__ == "__main__":
|
165 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
166 |
+
import argparse
|
167 |
+
|
168 |
+
parser = argparse.ArgumentParser(description="Process path for LLAMA2_FINETUNEED.")
|
169 |
+
parser.add_argument(
|
170 |
+
"--path",
|
171 |
+
type=str,
|
172 |
+
required=True,
|
173 |
+
help="Path to the finetuned LLAMA2 model.",
|
174 |
+
default='"./output/llama-2-7b-chat-ci"',
|
175 |
+
)
|
176 |
+
parser.add_argument(
|
177 |
+
"--model",
|
178 |
+
type=str,
|
179 |
+
required=False,
|
180 |
+
help="Path to the finetuned LLAMA2 model.",
|
181 |
+
default='"./output/llama-2-7b-chat-ci"',
|
182 |
+
)
|
183 |
+
parser.add_argument(
|
184 |
+
"--max-retry",
|
185 |
+
type=int,
|
186 |
+
required=False,
|
187 |
+
help="Maximum number of retries.",
|
188 |
+
default=5, # You can set any default value you want here.
|
189 |
+
)
|
190 |
+
args = parser.parse_args()
|
191 |
+
PROGRAMMING_PUZZLE_Q = True
|
192 |
+
|
193 |
+
problems = read_problems()
|
194 |
+
correct_total = 0
|
195 |
+
total_problems = len(problems)
|
196 |
+
|
197 |
+
for idx, task_id in enumerate(problems):
|
198 |
+
if "gpt" not in args.model.lower():
|
199 |
+
LLAMA2_FINETUNEED_PATH = args.path
|
200 |
+
interpreter = LlamaCodeInterpreter(
|
201 |
+
model_path=LLAMA2_FINETUNEED_PATH,
|
202 |
+
# load_in_4bit=True
|
203 |
+
)
|
204 |
+
else:
|
205 |
+
interpreter = RetrospectiveGPTCodeInterpreter(
|
206 |
+
model=args.model,
|
207 |
+
)
|
208 |
+
|
209 |
+
# dict_keys(['task_id', 'prompt', 'entry_point', 'canonical_solution', 'test'])
|
210 |
+
programming_puzzle = problems[task_id]["prompt"].replace(" ", "\t")
|
211 |
+
text_only_problem = extract_text(programming_puzzle)
|
212 |
+
|
213 |
+
interpreter.dialog = [
|
214 |
+
{
|
215 |
+
"role": "system",
|
216 |
+
"content": "You are helpful robot that can generate code , excute it and debug then answer",
|
217 |
+
}
|
218 |
+
]
|
219 |
+
|
220 |
+
if PROGRAMMING_PUZZLE_Q:
|
221 |
+
# programming puzzle
|
222 |
+
output_str = interpreter.chat(
|
223 |
+
user_message=f"Write a Python script to solve the following problem:\n{programming_puzzle}\nEnsure the solution is verified by printing the expected output.",
|
224 |
+
MAX_TRY=args.max_retry,
|
225 |
+
VERBOSE=True,
|
226 |
+
code_exec_prefix=f"\nfrom typing import List,Tuple\nimport math\n",
|
227 |
+
feedback_prompt="Ensure the output matches the expected result, taking into account any corner cases. If discrepancies arise, pinpoint where you went wrong. Then, refine the code to achieve the desired outcome.",
|
228 |
+
append_result=True,
|
229 |
+
)["content"]
|
230 |
+
|
231 |
+
else:
|
232 |
+
output_str = interpreter.chat(
|
233 |
+
user_message=f"Write a Python script for this problem:\n{text_only_problem}",
|
234 |
+
MAX_TRY=args.max_retry,
|
235 |
+
VERBOSE=True,
|
236 |
+
code_exec_prefix=f"\nfrom typing import List,Tuple\nimport math\n",
|
237 |
+
feedback_prompt="Ensure the output matches the expected result. If not tell where you got wrong, then refine the code to achieve the desired outcome.",
|
238 |
+
append_result=True,
|
239 |
+
)["content"]
|
240 |
+
|
241 |
+
function_str = ""
|
242 |
+
if "gpt" not in args.model.lower():
|
243 |
+
code_block = extract_all_code_block(output_str)
|
244 |
+
else:
|
245 |
+
code_block = extract_all_code_block_gpt(output_str)
|
246 |
+
if (code_block is not None) and ("def" in code_block):
|
247 |
+
function_str = code_block
|
248 |
+
|
249 |
+
# function_name = get_last_outermost_function_name(function_str)
|
250 |
+
function_str = delete_print_asser(function_str)
|
251 |
+
function_name = get_last_outermost_function_name(function_str)
|
252 |
+
full_test_code = f"{function_str}\n#-----------\n{problems[task_id]['test']}\ncheck({function_name})"
|
253 |
+
|
254 |
+
# Print the full_test_code with syntax highlighting
|
255 |
+
syntax = Syntax(
|
256 |
+
# f"{programming_puzzle}\n{full_test_code}",
|
257 |
+
f"{full_test_code}",
|
258 |
+
"python",
|
259 |
+
theme="monokai",
|
260 |
+
line_numbers=True,
|
261 |
+
)
|
262 |
+
print(syntax)
|
263 |
+
|
264 |
+
is_correct = False # default is wrong
|
265 |
+
timeout_flag = False
|
266 |
+
try:
|
267 |
+
is_correct = exec_with_timeout(import_str, full_test_code)
|
268 |
+
except TimeoutError as e:
|
269 |
+
timeout_flag = True
|
270 |
+
print(f"Timeout with error msg : {e}")
|
271 |
+
|
272 |
+
if is_correct:
|
273 |
+
correct_total += 1
|
274 |
+
|
275 |
+
acc = (correct_total) / (idx + 1)
|
276 |
+
# save dialog
|
277 |
+
interpreter.save_dialog(
|
278 |
+
path=f"./eval/gpt_humaneval_output/{task_id.replace('/','_')}_{is_correct}.json"
|
279 |
+
)
|
280 |
+
interpreter.close()
|
281 |
+
del interpreter
|
282 |
+
|
283 |
+
# Constructing the output
|
284 |
+
accuracy_text = Text(
|
285 |
+
f"Accuracy: {correct_total}/{idx+1}[{total_problems}] = {acc:.2%} [{is_correct}]",
|
286 |
+
style="bold blue",
|
287 |
+
)
|
288 |
+
panel = Panel(accuracy_text, title="Results", border_style="green")
|
289 |
+
print(panel)
|
Llama2-Code-Interpreter/eval/inference.py
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass, field
|
2 |
+
from typing import Dict, Optional, Sequence
|
3 |
+
import logging
|
4 |
+
import os, sys
|
5 |
+
import copy
|
6 |
+
|
7 |
+
import torch
|
8 |
+
import transformers
|
9 |
+
from transformers import LlamaForCausalLM, LlamaTokenizer, TextStreamer
|
10 |
+
|
11 |
+
from torch.utils.data import Dataset
|
12 |
+
from transformers import Trainer
|
13 |
+
|
14 |
+
import torch
|
15 |
+
from rich.console import Console
|
16 |
+
from rich.table import Table
|
17 |
+
from datetime import datetime
|
18 |
+
from threading import Thread
|
19 |
+
|
20 |
+
sys.path.append(os.path.dirname(__file__))
|
21 |
+
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
22 |
+
from utils.special_tok_llama2 import (
|
23 |
+
B_CODE,
|
24 |
+
E_CODE,
|
25 |
+
B_RESULT,
|
26 |
+
E_RESULT,
|
27 |
+
B_INST,
|
28 |
+
E_INST,
|
29 |
+
B_SYS,
|
30 |
+
E_SYS,
|
31 |
+
DEFAULT_PAD_TOKEN,
|
32 |
+
DEFAULT_BOS_TOKEN,
|
33 |
+
DEFAULT_EOS_TOKEN,
|
34 |
+
DEFAULT_UNK_TOKEN,
|
35 |
+
IGNORE_INDEX,
|
36 |
+
)
|
37 |
+
|
38 |
+
from finetuning.conversation_template import (
|
39 |
+
json_to_code_result_tok_temp,
|
40 |
+
msg_to_code_result_tok_temp,
|
41 |
+
)
|
42 |
+
|
43 |
+
import warnings
|
44 |
+
|
45 |
+
warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
|
46 |
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
|
47 |
+
|
48 |
+
console = Console() # for pretty print
|
49 |
+
|
50 |
+
|
51 |
+
@dataclass
|
52 |
+
class ModelArguments:
|
53 |
+
model_name_or_path: Optional[str] = field(default="./output/llama-2-7b-chat-ci")
|
54 |
+
load_peft: Optional[bool] = field(default=False)
|
55 |
+
peft_model_name_or_path: Optional[str] = field(
|
56 |
+
default="./output/llama-2-7b-chat-ci"
|
57 |
+
)
|
58 |
+
|
59 |
+
|
60 |
+
def create_peft_config(model):
|
61 |
+
from peft import (
|
62 |
+
get_peft_model,
|
63 |
+
LoraConfig,
|
64 |
+
TaskType,
|
65 |
+
prepare_model_for_int8_training,
|
66 |
+
)
|
67 |
+
|
68 |
+
peft_config = LoraConfig(
|
69 |
+
task_type=TaskType.CAUSAL_LM,
|
70 |
+
inference_mode=False,
|
71 |
+
r=8,
|
72 |
+
lora_alpha=32,
|
73 |
+
lora_dropout=0.05,
|
74 |
+
target_modules=["q_proj", "v_proj"],
|
75 |
+
)
|
76 |
+
|
77 |
+
# prepare int-8 model for training
|
78 |
+
model = prepare_model_for_int8_training(model)
|
79 |
+
model = get_peft_model(model, peft_config)
|
80 |
+
model.print_trainable_parameters()
|
81 |
+
return model, peft_config
|
82 |
+
|
83 |
+
|
84 |
+
def build_model_from_hf_path(
|
85 |
+
hf_base_model_path: str = "./ckpt/llama-2-13b-chat",
|
86 |
+
load_peft: Optional[bool] = False,
|
87 |
+
peft_model_path: Optional[str] = None,
|
88 |
+
):
|
89 |
+
start_time = datetime.now()
|
90 |
+
|
91 |
+
# build tokenizer
|
92 |
+
console.log("[bold cyan]Building tokenizer...[/bold cyan]")
|
93 |
+
tokenizer = LlamaTokenizer.from_pretrained(
|
94 |
+
hf_base_model_path,
|
95 |
+
padding_side="right",
|
96 |
+
use_fast=False,
|
97 |
+
)
|
98 |
+
|
99 |
+
# Handle special tokens
|
100 |
+
console.log("[bold cyan]Handling special tokens...[/bold cyan]")
|
101 |
+
special_tokens_dict = dict()
|
102 |
+
if tokenizer.pad_token is None:
|
103 |
+
special_tokens_dict["pad_token"] = DEFAULT_PAD_TOKEN # 32000
|
104 |
+
if tokenizer.eos_token is None:
|
105 |
+
special_tokens_dict["eos_token"] = DEFAULT_EOS_TOKEN # 2
|
106 |
+
if tokenizer.bos_token is None:
|
107 |
+
special_tokens_dict["bos_token"] = DEFAULT_BOS_TOKEN # 1
|
108 |
+
if tokenizer.unk_token is None:
|
109 |
+
special_tokens_dict["unk_token"] = DEFAULT_UNK_TOKEN
|
110 |
+
|
111 |
+
tokenizer.add_special_tokens(special_tokens_dict)
|
112 |
+
tokenizer.add_tokens(
|
113 |
+
[B_CODE, B_RESULT, E_RESULT, B_INST, E_INST, B_SYS, E_SYS],
|
114 |
+
special_tokens=True,
|
115 |
+
)
|
116 |
+
|
117 |
+
# build model
|
118 |
+
console.log("[bold cyan]Building model...[/bold cyan]")
|
119 |
+
model = LlamaForCausalLM.from_pretrained(
|
120 |
+
hf_base_model_path,
|
121 |
+
load_in_4bit=True,
|
122 |
+
device_map="auto",
|
123 |
+
)
|
124 |
+
|
125 |
+
model.resize_token_embeddings(len(tokenizer))
|
126 |
+
|
127 |
+
if load_peft and (peft_model_path is not None):
|
128 |
+
from peft import PeftModel
|
129 |
+
|
130 |
+
model = PeftModel.from_pretrained(model, peft_model_path)
|
131 |
+
console.log("[bold green]Peft Model Loaded[/bold green]")
|
132 |
+
|
133 |
+
end_time = datetime.now()
|
134 |
+
elapsed_time = end_time - start_time
|
135 |
+
|
136 |
+
# Log time performance
|
137 |
+
table = Table(title="Time Performance")
|
138 |
+
table.add_column("Task", style="cyan")
|
139 |
+
table.add_column("Time Taken", justify="right")
|
140 |
+
table.add_row("Loading model", str(elapsed_time))
|
141 |
+
console.print(table)
|
142 |
+
|
143 |
+
console.log("[bold green]Model Loaded[/bold green]")
|
144 |
+
return {"tokenizer": tokenizer, "model": model}
|
145 |
+
|
146 |
+
|
147 |
+
@torch.inference_mode()
|
148 |
+
def inference(
|
149 |
+
user_input="What is 100th fibo num?",
|
150 |
+
max_new_tokens=512,
|
151 |
+
do_sample: bool = True,
|
152 |
+
use_cache: bool = True,
|
153 |
+
top_p: float = 1.0,
|
154 |
+
temperature: float = 0.1,
|
155 |
+
top_k: int = 50,
|
156 |
+
repetition_penalty: float = 1.0,
|
157 |
+
):
|
158 |
+
parser = transformers.HfArgumentParser(ModelArguments)
|
159 |
+
model_args = parser.parse_args_into_dataclasses()[0]
|
160 |
+
|
161 |
+
model_dict = build_model_from_hf_path(
|
162 |
+
hf_base_model_path=model_args.model_name_or_path,
|
163 |
+
load_peft=model_args.load_peft,
|
164 |
+
peft_model_path=model_args.peft_model_name_or_path,
|
165 |
+
)
|
166 |
+
|
167 |
+
model = model_dict["model"]
|
168 |
+
tokenizer = model_dict["tokenizer"]
|
169 |
+
|
170 |
+
streamer = TextStreamer(tokenizer, skip_prompt=True)
|
171 |
+
|
172 |
+
# peft
|
173 |
+
# create peft config
|
174 |
+
model.eval()
|
175 |
+
|
176 |
+
user_prompt = msg_to_code_result_tok_temp(
|
177 |
+
[{"role": "user", "content": f"{user_input}"}]
|
178 |
+
)
|
179 |
+
# Printing user's content in blue
|
180 |
+
console.print("\n" + "-" * 20, style="#808080")
|
181 |
+
console.print(f"###User : {user_input}\n", style="blue")
|
182 |
+
|
183 |
+
prompt = f"{user_prompt}\n###Assistant :"
|
184 |
+
# prompt = f"{user_input}\n### Assistant : Here is python code to get the 55th fibonacci number {B_CODE}\n"
|
185 |
+
|
186 |
+
inputs = tokenizer([prompt], return_tensors="pt")
|
187 |
+
|
188 |
+
generated_text = model.generate(
|
189 |
+
**inputs,
|
190 |
+
streamer=streamer,
|
191 |
+
max_new_tokens=max_new_tokens,
|
192 |
+
do_sample=do_sample,
|
193 |
+
top_p=top_p,
|
194 |
+
temperature=temperature,
|
195 |
+
use_cache=use_cache,
|
196 |
+
top_k=top_k,
|
197 |
+
repetition_penalty=repetition_penalty,
|
198 |
+
)
|
199 |
+
|
200 |
+
return generated_text
|
201 |
+
|
202 |
+
|
203 |
+
if __name__ == "__main__":
|
204 |
+
inference(user_input="what is sin(44)?")
|
Llama2-Code-Interpreter/finetuning/__pycache__/conversation_template.cpython-311.pyc
ADDED
Binary file (3.84 kB). View file
|
|
Llama2-Code-Interpreter/finetuning/codellama_wrapper.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import LlamaForCausalLM, LlamaTokenizer
|
2 |
+
from transformers import LlamaModel, LlamaConfig
|
3 |
+
import torch.nn as nn
|
4 |
+
|
5 |
+
CODELLAMA_VOCAB_SIZE = 32016
|
6 |
+
|
7 |
+
|
8 |
+
class CodeLlamaForCausalLM(LlamaForCausalLM):
|
9 |
+
_tied_weights_keys = ["lm_head.weight"]
|
10 |
+
|
11 |
+
def __init__(self, config):
|
12 |
+
super().__init__(config)
|
13 |
+
self.model = LlamaModel(config)
|
14 |
+
self.vocab_size = config.vocab_size
|
15 |
+
self.lm_head = nn.Linear(config.hidden_size, CODELLAMA_VOCAB_SIZE, bias=False)
|
16 |
+
self.model.embed_tokens = nn.Embedding(
|
17 |
+
CODELLAMA_VOCAB_SIZE, config.hidden_size, config.pad_token_id
|
18 |
+
)
|
19 |
+
|
20 |
+
# Initialize weights and apply final processing
|
21 |
+
self.post_init()
|
Llama2-Code-Interpreter/finetuning/conversation_template.py
ADDED
@@ -0,0 +1,80 @@
import os, sys

sys.path.append(os.path.dirname(os.path.dirname(__file__)))

import json
import re
from typing import List, Dict

DATA_DIR = "gpt_data_gen"

B_CODE = "[CODE_START_TOK]"
E_CODE = "[/CODE_END_TOK]"

B_RESULT = "[RESULT_TOK]"
E_RESULT = "[/RESULT_TOK]"

B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>", "<</SYS>>"

BOS = "<s>"
EOS = "</s>"

CODE_SYS_PROMPT_FOR_TRAIN = """
You are 'CodeLLama', an advanced Language Model assistant that can generate, execute, and evaluate code.
Respond to user queries by providing code-based solutions and insights.
"""


def msg_to_code_result_tok_temp(msg: List[Dict]) -> str:
    full_str = f"{BOS}{B_INST} {B_SYS}\n{CODE_SYS_PROMPT_FOR_TRAIN}\n{E_SYS}\n\n"

    user_first_flag = True
    for idx, chat in enumerate(msg):
        if chat["role"] == "system":
            continue
        if chat["role"].lower() == "user":
            chat["content"] = chat["content"]
            if user_first_flag:
                full_str += f"{chat['content']} {E_INST}"
                user_first_flag = False
            else:
                full_str += f"{BOS}{B_INST}{chat['content']} {E_INST}"
        elif chat["role"] == "assistant":
            chat["content"] = chat["content"].replace(
                "/home/seungyoun/llama_code_interpreter/", "./"
            )

            # Replace the code block start and end markers using regex
            code_pattern = re.compile(r"```python\n(.*?)```", re.DOTALL)
            chat["content"] = code_pattern.sub(
                r"[CODE_START_TOK]\n\1[/CODE_END_TOK]", chat["content"]
            )

            # Replace the result block start and end markers using regex
            result_pattern = re.compile(r"```RESULTS?\n(.*?)```", re.DOTALL)
            chat["content"] = result_pattern.sub(
                r"[RESULT_TOK]\n\1[/RESULT_TOK]", chat["content"]
            )

            full_str += f"{chat['content']}{EOS}"

    full_str = full_str.replace("')()", "')")
    full_str = full_str.replace("/home/seungyoun/llama_code_interpreter/", "./")

    return full_str


def json_to_code_result_tok_temp(json_file_name: str = "425.json") -> str:
    file_rel_path = os.path.join(DATA_DIR, json_file_name)

    with open(file_rel_path, "r") as json_file:
        msg = json.load(json_file)

    full_str = msg_to_code_result_tok_temp(msg)

    return full_str


if __name__ == "__main__":
    print(json_to_code_result_tok_temp())
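For reference, a small sketch of what this template produces; the toy message list is made up for illustration and is not repo data:

# Illustrative only: render a two-turn conversation with the training template.
from conversation_template import msg_to_code_result_tok_temp

toy_msg = [
    {"role": "user", "content": "print hello"},
    {"role": "assistant", "content": "```python\nprint('hello')\n```"},
]
print(msg_to_code_result_tok_temp(toy_msg))
# Roughly: <s>[INST] <<SYS>> ...system prompt... <</SYS>> print hello [/INST]
#          [CODE_START_TOK]\nprint('hello')\n[/CODE_END_TOK]</s>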
Llama2-Code-Interpreter/finetuning/train.py
ADDED
@@ -0,0 +1,336 @@
from dataclasses import dataclass, field
from typing import Dict, Optional, Sequence
import logging
import os, sys
import copy

import torch
import transformers
from transformers import LlamaForCausalLM, LlamaTokenizer

from torch.utils.data import Dataset
from transformers import Trainer

sys.path.append(os.path.dirname(__file__))
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from utils.special_tok_llama2 import (
    B_CODE,
    E_CODE,
    B_RESULT,
    E_RESULT,
    B_INST,
    E_INST,
    B_SYS,
    E_SYS,
    DEFAULT_PAD_TOKEN,
    DEFAULT_BOS_TOKEN,
    DEFAULT_EOS_TOKEN,
    DEFAULT_UNK_TOKEN,
    IGNORE_INDEX,
)

from conversation_template import json_to_code_result_tok_temp


@dataclass
class ModelArguments:
    model_name_or_path: Optional[str] = field(default="./ckpt/llama-2-13b-chat")
    peft: bool = field(default=False)


@dataclass
class DataArguments:
    data_path: str = field(
        default=None, metadata={"help": "Path to the training data."}
    )


@dataclass
class TrainingArguments(transformers.TrainingArguments):
    cache_dir: Optional[str] = field(default=None)
    optim: str = field(default="adamw_torch")
    model_max_length: int = field(
        default=4096,
        metadata={
            "help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."
        },
    )


def create_peft_config(model):
    from peft import (
        get_peft_model,
        LoraConfig,
        TaskType,
        prepare_model_for_int8_training,
    )

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],
    )

    # prepare int-8 model for training
    model = prepare_model_for_int8_training(model)
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
    print(f"Using Peft")
    return model, peft_config


def _tokenize_fn(
    strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer
) -> Dict:
    """Tokenize a list of strings."""
    tokenized_list = [
        tokenizer(
            text,
            return_tensors="pt",
            padding="longest",
            max_length=tokenizer.model_max_length,
            truncation=True,
        )
        for text in strings
    ]

    input_ids = [tokenized.input_ids[0] for tokenized in tokenized_list]
    input_ids_lens = [
        tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item()
        for tokenized in tokenized_list
    ]
    return dict(
        input_ids=input_ids,
        input_ids_lens=input_ids_lens,
    )


def find_all_sublist_end(main_list, sublist):
    """Find all the ending indices of a sublist in a main list."""
    sublist_len = len(sublist)
    main_list = main_list.tolist()
    indices = []
    for index in (i for i, e in enumerate(main_list) if e == sublist[0]):
        if main_list[index : index + sublist_len] == sublist:
            indices.append(index + sublist_len)
    return indices


def find_all_sublist_start(main_list, sublist):
    """Find all the starting indices of a sublist in a main list."""
    sublist_len = len(sublist)
    main_list = main_list.tolist()
    indices = []
    for index in (i for i, e in enumerate(main_list) if e == sublist[0]):
        if main_list[index : index + sublist_len] == sublist:
            indices.append(index)
    return indices


def preprocess(
    trajs: Sequence[str],
    tokenizer: transformers.PreTrainedTokenizer,
) -> Dict:
    INST_START_INDEX = tokenizer.encode(f"{B_INST}")[-1]
    INST_END_INDEX = tokenizer.encode(f"{E_INST}")[-1]
    RESULT_START_INDEX = tokenizer.encode(f"{B_RESULT}")[-1]
    RESULT_END_INDEX = tokenizer.encode(f"{E_RESULT}")[-1]

    """Preprocess the data by tokenizing."""
    examples_tokenized = _tokenize_fn(trajs, tokenizer)

    input_ids_lens = examples_tokenized["input_ids_lens"]
    input_ids = examples_tokenized["input_ids"]  # [torch.tensor , torch.tensor , ...]
    labels = copy.deepcopy(input_ids)

    # IGNORE INDEX SET
    for i, label in enumerate(labels):
        user_start_inds = find_all_sublist_start(label, [INST_START_INDEX])
        assistant_start_inds = find_all_sublist_end(label, [INST_END_INDEX])

        result_start_inds = find_all_sublist_start(label, [RESULT_START_INDEX])
        result_end_inds = find_all_sublist_end(label, [RESULT_END_INDEX])

        # for debug
        # for len_i, ind in enumerate(label):
        #     print(f'{len_i}|{ind} -> "{tokenizer.decode(ind)}"')

        assert len(user_start_inds) == len(
            assistant_start_inds
        ), f"User and Assistant pair should be equal :: \n\tUser [{user_start_inds}]/\n\tAssistant [{assistant_start_inds}]\n\n Text : \n{trajs[i]}"

        assert len(result_start_inds) == len(
            result_end_inds
        ), f"Start and End indices pairs do not match.: : \nText : \n{trajs[i]}"

        for user_start_ind, assistant_start_ind in zip(
            user_start_inds, assistant_start_inds
        ):
            label[user_start_ind + 1 : assistant_start_ind - 1] = IGNORE_INDEX

        for start, end in zip(result_start_inds, result_end_inds):
            label[start + 1 : end - 1] = IGNORE_INDEX

    # cut max length
    input_ids = [i[:1500] for i in input_ids]
    labels = [i[:1500] for i in labels]

    return dict(input_ids=input_ids, labels=labels)


class SupervisedDataset(Dataset):
    """Dataset for supervised fine-tuning."""

    def __init__(self, data_path: str, tokenizer: transformers.PreTrainedTokenizer):
        super(SupervisedDataset, self).__init__()
        logging.warning(f"Loading data from data path : {data_path}")
        all_json = os.listdir(data_path)

        trajs = list()
        for json_file_name in all_json:
            traj = json_to_code_result_tok_temp(json_file_name=json_file_name)
            trajs.append(traj)

        logging.warning("Tokenizing inputs... This may take some time...")
        data_dict = preprocess(trajs, tokenizer)

        self.input_ids = data_dict["input_ids"]
        self.labels = data_dict["labels"]

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, i) -> Dict[str, torch.Tensor]:
        return dict(input_ids=self.input_ids[i], labels=self.labels[i])


@dataclass
class DataCollatorForSupervisedDataset(object):
    """Collate examples for supervised fine-tuning."""

    tokenizer: transformers.PreTrainedTokenizer

    def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
        input_ids, labels = tuple(
            [instance[key] for instance in instances] for key in ("input_ids", "labels")
        )
        input_ids = torch.nn.utils.rnn.pad_sequence(
            input_ids, batch_first=True, padding_value=self.tokenizer.pad_token_id
        )
        labels = torch.nn.utils.rnn.pad_sequence(
            labels, batch_first=True, padding_value=IGNORE_INDEX
        )
        return dict(
            input_ids=input_ids,
            labels=labels,
            attention_mask=input_ids.ne(self.tokenizer.pad_token_id),
        )


def make_supervised_data_module(
    tokenizer: transformers.PreTrainedTokenizer, data_args
) -> Dict:
    """Make dataset and collator for supervised fine-tuning."""
    train_dataset = SupervisedDataset(
        tokenizer=tokenizer, data_path=data_args.data_path
    )
    data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
    return dict(
        train_dataset=train_dataset, eval_dataset=None, data_collator=data_collator
    )


def build_model_from_hf_path(
    hf_model_path: str = "./ckpt/llama-2-13b-chat", peft: bool = False
):
    # build tokenizer
    tokenizer = LlamaTokenizer.from_pretrained(
        hf_model_path,
        padding_side="right",
        use_fast=False,
    )

    special_tokens_dict = dict()
    if tokenizer.pad_token is None:
        special_tokens_dict["pad_token"] = DEFAULT_PAD_TOKEN  # 32000
    if tokenizer.eos_token is None:
        special_tokens_dict["eos_token"] = DEFAULT_EOS_TOKEN  # 2
    if tokenizer.bos_token is None:
        special_tokens_dict["bos_token"] = DEFAULT_BOS_TOKEN  # 1
    if tokenizer.unk_token is None:
        special_tokens_dict["unk_token"] = DEFAULT_UNK_TOKEN

    tokenizer.add_special_tokens(special_tokens_dict)

    tokenizer.add_tokens(
        [
            B_CODE,  # 32001
            E_CODE,  # 32002
            B_RESULT,  # 32003
            E_RESULT,  # 32004
            B_INST,
            E_INST,
            B_SYS,
            E_SYS,  # 32008
        ],
        special_tokens=True,
    )

    # build model
    if peft:
        model = LlamaForCausalLM.from_pretrained(
            hf_model_path,
            load_in_8bit=True,
            device_map="auto",
            ignore_mismatched_sizes=True,
            torch_dtype=torch.float16,
        )
    else:
        # for llama
        # model = LlamaForCausalLM.from_pretrained(
        #     hf_model_path, ignore_mismatched_sizes=True
        # )

        # for codellama
        from codellama_wrapper import CodeLlamaForCausalLM

        model = CodeLlamaForCausalLM.from_pretrained(hf_model_path)

    model.resize_token_embeddings(len(tokenizer))

    return {"tokenizer": tokenizer, "model": model}


def train():
    parser = transformers.HfArgumentParser(
        (ModelArguments, DataArguments, TrainingArguments)
    )
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    model_dict = build_model_from_hf_path(
        hf_model_path=model_args.model_name_or_path, peft=model_args.peft
    )

    model, tokenizer = model_dict["model"], model_dict["tokenizer"]
    # peft setting
    model.train()
    if model_args.peft:
        model, lora_config = create_peft_config(model)

    # make dataset
    data_module = make_supervised_data_module(tokenizer=tokenizer, data_args=data_args)
    trainer = Trainer(
        model=model, tokenizer=tokenizer, args=training_args, **data_module
    )

    # train
    trainer.train()
    trainer.save_state()
    trainer.save_model(output_dir=training_args.output_dir)


if __name__ == "__main__":
    train()
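For context, a sketch of driving this script programmatically (run from the finetuning/ directory); every path below is a placeholder and the hyperparameters are illustrative, not values fixed by this commit:

# Illustrative only: equivalent to invoking train.py from the command line with these flags.
import sys

sys.argv = [
    "train.py",
    "--model_name_or_path", "./ckpt/llama-2-13b-chat",   # placeholder checkpoint path
    "--data_path", "gpt_data_gen",                       # directory of collected .json trajectories
    "--output_dir", "./output/llama2-code-interpreter",  # placeholder output path
    "--num_train_epochs", "1",
    "--per_device_train_batch_size", "1",
    "--learning_rate", "2e-5",
    "--peft", "True",                                    # take the LoRA / int8 branch above
]
from train import train

train()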
Llama2-Code-Interpreter/prompt/__init__.py
ADDED
File without changes
|
Llama2-Code-Interpreter/prompt/gpt4_prompt.py
ADDED
@@ -0,0 +1,277 @@
1 |
+
few_shot_1 = [
|
2 |
+
{"role": "user", "content": "what is 100th fibonacci sequence?"},
|
3 |
+
{
|
4 |
+
"role": "assistant",
|
5 |
+
"content": """
|
6 |
+
```python
|
7 |
+
def fibonacci(n):
|
8 |
+
a, b = 0, 1
|
9 |
+
for _ in range(n):
|
10 |
+
a, b = b, a + b
|
11 |
+
return a
|
12 |
+
|
13 |
+
fibo_100 = fibonacci(100)
|
14 |
+
print(fibo_100)
|
15 |
+
```
|
16 |
+
```RESULT
|
17 |
+
354224848179261915075
|
18 |
+
```
|
19 |
+
""",
|
20 |
+
},
|
21 |
+
{"role": "user", "content": "Keep going"},
|
22 |
+
{
|
23 |
+
"role": "assistant",
|
24 |
+
"content": "The 100th Fibonacci number is 354224848179261915075",
|
25 |
+
},
|
26 |
+
]
|
27 |
+
|
28 |
+
few_shot_2 = [
|
29 |
+
{
|
30 |
+
"role": "user",
|
31 |
+
"content": "Can you write a Python script that uses a machine learning algorithm to predict stock prices using historical data?",
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"role": "assistant",
|
35 |
+
"content": """
|
36 |
+
Sure, here is a simple example of how you might use a linear regression model from the `sklearn` library to predict stock prices.
|
37 |
+
This example uses the `pandas_datareader` library to fetch historical stock data from Yahoo Finance.
|
38 |
+
|
39 |
+
Please note that this is a very basic example and real-world stock price prediction is a complex task that involves much more than just historical prices.
|
40 |
+
Other factors such as company earnings, economic indicators, and market sentiment also play a big role.
|
41 |
+
|
42 |
+
```python
|
43 |
+
import pandas as pd
|
44 |
+
import numpy as np
|
45 |
+
import yfinance as yf
|
46 |
+
from datetime import datetime as dt
|
47 |
+
|
48 |
+
# Get the stock data
|
49 |
+
df = yf.download('AAPL', start='2012-01-01', end='2022-01-01')
|
50 |
+
|
51 |
+
# Prepare the data for the model
|
52 |
+
df['Date'] = df.index
|
53 |
+
df['Date'] = pd.to_datetime(df['Date'])
|
54 |
+
df['Date'] = df['Date'].map(dt.toordinal)
|
55 |
+
|
56 |
+
df.head(3)
|
57 |
+
```
|
58 |
+
```RESULT
|
59 |
+
[*********************100%***********************] 1 of 1 completed
|
60 |
+
Open High Low Close Adj Close Volume
|
61 |
+
Date
|
62 |
+
2012-01-03 14.621429 14.732143 14.607143 14.686786 12.482925 302220800 \
|
63 |
+
2012-01-04 14.642857 14.810000 14.617143 14.765714 12.550011 260022000
|
64 |
+
2012-01-05 14.819643 14.948214 14.738214 14.929643 12.689340 271269600
|
65 |
+
|
66 |
+
Date
|
67 |
+
Date
|
68 |
+
2012-01-03 734505
|
69 |
+
2012-01-04 734506
|
70 |
+
2012-01-05 734507
|
71 |
+
```
|
72 |
+
|
73 |
+
""",
|
74 |
+
},
|
75 |
+
{"role": "user", "content": "Keep going"},
|
76 |
+
{
|
77 |
+
"role": "assistant",
|
78 |
+
"content": """
|
79 |
+
let's continue with the next steps which involve training the model, making predictions, and evaluating the model.
|
80 |
+
|
81 |
+
```python
|
82 |
+
from sklearn.model_selection import train_test_split
|
83 |
+
from sklearn.linear_model import LinearRegression
|
84 |
+
from sklearn import metrics
|
85 |
+
|
86 |
+
# Independent variable
|
87 |
+
X = df['Date'].values.reshape(-1,1)
|
88 |
+
# Dependent variable
|
89 |
+
y = df['Close'].values.reshape(-1,1)
|
90 |
+
|
91 |
+
# Split the data into training and testing sets
|
92 |
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
|
93 |
+
|
94 |
+
# Train the model
|
95 |
+
model = LinearRegression()
|
96 |
+
model.fit(X_train, y_train)
|
97 |
+
|
98 |
+
# Make predictions
|
99 |
+
y_pred = model.predict(X_test)
|
100 |
+
|
101 |
+
# Evaluate the model
|
102 |
+
print('Mean Absolute Error: {:.2f}'.format(metrics.mean_absolute_error(y_test, y_pred)))
|
103 |
+
print('Mean Squared Error: {:.2f}'.format(metrics.mean_squared_error(y_test, y_pred)))
|
104 |
+
print('Root Mean Squared Error: {:.2f}'.format(np.sqrt(metrics.mean_squared_error(y_test, y_pred))))
|
105 |
+
```
|
106 |
+
|
107 |
+
```RESULT
|
108 |
+
Mean Absolute Error: 17.85
|
109 |
+
Mean Squared Error: 491.52
|
110 |
+
Root Mean Squared Error: 22.17
|
111 |
+
```
|
112 |
+
|
113 |
+
""",
|
114 |
+
},
|
115 |
+
{"role": "user", "content": "Keep going"},
|
116 |
+
{
|
117 |
+
"role": "assistant",
|
118 |
+
"content": "This basic Linear Regression model serves as an introduction to stock price prediction, but real-world applications often employ more complex models. The prediction accuracy can be improved by using other models such as ARIMA, LSTM, or incorporating more data points.\nAlways exercise caution when using such models for financial decisions.",
|
119 |
+
},
|
120 |
+
]
|
121 |
+
|
122 |
+
few_shot_3 = [
|
123 |
+
{
|
124 |
+
"role": "user",
|
125 |
+
"content": "I want to analyze the latest news articles about a specific topic. Let's say chatgpt.",
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"role": "assistant",
|
129 |
+
"content": """
|
130 |
+
Sure, we can use the GoogleSearch tool to find the latest articles about "chatgpt".
|
131 |
+
|
132 |
+
```python
|
133 |
+
|
134 |
+
# Search for the latest articles about "chatgpt"
|
135 |
+
results = google_search("chatgpt latest news")
|
136 |
+
print(results)
|
137 |
+
```
|
138 |
+
```RESULT
|
139 |
+
[1] Title : [The latest update for ChatGPT - Release Notes]
|
140 |
+
snippet : Updates to ChatGPT (August 3) · Introducing the ChatGPT app for Android (July 25) · Custom instructions is rolling out in beta (July 20) · Higher message limits ...
|
141 |
+
link : https://help.openai.com/en/articles/6825453-chatgpt-release-notes
|
142 |
+
...
|
143 |
+
[6] Title : [Bing, Bard, ChatGPT, and all the news on AI chatbots - The Verge]
|
144 |
+
snippet : Over the last few months company has posted dozens of AI jobs in the US, France, and China, looking to fill roles that could help build generative AI tools that ...
|
145 |
+
link : https://www.theverge.com/23610427/chatbots-chatgpt-new-bing-google-bard-conversational-ai
|
146 |
+
```
|
147 |
+
|
148 |
+
""",
|
149 |
+
},
|
150 |
+
{"role": "user", "content": "Keep going"},
|
151 |
+
{
|
152 |
+
"role": "assistant",
|
153 |
+
"content": """
|
154 |
+
To analyze the latest news articles about a specific topic here is step-by-step plan :
|
155 |
+
|
156 |
+
# 1. Web Scraping: Use the GoogleSearch class to get a list of URLs of the latest news articles about the given topic. Then use a library like BeautifulSoup to scrape the main body of each article.
|
157 |
+
|
158 |
+
```python
|
159 |
+
from bs4 import BeautifulSoup
|
160 |
+
import re
|
161 |
+
|
162 |
+
def scrape_article(url, retries=3):
|
163 |
+
headers = {"User-Agent": "Mozilla/5.0"}
|
164 |
+
for _ in range(retries):
|
165 |
+
try:
|
166 |
+
response = requests.get(url, headers=headers)
|
167 |
+
response.raise_for_status() # Raise an exception if the status code is not 200
|
168 |
+
break
|
169 |
+
except requests.exceptions.RequestException:
|
170 |
+
print(f"Error occurred, retrying {url}")
|
171 |
+
time.sleep(5) # Wait 5 seconds before retrying
|
172 |
+
else: # No successful request after retries
|
173 |
+
print(f"Failed to retrieve {url} after {retries} attempts.")
|
174 |
+
return ""
|
175 |
+
|
176 |
+
soup = BeautifulSoup(response.text, 'html.parser')
|
177 |
+
|
178 |
+
paragraphs = soup.find_all('p')
|
179 |
+
article_text = ' '.join([p.get_text() for p in paragraphs])
|
180 |
+
return article_text
|
181 |
+
|
182 |
+
url_results = (somehow filter url only with re)
|
183 |
+
articles = [scrape_article(url) for url in url_results]
|
184 |
+
```
|
185 |
+
|
186 |
+
|
187 |
+
""",
|
188 |
+
},
|
189 |
+
{"role": "user", "content": "Keep going"},
|
190 |
+
]
|
191 |
+
|
192 |
+
|
193 |
+
few_shot_4 = [
|
194 |
+
{
|
195 |
+
"role": "user",
|
196 |
+
"content": "Find walmart same store sales growth from Q1 2019 to Q3 2020 by using official data such as Wal-Mart's 10-Q and 10-K disclosures",
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"role": "assistant",
|
200 |
+
"content": """
|
201 |
+
|
202 |
+
To find Walmart's same store sales growth from Q1 2019 to Q3 2020, we need to find Reliable Source first.
|
203 |
+
|
204 |
+
# 1. Identify a Reliable Source:
|
205 |
+
|
206 |
+
To Identify a Reliable Source I need to use google_search function to search the Reliable source
|
207 |
+
|
208 |
+
```python
|
209 |
+
results = google_search("Walmart Investor Relations 10-Q and 10-K reports")
|
210 |
+
print(results)
|
211 |
+
```
|
212 |
+
```RESULT
|
213 |
+
[1] Title : [Walmart SEC Filings]
|
214 |
+
snippet : SEC Filings. Walmart provides its filings with the U.S. Securities and Exchange Commission. ... Annual Report of Employee Stock Plans.
|
215 |
+
link : https://stock.walmart.com/financials/sec-filings/default.aspx
|
216 |
+
...
|
217 |
+
[6] Title : [Form 10-K to "Walmart]
|
218 |
+
snippet : Our SEC filings, our Code of Ethics for our CEO and senior financial officers and our Statement of Ethics can be found on our website at www.stock.walmart.com.
|
219 |
+
link : https://www.sec.gov/Archives/edgar/data/104169/000010416919000016/wmtform10-kx1312019.htm
|
220 |
+
```
|
221 |
+
|
222 |
+
""",
|
223 |
+
},
|
224 |
+
{"role": "user", "content": "Keep going"},
|
225 |
+
{
|
226 |
+
"role": "assistant",
|
227 |
+
"content": """
|
228 |
+
|
229 |
+
Next step is to
|
230 |
+
|
231 |
+
# 2. Choose a Source for Detailed Financial Disclosures and scraping
|
232 |
+
|
233 |
+
From the results provided, there are six main contenders:
|
234 |
+
|
235 |
+
Among those [6] the direct link from the U.S. Securities and Exchange Commission's (SEC) EDGAR database.
|
236 |
+
Given the ease of scraping .htm format with libraries like lxml, and the authenticity of the data from the SEC's official site, I would recommend choosing the link from the EDGAR system.
|
237 |
+
|
238 |
+
```python
|
239 |
+
chosen_link = "https://www.sec.gov/Archives/edgar/data/104169/000010416919000016/wmtform10-kx1312019.htm"
|
240 |
+
|
241 |
+
# define headers
|
242 |
+
headers = {
|
243 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
|
244 |
+
|
245 |
+
# print request to see the contents
|
246 |
+
```
|
247 |
+
|
248 |
+
|
249 |
+
""",
|
250 |
+
},
|
251 |
+
]
|
252 |
+
|
253 |
+
extra_prompt = """
|
254 |
+
|
255 |
+
+ You are so have to check the code is working right. (by printing or plotting)
|
256 |
+
+ Do not leave function alone. Make sure call the function to check it is working correctly
|
257 |
+
+ As an Code Interperter, You aare able to browse the internet or access documents directly (by using beautifulsoup or requests this will need cleaning the text)
|
258 |
+
+ Provide Dummy data and test the function if needed
|
259 |
+
+ 'Do not' pip install
|
260 |
+
+ You must need to use datetime to check current date
|
261 |
+
For Example,
|
262 |
+
from datetime import datetime, timedelta
|
263 |
+
# Download data for 180days
|
264 |
+
data = yf.download('GOOGL', start=datetime.today(), end=end_date - timedelta(days=180))
|
265 |
+
|
266 |
+
+ make sure to use yfinance for financial data gathering (do not use pandas_datareader)
|
267 |
+
+ when plotting you need to
|
268 |
+
[x] plt.show()
|
269 |
+
[o] plt.savefig('./tmp/plot.png')
|
270 |
+
...
|
271 |
+
then
|
272 |
+
![plot]('./tmp/plot.png')
|
273 |
+
|
274 |
+
|
275 |
+
|
276 |
+
Let's think step-by-step
|
277 |
+
"""
|
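For reference, a sketch of how these few-shot lists might seed a data-collection request with the openai==0.27 SDK pinned in requirements.txt; the model name and user query are placeholders, and an OPENAI_API_KEY is assumed to be configured:

# Illustrative only: prepend a few-shot conversation before the new user turn
# (run from the Llama2-Code-Interpreter directory so the package import resolves).
import openai
from prompt.gpt4_prompt import few_shot_1, extra_prompt

messages = [{"role": "system", "content": "You are a helpful code interpreter.\n" + extra_prompt}]
messages += few_shot_1
messages.append({"role": "user", "content": "Plot a sine wave and save it to ./tmp/plot.png"})

resp = openai.ChatCompletion.create(model="gpt-4", messages=messages)  # model name is an assumption
print(resp["choices"][0]["message"]["content"])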
Llama2-Code-Interpreter/requirements.txt
ADDED
@@ -0,0 +1,32 @@
accelerate==0.21.0
bitsandbytes==0.41.1
colorama==0.4.6
coloredlogs==15.0.1
colorlog==6.7.0
datasets==2.12.0
deepspeed==0.10.1
diffusers==0.20.0
einops==0.6.1
gradio==3.37.0
ipykernel==6.25.1
ipython==8.12.2
jupyter_client==8.3.0
jupyter_core==5.3.0
Markdown==3.4.3
nbclient==0.8.0
nbconvert==7.7.1
nbformat==5.8.0
omegaconf==2.3.0
openai==0.27.7
peft @ git+https://github.com/huggingface/peft.git@6c44096c7b8d55a2ecf24be9bc68393467e1584a
rich
scikit-learn
scipy
seaborn
sentencepiece==0.1.99
termcolor==2.3.0
tqdm
transformers @ git+https://github.com/huggingface/transformers@f26099e7b5cf579f99a42bab6ddd371bf2c8d548
triton==2.0.0
yfinance==0.2.28
retrying==1.3.4
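These pins are normally installed with pip install -r Llama2-Code-Interpreter/requirements.txt from the repository root. Note that the error-cleaning utilities below (utils/cleaner.py and utils/check_nb_plot_img_out.py) hardcode a Python 3.8 site-packages path in SITE_PKG_ERROR_PREFIX, so that constant may need adjusting for a different interpreter version.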
Llama2-Code-Interpreter/utils/__pycache__/special_tok_llama2.cpython-311.pyc
ADDED
Binary file (646 Bytes).
|
|
Llama2-Code-Interpreter/utils/check_nb_out.py
ADDED
@@ -0,0 +1,20 @@
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from nbconvert.preprocessors.execute import CellExecutionError

nb = nbformat.v4.new_notebook()

# Add a cell with your code
code_cell = nbformat.v4.new_code_cell(source=f'import os\nprint(os.getcwd())')
nb.cells.append(code_cell)

# Execute the notebook
ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
output_str, error_str = None, None

ep.preprocess(nb)
if nb.cells[0].outputs:  # Check if there are any outputs
    output = nb.cells[-1].outputs[0]

    print(output)
    # Repo path :: /home/seungyoun/llama_code_interpreter\n
Llama2-Code-Interpreter/utils/check_nb_plot_img_out.py
ADDED
@@ -0,0 +1,81 @@
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from nbconvert.preprocessors.execute import CellExecutionError
import base64
from io import BytesIO
import re

def get_error_message(traceback_str):
    lines = traceback_str.split('\n')
    for line in lines:
        if 'Error:' in line:
            return line
    return None  # Return None if no error message is found


nb = nbformat.v4.new_notebook()

SITE_PKG_ERROR_PREFIX = 'File /usr/local/lib/python3.8/'

code_sample = """
import yfinance as yf
import matplotlib.pyplot as plt

# Get the data of the Tesla USD stock price
tsla = yf.Ticker("TSLA-USD")

# Get the historical prices for the last 3 months
tsla_hist = tsla.history(period="max", start="3 months ago")

# Plot the close prices
tsla_hist['Close'].plot(figsize=(16, 9))
plt.title('Tesla stock price last 3 months')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.show()
"""

# Add a cell with your code
code_cell = nbformat.v4.new_code_cell(source=code_sample)
nb.cells.append(code_cell)

# Execute the notebook
ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
output_str, error_str = None, None

try:
    ep.preprocess(nb)
    if nb.cells[0].outputs:  # Check if there are any outputs
        for i,c in enumerate(nb.cells[-1].outputs):
            print(f'[{i+1}] : {c}')

except CellExecutionError as e:
    error_str = e

if error_str is not None:
    # Get the traceback, which is a list of strings, and join them into one string
    filtered_error_msg = error_str.__str__().split('An error occurred while executing the following cell')[-1].split("\n------------------\n")[-1]
    raw_error_msg = "".join(filtered_error_msg)

    # Remove escape sequences for colored text
    #print(raw_error_msg)
    ansi_escape = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')
    error_msg = ansi_escape.sub('', raw_error_msg)

    error_msg_only_cell = error_msg.split(SITE_PKG_ERROR_PREFIX)
    for i,c in enumerate(error_msg_only_cell):
        if i ==0:
            print(f'[{i+1}]\n{c.strip()}\n---')
        if i==3:
            error_header = get_error_message(c)
            print(error_header)


    #error_msg = raw_error_msg.replace("\x1b[0m", "").replace("\x1b[0;31m", "").replace("\x1b[0;32m", "").replace("\x1b[1;32m", "").replace("\x1b[38;5;241m", "").replace("\x1b[38;5;28;01m", "").replace("\x1b[38;5;21m", "").replace("\x1b[38;5;28m", "").replace("\x1b[43m", "").replace("\x1b[49m", "").replace("\x1b[38;5;241;43m", "").replace("\x1b[39;49m", "").replace("\x1b[0;36m", "").replace("\x1b[0;39m", "")
    error_lines = error_msg.split("\n")

    # Only keep the lines up to (and including) the first line that contains 'Error' followed by a ':'
    error_lines = error_lines[:next(i for i, line in enumerate(error_lines) if 'Error:' in line) + 1]

    # Join the lines back into a single string
    error_msg = "\n".join(error_lines)
Llama2-Code-Interpreter/utils/cleaner.py
ADDED
@@ -0,0 +1,28 @@
import re

SITE_PKG_ERROR_PREFIX = 'File /usr/local/lib/python3.8/'

def get_error_header(traceback_str):
    lines = traceback_str.split('\n')
    for line in lines:
        if 'Error:' in line:
            return line
    return ''  # Return an empty string if no error message is found

def clean_error_msg(error_str:str =''):
    filtered_error_msg = error_str.__str__().split('An error occurred while executing the following cell')[-1].split("\n------------------\n")[-1]
    raw_error_msg = "".join(filtered_error_msg)

    # Remove escape sequences for colored text
    ansi_escape = re.compile(r'\x1b\[[0-?]*[ -/]*[@-~]')
    error_msg = ansi_escape.sub('', raw_error_msg)

    error_str_out = ''
    error_msg_only_cell = error_msg.split(SITE_PKG_ERROR_PREFIX)

    error_str_out += f'{error_msg_only_cell[0]}\n'
    error_header = get_error_header(error_msg_only_cell[-1])
    if error_header not in error_str_out:
        error_str_out += get_error_header(error_msg_only_cell[-1])

    return error_str_out
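For reference, a small sketch of clean_error_msg on a fabricated traceback string (the input below is made up for illustration):

# Illustrative only: strip ANSI colour codes and the boilerplate around a notebook traceback.
from cleaner import clean_error_msg  # run from the utils/ directory, or adjust the import path

fake_traceback = (
    "An error occurred while executing the following cell"
    "\n------------------\n"
    "\x1b[0;31mZeroDivisionError\x1b[0m: division by zero\n"
)
print(clean_error_msg(fake_traceback))  # prints: ZeroDivisionError: division by zero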
Llama2-Code-Interpreter/utils/const.py
ADDED
@@ -0,0 +1,314 @@
1 |
+
from typing import List, Literal, Optional, Tuple, TypedDict
|
2 |
+
|
3 |
+
|
4 |
+
Role = Literal["system", "user", "assistant"]
|
5 |
+
|
6 |
+
|
7 |
+
class Message(TypedDict):
|
8 |
+
role: Role
|
9 |
+
content: str
|
10 |
+
|
11 |
+
|
12 |
+
Dialog = List[Message]
|
13 |
+
|
14 |
+
model_path = "./ckpt/llama-2-13b-chat"
|
15 |
+
|
16 |
+
B_INST, E_INST = "[INST]", "[/INST]"
|
17 |
+
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
|
18 |
+
DEFAULT_PAD_TOKEN = "[PAD]"
|
19 |
+
DEFAULT_EOS_TOKEN = "</s>"
|
20 |
+
DEFAULT_BOS_TOKEN = "<s>"
|
21 |
+
DEFAULT_UNK_TOKEN = "<unk>"
|
22 |
+
|
23 |
+
IMPORT_PKG = """
|
24 |
+
import numpy as np
|
25 |
+
import pandas as pd
|
26 |
+
import matplotlib.pyplot as plt
|
27 |
+
import seaborn as sns
|
28 |
+
from scipy import stats
|
29 |
+
import os,sys
|
30 |
+
import re
|
31 |
+
from datetime import datetime
|
32 |
+
from sympy import symbols, Eq, solve
|
33 |
+
import torch
|
34 |
+
import requests
|
35 |
+
from bs4 import BeautifulSoup
|
36 |
+
import json
|
37 |
+
import math
|
38 |
+
import yfinance
|
39 |
+
"""
|
40 |
+
|
41 |
+
TOOLS_CODE = r"""
|
42 |
+
|
43 |
+
import requests
|
44 |
+
import tweepy
|
45 |
+
import json,time
|
46 |
+
from urllib.parse import quote_plus
|
47 |
+
from typing import Dict
|
48 |
+
from bs4 import BeautifulSoup
|
49 |
+
|
50 |
+
|
51 |
+
#Goolge Search
|
52 |
+
GOOGLE_API_KEY = "<YOUR>"
|
53 |
+
GOOGLE_CSE_ID = '<YOUR>'
|
54 |
+
MAX_GOOGLE_RESULT = 5
|
55 |
+
|
56 |
+
#Twitter Key
|
57 |
+
TWITTER_API_KEY = "<YOUR>"
|
58 |
+
TWITTER_API_KEY_SECRET = "<YOUR>"
|
59 |
+
TWITTER_ACCESS_TOKEN = "<YOUR>"
|
60 |
+
TWITTER_TOKEN_SECRET = "<YOUR>"
|
61 |
+
|
62 |
+
class GoogleSearch:
|
63 |
+
def __init__(self):
|
64 |
+
self.api_key = GOOGLE_API_KEY
|
65 |
+
self.cse_id = GOOGLE_CSE_ID
|
66 |
+
self.url = "https://www.googleapis.com/customsearch/v1"
|
67 |
+
|
68 |
+
def search(self, search_term, **kwargs):
|
69 |
+
params = {
|
70 |
+
'q': search_term,
|
71 |
+
'key': self.api_key,
|
72 |
+
'cx': self.cse_id,
|
73 |
+
}
|
74 |
+
params.update(kwargs)
|
75 |
+
response = requests.get(self.url, params=params)
|
76 |
+
return response.json()
|
77 |
+
|
78 |
+
def __call__(self, search_term, **kwargs):
|
79 |
+
results = self.search(search_term, **kwargs)
|
80 |
+
output_str = ''
|
81 |
+
for idx, item in enumerate(results.get('items', [])):
|
82 |
+
if idx > MAX_GOOGLE_RESULT:
|
83 |
+
break
|
84 |
+
title = item.get('title').replace('\n','')
|
85 |
+
snippet = item.get('snippet').replace('\n','')
|
86 |
+
link = item.get('link').replace('\n','')
|
87 |
+
output_str += f"[{idx+1}] Title : [{title}]\n\tsnippet : {snippet}\n\tlink : {link}\n"
|
88 |
+
return output_str
|
89 |
+
|
90 |
+
|
91 |
+
class ArxivAPI:
|
92 |
+
def __init__(self):
|
93 |
+
self.base_url = 'http://export.arxiv.org/api/query?'
|
94 |
+
self.headers = {'User-Agent': 'Mozilla/5.0'}
|
95 |
+
|
96 |
+
def clean_str(self, results):
|
97 |
+
output_str = ''
|
98 |
+
for idx, result in enumerate(results):
|
99 |
+
output_str += f"[{idx+1}]title : {result['title'].strip()}({result['id'].strip()})\n"
|
100 |
+
return output_str
|
101 |
+
|
102 |
+
def search(self, query: str, max_results: int = 10):
|
103 |
+
query = quote_plus(query)
|
104 |
+
search_query = f'search_query=all:{query}&start=0&max_results={max_results}'
|
105 |
+
url = self.base_url + search_query
|
106 |
+
response = requests.get(url, headers=self.headers)
|
107 |
+
if response.status_code != 200:
|
108 |
+
raise Exception(f'Error: {response.status_code}')
|
109 |
+
soup = BeautifulSoup(response.content, 'xml')
|
110 |
+
entries = soup.find_all('entry')
|
111 |
+
results = [{'id': entry.id.text, 'title': entry.title.text} for entry in entries]
|
112 |
+
return self.clean_str(results)
|
113 |
+
|
114 |
+
|
115 |
+
# google
|
116 |
+
google_search = GoogleSearch()
|
117 |
+
|
118 |
+
# Arxiv
|
119 |
+
arxiv = ArxivAPI()
|
120 |
+
"""
|
121 |
+
|
122 |
+
|
123 |
+
CODE_INTERPRETER_SYSTEM_PROMPT = """
|
124 |
+
You are helpful agent that can generate code, execute, getting feedback(results).
|
125 |
+
|
126 |
+
|
127 |
+
For example,
|
128 |
+
###User : what is 52th fibonacci number
|
129 |
+
###Assistant :
|
130 |
+
I think I need to use python code to generate 52th fibonacci number
|
131 |
+
|
132 |
+
```python
|
133 |
+
import numpy as np
|
134 |
+
|
135 |
+
def fibonacci(n):
|
136 |
+
sq = np.sqrt(5)
|
137 |
+
phi = (1 + sq) / 2
|
138 |
+
return round((phi ** n) / sq)
|
139 |
+
|
140 |
+
fibo_52 = fibonacci(52)
|
141 |
+
fibo_52
|
142 |
+
```
|
143 |
+
```RESULT
|
144 |
+
32951280099
|
145 |
+
```
|
146 |
+
The 52nd Fibonacci number is 32951280099.
|
147 |
+
|
148 |
+
+ Each code block is executed individually in each cell of a Jupyter notebook.
|
149 |
+
+ If the generated code runs successfully, there's no need to generate redundant code.
|
150 |
+
+ The code is written to provide answers to the questions.
|
151 |
+
|
152 |
+
"""
|
153 |
+
|
154 |
+
CODE_INTERPRETER_SYSTEM_PROMPT = """
|
155 |
+
You are helpful agent that can generate code, execute, getting feedback(results).
|
156 |
+
|
157 |
+
For example,
|
158 |
+
###User : Show me YTD bitcoin price.
|
159 |
+
###Assistant :
|
160 |
+
|
161 |
+
Here's a sample Python code using pandas, matplotlib, and yfinance to get the Year-to-date (YTD) Bitcoin price and plot it:
|
162 |
+
```python
|
163 |
+
import yfinance as yf
|
164 |
+
import matplotlib.pyplot as plt
|
165 |
+
|
166 |
+
# Get the data of the Bitcoin USD price
|
167 |
+
btc = yf.Ticker("BTC-USD")
|
168 |
+
|
169 |
+
# Get the historical prices for this year
|
170 |
+
btc_hist = btc.history(period="ytd")
|
171 |
+
|
172 |
+
# Plot the close prices
|
173 |
+
btc_hist['Close'].plot(figsize=(16, 9))
|
174 |
+
plt.title('Year-to-date Bitcoin price')
|
175 |
+
plt.xlabel('Date')
|
176 |
+
plt.ylabel('Price (USD)')
|
177 |
+
plt.savefig('./tmp/chart.png')
|
178 |
+
```
|
179 |
+
```RESULT
|
180 |
+
<Figure size 1600x900 with 1 Axes>
|
181 |
+
```
|
182 |
+
|
183 |
+
Here is the chart of the bitcoin close YTD chart : ![chart]('./tmp/chart.png')</s>
|
184 |
+
|
185 |
+
+ Each code block is executed individually in each cell of a Jupyter notebook.
|
186 |
+
+ If the generated code runs successfully, there's no need to generate redundant code.
|
187 |
+
+ The code is written to provide answers to the questions.
|
188 |
+
|
189 |
+
"""
|
190 |
+
|
191 |
+
CODE_INTERPRETER_SYSTEM_PROMPT = """
|
192 |
+
As an advanced language model, you can generate code as part of your responses.
|
193 |
+
To make the code more noticeable and easier to read, please encapsulate it within triple backticks.
|
194 |
+
For instance, if you're providing Python code, wrap it as follows:
|
195 |
+
|
196 |
+
```python
|
197 |
+
print('hellow world')
|
198 |
+
```
|
199 |
+
|
200 |
+
Basically this two tools are provided.
|
201 |
+
|
202 |
+
```python
|
203 |
+
# google
|
204 |
+
google_search = GoogleSearch()
|
205 |
+
results = google_search("Current korean president") #query -> string output
|
206 |
+
print(results) # string
|
207 |
+
|
208 |
+
# Arxiv
|
209 |
+
arxiv = ArxivAPI()
|
210 |
+
results = arxiv.search('embodied ai') #query -> string
|
211 |
+
print(results) # string
|
212 |
+
```
|
213 |
+
|
214 |
+
After presenting the results from the code
|
215 |
+
You will provide a useful explanation or interpretation of the output to further aid your understanding."
|
216 |
+
|
217 |
+
Additionally, when generating plots or figures,
|
218 |
+
I'll save them to a specified path, like ./tmp/plot.png, so that they can be viewed.
|
219 |
+
After saving the plot, I'll use the following markdown syntax to display the image at the end of the response:
|
220 |
+
![plot]('./tmp/plot.png')
|
221 |
+
|
222 |
+
You are using jupyter notebook currently.
|
223 |
+
This approach allows me to visually present data and findings."
|
224 |
+
"""
|
225 |
+
|
226 |
+
CODE_INTERPRETER_SYSTEM_PROMPT_PRESIDENT = """
|
227 |
+
You are helpful agent that can code
|
228 |
+
|
229 |
+
You are avaible to use
|
230 |
+
numpy, beautifulsoup, torch, PIL, opencv, ...
|
231 |
+
|
232 |
+
For example,
|
233 |
+
###User : Who is current president of singapore?
|
234 |
+
###Assistant :
|
235 |
+
|
236 |
+
Here's a sample Python code using pandas, matplotlib, and yfinance to get the Year-to-date (YTD) Bitcoin price and plot it:
|
237 |
+
```python
|
238 |
+
import requests
|
239 |
+
from bs4 import BeautifulSoup
|
240 |
+
|
241 |
+
def get_current_south_korea_president():
|
242 |
+
url = 'https://www.president.go.kr/president/greeting'
|
243 |
+
response = requests.get(url)
|
244 |
+
soup = BeautifulSoup(response.text, 'html.parser')
|
245 |
+
# Find the president's name
|
246 |
+
president_name = soup.find('title').text.strip()
|
247 |
+
return president_name
|
248 |
+
|
249 |
+
get_current_south_korea_president()
|
250 |
+
```
|
251 |
+
```RESULT
|
252 |
+
대한민국 대통령 > 윤석열 대통령 > 취임사
|
253 |
+
```
|
254 |
+
|
255 |
+
The current President of Korea is 윤석열
|
256 |
+
"""
|
257 |
+
|
258 |
+
|
259 |
+
CODE_INTERPRETER_SYSTEM_PROMPT_GPT4 = f"""
|
260 |
+
You are helpful agent that can code
|
261 |
+
|
262 |
+
You are avaible to use
|
263 |
+
{IMPORT_PKG}
|
264 |
+
"""
|
265 |
+
|
266 |
+
|
267 |
+
CODE_INTERPRETER_SYSTEM_PROMPT_GPT4 = f"""
|
268 |
+
You are helpful agent that can code.
|
269 |
+
|
270 |
+
### User : Can you show me the distribution of the current
|
271 |
+
"""
|
272 |
+
|
273 |
+
|
274 |
+
CODE_INTERPRETER_SYSTEM_PROMPT_GPT4_BASE = f"""
|
275 |
+
You are helpful agent that can code.
|
276 |
+
|
277 |
+
For example,
|
278 |
+
|
279 |
+
### User : Show me YTD bitcoin pirce.
|
280 |
+
|
281 |
+
### Assistant : Sure thing! Here's a Python script using yfinance to get the YTD Bitcoin price and save it into a CSV file using pandas.
|
282 |
+
It also plots the price using matplotlib. Please note we are saving the plot in the
|
283 |
+
./tmp/ directory as 'bitcoin_YTD.png' and data as 'bitcoin_YTD.csv'.
|
284 |
+
|
285 |
+
```python
|
286 |
+
import yfinance as yf
|
287 |
+
import matplotlib.pyplot as plt
|
288 |
+
import pandas as pd\nfrom datetime import datetime
|
289 |
+
|
290 |
+
# Get the current year
|
291 |
+
year = datetime.now().year
|
292 |
+
# Get the data of Bitcoin from the beginning of this year until now
|
293 |
+
btc = yf.download('BTC-USD', start=str(year)+'-01-01', end=datetime.now().strftime(\"%Y-%m-%d\"))
|
294 |
+
|
295 |
+
# Save the data to a .csv file
|
296 |
+
btc.to_csv('./tmp/bitcoin_YTD.csv')
|
297 |
+
|
298 |
+
# Create a plot
|
299 |
+
plt.figure(figsize=(14, 7))
|
300 |
+
plt.plot(btc['Close'])
|
301 |
+
plt.title('Bitcoin Price YTD')
|
302 |
+
plt.xlabel('Date')
|
303 |
+
plt.ylabel('Price'
|
304 |
+
nplt.grid(True)
|
305 |
+
plt.savefig('./tmp/bitcoin_YTD.png')
|
306 |
+
```
|
307 |
+
|
308 |
+
```RESULTS
|
309 |
+
[*********************100%***********************] 1 of 1 completed
|
310 |
+
<Figure size 1400x700 with 1 Axes>
|
311 |
+
```
|
312 |
+
|
313 |
+
Here is plot : ![BTC-YTD]('./tmp/bitcoin_YTD.png')
|
314 |
+
"""
|
Llama2-Code-Interpreter/utils/convert_llama_weights_to_hf.py
ADDED
@@ -0,0 +1,375 @@
1 |
+
# Copyright 2022 EleutherAI and The HuggingFace Inc. team. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
import argparse
|
15 |
+
import gc
|
16 |
+
import json
|
17 |
+
import os
|
18 |
+
import shutil
|
19 |
+
import warnings
|
20 |
+
|
21 |
+
import torch
|
22 |
+
|
23 |
+
from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer
|
24 |
+
|
25 |
+
|
26 |
+
try:
|
27 |
+
from transformers import LlamaTokenizerFast
|
28 |
+
except ImportError as e:
|
29 |
+
warnings.warn(e)
|
30 |
+
warnings.warn(
|
31 |
+
"The converted tokenizer will be the `slow` tokenizer. To use the fast, update your `tokenizers` library and re-run the tokenizer conversion"
|
32 |
+
)
|
33 |
+
LlamaTokenizerFast = None
|
34 |
+
|
35 |
+
"""
|
36 |
+
Sample usage:
|
37 |
+
|
38 |
+
```
|
39 |
+
python src/transformers/models/llama/convert_llama_weights_to_hf.py \
|
40 |
+
--input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir /output/path
|
41 |
+
```
|
42 |
+
|
43 |
+
Thereafter, models can be loaded via:
|
44 |
+
|
45 |
+
```py
|
46 |
+
from transformers import LlamaForCausalLM, LlamaTokenizer
|
47 |
+
|
48 |
+
model = LlamaForCausalLM.from_pretrained("/output/path")
|
49 |
+
tokenizer = LlamaTokenizer.from_pretrained("/output/path")
|
50 |
+
```
|
51 |
+
|
52 |
+
Important note: you need to be able to host the whole model in RAM to execute this script (even if the biggest versions
|
53 |
+
come in several checkpoints they each contain a part of each weight of the model, so we need to load them all in RAM).
|
54 |
+
"""
|
55 |
+
|
56 |
+
INTERMEDIATE_SIZE_MAP = {
|
57 |
+
"7B": 11008,
|
58 |
+
"13B": 13824,
|
59 |
+
"30B": 17920,
|
60 |
+
"65B": 22016,
|
61 |
+
"70B": 28672,
|
62 |
+
}
|
63 |
+
NUM_SHARDS = {
|
64 |
+
"7B": 1,
|
65 |
+
"7Bf": 1,
|
66 |
+
"13B": 2,
|
67 |
+
"13Bf": 2,
|
68 |
+
"30B": 4,
|
69 |
+
"65B": 8,
|
70 |
+
"70B": 8,
|
71 |
+
"70Bf": 8,
|
72 |
+
}
|
73 |
+
|
74 |
+
|
75 |
+
def compute_intermediate_size(n, ffn_dim_multiplier=1, multiple_of=256):
|
76 |
+
return multiple_of * (
|
77 |
+
(int(ffn_dim_multiplier * int(8 * n / 3)) + multiple_of - 1) // multiple_of
|
78 |
+
)
|
79 |
+
|
80 |
+
|
81 |
+
def read_json(path):
|
82 |
+
with open(path, "r") as f:
|
83 |
+
return json.load(f)
|
84 |
+
|
85 |
+
|
86 |
+
def write_json(text, path):
|
87 |
+
with open(path, "w") as f:
|
88 |
+
json.dump(text, f)
|
89 |
+
|
90 |
+
|
91 |
+
def write_model(model_path, input_base_path, model_size, safe_serialization=True):
|
92 |
+
os.makedirs(model_path, exist_ok=True)
|
93 |
+
tmp_model_path = os.path.join(model_path, "tmp")
|
94 |
+
os.makedirs(tmp_model_path, exist_ok=True)
|
95 |
+
|
96 |
+
input_base_path = "/home/seungyoun/llama/ckpt/llama-2-7b"
|
97 |
+
params = read_json(os.path.join(input_base_path, "params.json"))
|
98 |
+
num_shards = NUM_SHARDS[model_size]
|
99 |
+
n_layers = params["n_layers"]
|
100 |
+
n_heads = params["n_heads"]
|
101 |
+
n_heads_per_shard = n_heads // num_shards
|
102 |
+
dim = params["dim"]
|
103 |
+
dims_per_head = dim // n_heads
|
104 |
+
base = 10000.0
|
105 |
+
inv_freq = 1.0 / (
|
106 |
+
base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head)
|
107 |
+
)
|
108 |
+
|
109 |
+
if "n_kv_heads" in params:
|
110 |
+
num_key_value_heads = params["n_kv_heads"] # for GQA / MQA
|
111 |
+
num_local_key_value_heads = n_heads_per_shard // num_key_value_heads
|
112 |
+
key_value_dim = dim // num_key_value_heads
|
113 |
+
else: # compatibility with other checkpoints
|
114 |
+
num_key_value_heads = n_heads
|
115 |
+
num_local_key_value_heads = n_heads_per_shard
|
116 |
+
key_value_dim = dim
|
117 |
+
|
118 |
+
# permute for sliced rotary
|
119 |
+
def permute(w, n_heads=n_heads, dim1=dim, dim2=dim):
|
120 |
+
return (
|
121 |
+
w.view(n_heads, dim1 // n_heads // 2, 2, dim2)
|
122 |
+
.transpose(1, 2)
|
123 |
+
.reshape(dim1, dim2)
|
124 |
+
)
|
125 |
+
|
126 |
+
print(f"Fetching all parameters from the checkpoint at {input_base_path}.")
|
127 |
+
# Load weights
|
128 |
+
if model_size == "7B":
|
129 |
+
# Not sharded
|
130 |
+
# (The sharded implementation would also work, but this is simpler.)
|
131 |
+
loaded = torch.load(
|
132 |
+
os.path.join(input_base_path, "consolidated.00.pth"), map_location="cpu"
|
133 |
+
)
|
134 |
+
else:
|
135 |
+
# Sharded
|
136 |
+
loaded = [
|
137 |
+
torch.load(
|
138 |
+
os.path.join(input_base_path, f"consolidated.{i:02d}.pth"),
|
139 |
+
map_location="cpu",
|
140 |
+
)
|
141 |
+
for i in range(num_shards)
|
142 |
+
]
|
143 |
+
param_count = 0
|
144 |
+
index_dict = {"weight_map": {}}
|
145 |
+
for layer_i in range(n_layers):
|
146 |
+
filename = f"pytorch_model-{layer_i + 1}-of-{n_layers + 1}.bin"
|
147 |
+
if model_size == "7B":
|
148 |
+
# Unsharded
|
149 |
+
state_dict = {
|
150 |
+
f"model.layers.{layer_i}.self_attn.q_proj.weight": permute(
|
151 |
+
loaded[f"layers.{layer_i}.attention.wq.weight"]
|
152 |
+
),
|
153 |
+
f"model.layers.{layer_i}.self_attn.k_proj.weight": permute(
|
154 |
+
loaded[f"layers.{layer_i}.attention.wk.weight"]
|
155 |
+
),
|
156 |
+
f"model.layers.{layer_i}.self_attn.v_proj.weight": loaded[
|
157 |
+
f"layers.{layer_i}.attention.wv.weight"
|
158 |
+
],
|
159 |
+
f"model.layers.{layer_i}.self_attn.o_proj.weight": loaded[
|
160 |
+
f"layers.{layer_i}.attention.wo.weight"
|
161 |
+
],
|
162 |
+
f"model.layers.{layer_i}.mlp.gate_proj.weight": loaded[
|
163 |
+
f"layers.{layer_i}.feed_forward.w1.weight"
|
164 |
+
],
|
165 |
+
f"model.layers.{layer_i}.mlp.down_proj.weight": loaded[
|
166 |
+
f"layers.{layer_i}.feed_forward.w2.weight"
|
167 |
+
],
|
168 |
+
f"model.layers.{layer_i}.mlp.up_proj.weight": loaded[
|
169 |
+
f"layers.{layer_i}.feed_forward.w3.weight"
|
170 |
+
],
|
171 |
+
f"model.layers.{layer_i}.input_layernorm.weight": loaded[
|
172 |
+
f"layers.{layer_i}.attention_norm.weight"
|
173 |
+
],
|
174 |
+
f"model.layers.{layer_i}.post_attention_layernorm.weight": loaded[
|
175 |
+
f"layers.{layer_i}.ffn_norm.weight"
|
176 |
+
],
|
177 |
+
}
|
178 |
+
else:
|
179 |
+
# Sharded
|
180 |
+
# Note that attention.w{q,k,v,o}, feed_fordward.w[1,2,3], attention_norm.weight and ffn_norm.weight share
|
181 |
+
# the same storage object, saving attention_norm and ffn_norm will save other weights too, which is
|
182 |
+
# redundant as other weights will be stitched from multiple shards. To avoid that, they are cloned.
|
183 |
+
|
184 |
+
state_dict = {
|
185 |
+
f"model.layers.{layer_i}.input_layernorm.weight": loaded[0][
|
186 |
+
f"layers.{layer_i}.attention_norm.weight"
|
187 |
+
].clone(),
|
188 |
+
f"model.layers.{layer_i}.post_attention_layernorm.weight": loaded[0][
|
189 |
+
f"layers.{layer_i}.ffn_norm.weight"
|
190 |
+
].clone(),
|
191 |
+
}
|
192 |
+
state_dict[f"model.layers.{layer_i}.self_attn.q_proj.weight"] = permute(
|
193 |
+
torch.cat(
|
194 |
+
[
|
195 |
+
loaded[i][f"layers.{layer_i}.attention.wq.weight"].view(
|
196 |
+
n_heads_per_shard, dims_per_head, dim
|
197 |
+
)
|
198 |
+
for i in range(num_shards)
|
199 |
+
],
|
200 |
+
dim=0,
|
201 |
+
).reshape(dim, dim)
|
202 |
+
)
|
203 |
+
state_dict[f"model.layers.{layer_i}.self_attn.k_proj.weight"] = permute(
|
204 |
+
torch.cat(
|
205 |
+
[
|
206 |
+
loaded[i][f"layers.{layer_i}.attention.wk.weight"].view(
|
207 |
+
num_local_key_value_heads, dims_per_head, dim
|
208 |
+
)
|
209 |
+
for i in range(num_shards)
|
210 |
+
],
|
211 |
+
dim=0,
|
212 |
+
).reshape(key_value_dim, dim),
|
213 |
+
num_key_value_heads,
|
214 |
+
key_value_dim,
|
215 |
+
dim,
|
216 |
+
)
|
217 |
+
state_dict[f"model.layers.{layer_i}.self_attn.v_proj.weight"] = torch.cat(
|
218 |
+
[
|
219 |
+
loaded[i][f"layers.{layer_i}.attention.wv.weight"].view(
|
220 |
+
num_local_key_value_heads, dims_per_head, dim
|
221 |
+
)
|
222 |
+
for i in range(num_shards)
|
223 |
+
],
|
224 |
+
dim=0,
|
225 |
+
).reshape(key_value_dim, dim)
|
226 |
+
|
227 |
+
state_dict[f"model.layers.{layer_i}.self_attn.o_proj.weight"] = torch.cat(
|
228 |
+
[
|
229 |
+
loaded[i][f"layers.{layer_i}.attention.wo.weight"]
|
230 |
+
for i in range(num_shards)
|
231 |
+
],
|
232 |
+
dim=1,
|
233 |
+
)
|
234 |
+
state_dict[f"model.layers.{layer_i}.mlp.gate_proj.weight"] = torch.cat(
|
235 |
+
[
|
236 |
+
loaded[i][f"layers.{layer_i}.feed_forward.w1.weight"]
|
237 |
+
for i in range(num_shards)
|
238 |
+
],
|
239 |
+
dim=0,
|
240 |
+
)
|
241 |
+
state_dict[f"model.layers.{layer_i}.mlp.down_proj.weight"] = torch.cat(
|
242 |
+
[
|
243 |
+
loaded[i][f"layers.{layer_i}.feed_forward.w2.weight"]
|
244 |
+
for i in range(num_shards)
|
245 |
+
],
|
246 |
+
dim=1,
|
247 |
+
)
|
248 |
+
state_dict[f"model.layers.{layer_i}.mlp.up_proj.weight"] = torch.cat(
|
249 |
+
[
|
250 |
+
loaded[i][f"layers.{layer_i}.feed_forward.w3.weight"]
|
251 |
+
for i in range(num_shards)
|
252 |
+
],
|
253 |
+
dim=0,
|
254 |
+
)
|
255 |
+
|
256 |
+
state_dict[f"model.layers.{layer_i}.self_attn.rotary_emb.inv_freq"] = inv_freq
|
257 |
+
for k, v in state_dict.items():
|
258 |
+
index_dict["weight_map"][k] = filename
|
259 |
+
param_count += v.numel()
|
260 |
+
torch.save(state_dict, os.path.join(tmp_model_path, filename))
|
261 |
+
|
262 |
+
filename = f"pytorch_model-{n_layers + 1}-of-{n_layers + 1}.bin"
|
263 |
+
if model_size == "7B":
|
264 |
+
# Unsharded
|
265 |
+
state_dict = {
|
266 |
+
"model.embed_tokens.weight": loaded["tok_embeddings.weight"],
|
267 |
+
"model.norm.weight": loaded["norm.weight"],
|
268 |
+
"lm_head.weight": loaded["output.weight"],
|
269 |
+
}
|
270 |
+
else:
|
271 |
+
state_dict = {
|
272 |
+
"model.norm.weight": loaded[0]["norm.weight"],
|
273 |
+
"model.embed_tokens.weight": torch.cat(
|
274 |
+
[loaded[i]["tok_embeddings.weight"] for i in range(num_shards)], dim=1
|
275 |
+
),
|
276 |
+
"lm_head.weight": torch.cat(
|
277 |
+
[loaded[i]["output.weight"] for i in range(num_shards)], dim=0
|
278 |
+
),
|
279 |
+
}
|
280 |
+
|
281 |
+
for k, v in state_dict.items():
|
282 |
+
index_dict["weight_map"][k] = filename
|
283 |
+
param_count += v.numel()
|
284 |
+
torch.save(state_dict, os.path.join(tmp_model_path, filename))
|
285 |
+
|
286 |
+
# Write configs
|
287 |
+
index_dict["metadata"] = {"total_size": param_count * 2}
|
288 |
+
write_json(index_dict, os.path.join(tmp_model_path, "pytorch_model.bin.index.json"))
|
289 |
+
ffn_dim_multiplier = (
|
290 |
+
params["ffn_dim_multiplier"] if "ffn_dim_multiplier" in params else 1
|
291 |
+
)
|
292 |
+
multiple_of = params["multiple_of"] if "multiple_of" in params else 256
|
293 |
+
config = LlamaConfig(
|
294 |
+
hidden_size=dim,
|
295 |
+
intermediate_size=compute_intermediate_size(
|
296 |
+
dim, ffn_dim_multiplier, multiple_of
|
297 |
+
),
|
298 |
+
num_attention_heads=params["n_heads"],
|
299 |
+
num_hidden_layers=params["n_layers"],
|
300 |
+
rms_norm_eps=params["norm_eps"],
|
301 |
+
num_key_value_heads=num_key_value_heads,
|
302 |
+
)
|
303 |
+
config.save_pretrained(tmp_model_path)
|
304 |
+
|
305 |
+
# Make space so we can load the model properly now.
|
306 |
+
del state_dict
|
307 |
+
del loaded
|
308 |
+
gc.collect()
|
309 |
+
|
310 |
+
print("Loading the checkpoint in a Llama model.")
|
311 |
+
model = LlamaForCausalLM.from_pretrained(
|
312 |
+
tmp_model_path, torch_dtype=torch.float16, low_cpu_mem_usage=True
|
313 |
+
)
|
314 |
+
# Avoid saving this as part of the config.
|
315 |
+
del model.config._name_or_path
|
316 |
+
|
317 |
+
print("Saving in the Transformers format.")
|
318 |
+
model.save_pretrained(model_path, safe_serialization=safe_serialization)
|
319 |
+
shutil.rmtree(tmp_model_path)
|
320 |
+
|
321 |
+
|
322 |
+
def write_tokenizer(tokenizer_path, input_tokenizer_path):
|
323 |
+
# Initialize the tokenizer based on the `spm` model
|
324 |
+
tokenizer_class = (
|
325 |
+
LlamaTokenizer if LlamaTokenizerFast is None else LlamaTokenizerFast
|
326 |
+
)
|
327 |
+
print(f"Saving a {tokenizer_class.__name__} to {tokenizer_path}.")
|
328 |
+
tokenizer = tokenizer_class(input_tokenizer_path)
|
329 |
+
tokenizer.save_pretrained(tokenizer_path)
|
330 |
+
|
331 |
+
|
332 |
+
def main():
|
333 |
+
parser = argparse.ArgumentParser()
|
334 |
+
parser.add_argument(
|
335 |
+
"--input_dir",
|
336 |
+
help="Location of LLaMA weights, which contains tokenizer.model and model folders",
|
337 |
+
)
|
338 |
+
parser.add_argument(
|
339 |
+
"--model_size",
|
340 |
+
choices=[
|
341 |
+
"7B",
|
342 |
+
"7Bf",
|
343 |
+
"13B",
|
344 |
+
"13Bf",
|
345 |
+
"30B",
|
346 |
+
"65B",
|
347 |
+
"70B",
|
348 |
+
"70Bf",
|
349 |
+
"tokenizer_only",
|
350 |
+
],
|
351 |
+
)
|
352 |
+
parser.add_argument(
|
353 |
+
"--output_dir",
|
354 |
+
help="Location to write HF model and tokenizer",
|
355 |
+
)
|
356 |
+
parser.add_argument(
|
357 |
+
"--safe_serialization",
|
358 |
+
type=bool,
|
359 |
+
help="Whether or not to save using `safetensors`.",
|
360 |
+
)
|
361 |
+
args = parser.parse_args()
|
362 |
+
if args.model_size != "tokenizer_only":
|
363 |
+
write_model(
|
364 |
+
model_path=args.output_dir,
|
365 |
+
input_base_path=os.path.join(args.input_dir, args.model_size),
|
366 |
+
model_size=args.model_size,
|
367 |
+
safe_serialization=args.safe_serialization,
|
368 |
+
)
|
    spm_path = os.path.join(args.input_dir, "tokenizer.model")
    # A hard-coded local path originally overrode --input_dir here; prefer the CLI argument:
    # spm_path = "/home/seungyoun/llama/ckpt/tokenizer.model"
    write_tokenizer(args.output_dir, spm_path)


if __name__ == "__main__":
    main()
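Once the script above has written the Transformers-format checkpoint, the result can be loaded like any other Hugging Face model. The snippet below is a minimal sketch and is not part of the repository; the directory `./llama-2-7b-hf` is a hypothetical stand-in for whatever `--output_dir` was passed to the converter.

```python
from transformers import LlamaForCausalLM, LlamaTokenizer

# Hypothetical path: whatever --output_dir was passed to convert_llama_weights_to_hf.py.
model_dir = "./llama-2-7b-hf"

tokenizer = LlamaTokenizer.from_pretrained(model_dir)
model = LlamaForCausalLM.from_pretrained(model_dir)  # add torch_dtype / device_map as needed

# Quick smoke test that the converted weights and tokenizer agree.
inputs = tokenizer("def fibonacci(n):", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```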
Llama2-Code-Interpreter/utils/special_tok_llama2.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
B_CODE = "[CODE_START_TOK]"
|
2 |
+
E_CODE = "[/CODE_END_TOK]"
|
3 |
+
|
4 |
+
B_RESULT = "[RESULT_TOK]"
|
5 |
+
E_RESULT = "[/RESULT_TOK]"
|
6 |
+
|
7 |
+
B_INST, E_INST = "[INST]", "[/INST]"
|
8 |
+
B_SYS, E_SYS = "<<SYS>>", "<</SYS>>"
|
9 |
+
|
10 |
+
IGNORE_INDEX = -100
|
11 |
+
DEFAULT_PAD_TOKEN = "[PAD]"
|
12 |
+
DEFAULT_EOS_TOKEN = "</s>"
|
13 |
+
DEFAULT_BOS_TOKEN = "<s>"
|
14 |
+
DEFAULT_UNK_TOKEN = "<unk>"
|
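These constants define the markup the Llama2 code-interpreter fine-tuning uses to delimit instructions, generated code, and execution results. How the training and inference code consumes them lives elsewhere in the repository; the sketch below only illustrates the typical pattern of registering such markers with a tokenizer so they are kept as single tokens. The base checkpoint `codellama/CodeLlama-7b-hf` is an assumption; any Llama-2-family tokenizer behaves the same way, and the import assumes this `utils` directory is on the Python path.

```python
from transformers import AutoTokenizer

from special_tok_llama2 import (
    B_CODE, E_CODE, B_RESULT, E_RESULT, B_INST, E_INST, DEFAULT_PAD_TOKEN,
)

# Hypothetical base checkpoint; any Llama-2-style tokenizer works the same way.
tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")

# Register the markers so they are never split into sub-word pieces.
tokenizer.add_special_tokens({"pad_token": DEFAULT_PAD_TOKEN})
tokenizer.add_tokens([B_CODE, E_CODE, B_RESULT, E_RESULT], special_tokens=True)

# One interpreter-style turn: instruction, generated code, execution result.
turn = (
    f"{B_INST} Plot y = x**2 for x in [0, 10]. {E_INST}"
    f"{B_CODE}\nimport matplotlib.pyplot as plt\n{E_CODE}"
    f"{B_RESULT}\n<figure rendered>\n{E_RESULT}"
)
print(tokenizer.tokenize(turn)[:20])
```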
OpenCodeInterpreter/LICENSE
ADDED
@@ -0,0 +1,201 @@
Apache License, Version 2.0, January 2004 (http://www.apache.org/licenses/). Standard 201-line Apache-2.0 license text: terms and conditions for use, reproduction, and distribution, followed by the appendix on how to apply the license to your work.
OpenCodeInterpreter/README.md
ADDED
@@ -0,0 +1,83 @@
# OpenCodeInterpreter: Integrating Code Generation with Execution and Refinement

<p align="center">
    <img width="1000px" alt="OpenCodeInterpreter" src="https://opencodeinterpreter.github.io/static/images/figure1.png">
</p>
<p align="center">
   <a href="https://opencodeinterpreter.github.io/">[🏠Homepage]</a>
   |
   <a href="https://github.com/OpenCodeInterpreter/OpenCodeInterpreter/">[🛠️Code]</a>
</p>
<hr>

## 🌟 Upcoming Features
- 💡 **Open Sourcing OpenCodeInterpreter-SC2 series Model (based on StarCoder2 base)**

- 💡 **Open Sourcing OpenCodeInterpreter-GM-7b Model with gemma-7b Base**

## 🔔News
🛠️[2024-02-29]: Our official online demo is deployed on HuggingFace Spaces! Take a look at [Demo Page](https://huggingface.co/spaces/m-a-p/OpenCodeInterpreter_demo)!

🛠️[2024-02-28]: We have open-sourced the Demo Local Deployment Code with a Setup Guide.

✨[2024-02-26]: We have open-sourced the [OpenCodeInterpreter-DS-1.3b](https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-1.3B) Model.

📘[2024-02-26]: We have open-sourced the [CodeFeedback-Filtered-Instruction](https://huggingface.co/datasets/m-a-p/CodeFeedback-Filtered-Instruction) Dataset.

🚀[2024-02-23]: We have open-sourced the datasets used in our project named [Code-Feedback](https://huggingface.co/datasets/m-a-p/Code-Feedback).

🔥[2024-02-19]: We have open-sourced all models in the OpenCodeInterpreter series! We welcome everyone to try out our models and look forward to your participation! 😆


## Introduction
OpenCodeInterpreter is a suite of open-source code generation systems aimed at bridging the gap between large language models and sophisticated proprietary systems like the GPT-4 Code Interpreter. It significantly enhances code generation capabilities by integrating execution and iterative refinement functionalities.

## Models
All models within the OpenCodeInterpreter series have been open-sourced on Hugging Face. You can access our models via the following link: [OpenCodeInterpreter Models](https://huggingface.co/collections/m-a-p/opencodeinterpreter-65d312f6f88da990a64da456).

## Data Collection
Supported by Code-Feedback, a dataset featuring 68K multi-turn interactions, OpenCodeInterpreter incorporates execution and human feedback for dynamic code refinement.
For additional insights into data collection procedures, please consult the readme provided under [Data Collection](https://github.com/OpenCodeInterpreter/OpenCodeInterpreter/blob/main/data_collection/README.md).

## Evaluation
Our evaluation framework primarily utilizes HumanEval and MBPP, alongside their extended versions, HumanEval+ and MBPP+, leveraging the [EvalPlus framework](https://github.com/evalplus/evalplus) for a more comprehensive assessment.
For specific evaluation methodologies, please refer to the [Evaluation README](https://github.com/OpenCodeInterpreter/OpenCodeInterpreter/blob/main/evaluation/README.md) for more details.

## Demo
We're excited to present our open-source demo, enabling users to effortlessly generate and execute code with our LLM locally. Within the demo, users can leverage the power of LLM to generate code and execute it locally, receiving automated execution feedback. LLM dynamically adjusts the code based on this feedback, ensuring a smoother coding experience. Additionally, users can engage in chat-based interactions with the LLM model, providing feedback to further enhance the generated code.

To begin exploring the demo and experiencing the capabilities firsthand, please refer to the instructions outlined in the [OpenCodeInterpreter Demo README](https://github.com/OpenCodeInterpreter/OpenCodeInterpreter/blob/main/demo/README.md) file. Happy coding!

### Quick Start
- **Entering the workspace**:
  ```bash
  git clone https://github.com/OpenCodeInterpreter/OpenCodeInterpreter.git
  cd demo
  ```
- **Create a new conda environment**: `conda create -n demo python=3.10`

- **Activate the demo environment you create**: `conda activate demo`

- **Install requirements**: `pip install -r requirements.txt`

- **Create a Huggingface access token with write permission [here](https://huggingface.co/docs/hub/en/security-tokens). Our code will only use this token to create and push content to a specific repository called `opencodeinterpreter_user_data` under your own Huggingface account. We cannot get access to your data if you deploy this demo on your own device.**

- **Add the access token to environment variables:** `export HF_TOKEN="your huggingface access token"`

- **Run the Gradio App**:
  ```bash
  python3 chatbot.py --path "the model name of opencodeinterpreter model family. e.g., m-a-p/OpenCodeInterpreter-DS-6.7B"
  ```
### Video
https://github.com/OpenCodeInterpreter/OpenCodeInterpreter/assets/46103100/2337f34d-f5ed-4ecb-857b-3c2d085b72fd


## Contact

If you have any inquiries, please feel free to raise an issue or reach out to us via email at: xiangyue.work@gmail.com, zhengtianyu0428@gmail.com.
We're here to assist you!

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=OpenCodeInterpreter/OpenCodeInterpreter&type=Date)](https://star-history.com/#OpenCodeInterpreter/OpenCodeInterpreter&Date)
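Beyond the Gradio demo, the released checkpoints named above can also be queried programmatically. The snippet below is a rough sketch using the standard `transformers` generation API with the `m-a-p/OpenCodeInterpreter-DS-1.3B` checkpoint mentioned in the news section; whether that tokenizer ships a chat template is an assumption, so fall back to a hand-built prompt if `apply_chat_template` is unavailable.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "m-a-p/OpenCodeInterpreter-DS-1.3B"  # released checkpoint referenced above

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

messages = [
    {"role": "user", "content": "Write a Python function that checks whether a number is prime."}
]
# Assumes the released tokenizer bundles a chat template; otherwise build the prompt by hand.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```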
OpenCodeInterpreter/data_collection/Local-Code-Interpreter/LICENSE
ADDED
@@ -0,0 +1,201 @@
Apache License, Version 2.0, January 2004 (http://www.apache.org/licenses/). Identical to the 201-line Apache-2.0 text of OpenCodeInterpreter/LICENSE above.
OpenCodeInterpreter/data_collection/Local-Code-Interpreter/README.md
ADDED
@@ -0,0 +1,143 @@
**Read in other language: [中文](README_CN.md).**

# Local-Code-Interpreter
A local implementation of OpenAI's ChatGPT Code Interpreter (Advanced Data Analysis).

## Introduction

OpenAI's Code Interpreter (currently renamed Advanced Data Analysis) for ChatGPT is a revolutionary feature that allows the execution of Python code within the AI model. However, it executes code within an online sandbox and has certain limitations. In this project, we present Local Code Interpreter – which enables code execution on your local device, offering enhanced flexibility, security, and convenience.
![notebook_gif_demo](example_img/save_to_notebook_demo.gif)

## Key Advantages

- **Custom Environment**: Execute code in a customized environment of your choice, ensuring you have the right packages and settings.

- **Seamless Experience**: Say goodbye to file size restrictions and internet issues while uploading. With Local Code Interpreter, you're in full control.

- **GPT-3.5 Availability**: While the official Code Interpreter is only available for the GPT-4 model, the Local Code Interpreter offers the flexibility to switch between both GPT-3.5 and GPT-4 models.

- **Enhanced Data Security**: Keep your data more secure by running code locally, minimizing data transfer over the internet.

- **Jupyter Support**: You can save all the code and conversation history in a Jupyter notebook for future use.

## Note
Executing AI-generated code without human review on your own device is not safe. You are responsible for taking measures to protect the security of your device and data (such as using a virtual machine) before launching this program. All consequences caused by using this program shall be borne by yourself.

## Usage

### Installation

1. Clone this repository to your local device
   ```shell
   git clone https://github.com/MrGreyfun/Local-Code-Interpreter.git
   cd Local-Code-Interpreter
   ```

2. Install the necessary dependencies. The program has been tested on Windows 10 and CentOS Linux 7.8, with Python 3.9.16. Required packages include:
   ```text
   Jupyter Notebook 6.5.4
   gradio 3.39.0
   openai 0.27.8
   ansi2html 1.8.0
   tiktoken 0.3.3
   Pillow 9.4.0
   ```
   Other systems or package versions may also work. Please note that you should not update the `openai` package to the latest `1.x` version, as it has been rewritten and is not compatible with older versions.
   You can use the following command to directly install the required packages:
   ```shell
   pip install -r requirements.txt
   ```
   For newcomers to Python, we offer a convenient command that installs additional packages commonly used for data processing and analysis:
   ```shell
   pip install -r requirements_full.txt
   ```
### Configuration

1. Create a `config.json` file in the `src` directory, following the examples provided in the `config_example` directory.

2. Configure your API key in the `config.json` file.

Please note:
1. **Set the `model_name` correctly**
   This program relies on the function-calling capability of the `0613` or newer versions of models:
   - `gpt-3.5-turbo-0613` (and its 16K version)
   - `gpt-3.5-turbo-1106`
   - `gpt-4-0613` (and its 32K version)
   - `gpt-4-1106-preview`

   Older versions of the models will not work. Note that `gpt-4-vision-preview` lacks support for function calling; therefore, it should not be set as the `GPT-4` model.

   For Azure OpenAI service users:
   - Set the `model_name` as your deployment name.
   - Confirm that the deployed model corresponds to the `0613` or newer version.

2. **API Version Settings**
   If you're using the Azure OpenAI service, set the `API_VERSION` to `2023-12-01-preview` in the `config.json` file. Note that API versions older than `2023-07-01-preview` do not support the function calls this program needs, and `2023-12-01-preview` is recommended, as older versions will be deprecated in the near future.

3. **Vision Model Settings**
   Although `gpt-4-vision-preview` does not currently support function calling, we have implemented vision input using a non-end-to-end approach. To enable vision input, set `gpt-4-vision-preview` as the `GPT-4V` model and set `available` to `true`. Conversely, set `available` to `false` to disable vision input when it is unnecessary; this removes the vision-related system prompts and reduces your API costs.
   ![vision_demo](example_img/vision_example.jpg)
4. **Model Context Window Settings**
   The `model_context_window` field records the context window for each model, which the program uses to slice conversations when they exceed the model's context window capacity.
   Azure OpenAI service users should manually insert context window information using the model's deployment name in the following format:
   ```json
   "<YOUR-DEPLOYMENT-NAME>": <contex_window (integer)>
   ```

   Additionally, when OpenAI introduces new models, you can manually append the new model's context window information using the same format. (We will keep this file updated, but there might be delays.)

5. **Alternate API Key Handling**
   If you prefer not to store your API key in the `config.json` file, you can opt for an alternate approach:
   - Leave the `API_KEY` field in `config.json` as an empty string:
     ```json
     "API_KEY": ""
     ```
   - Set the environment variable `OPENAI_API_KEY` with your API key before running the program:
     - On Windows:
       ```shell
       set OPENAI_API_KEY=<YOUR-API-KEY>
       ```
     - On Linux:
       ```shell
       export OPENAI_API_KEY=<YOUR-API-KEY>
       ```

## Getting Started

1. Navigate to the `src` directory.
   ```shell
   cd src
   ```

2. Run the command:
   ```shell
   python web_ui.py
   ```

3. Access the generated link in your browser to start using the Local Code Interpreter.

4. Use the `-n` or `--notebook` option to save the conversation in a Jupyter notebook.
   By default, the notebook is saved in the working directory, but you can add a path to save it elsewhere.
   ```shell
   python web_ui.py -n <path_to_notebook>
   ```

## Example

Imagine uploading a data file and requesting the model to perform linear regression and visualize the data. See how Local Code Interpreter provides a seamless experience:

1. Upload the data and request linear regression:
   ![Example 1](example_img/1.jpg)

2. Encounter an error in the generated code:
   ![Example 2](example_img/2.jpg)

3. ChatGPT automatically checks the data structure and fixes the bug:
   ![Example 3](example_img/3.jpg)

4. The corrected code runs successfully:
   ![Example 4](example_img/4.jpg)

5. The final result meets your requirements:
   ![Example 5](example_img/5.jpg)
   ![Example 6](example_img/6.jpg)
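The configuration section above describes how the interpreter resolves its API key: an explicit `API_KEY` in `config.json`, or a fallback to the `OPENAI_API_KEY` environment variable when that field is left empty. The snippet below is a minimal sketch of that behaviour for illustration only; the actual loader in `src/` may differ, and the path `src/config.json` is the location the README suggests.

```python
import json
import os


def load_config(path="src/config.json"):
    """Load config.json and resolve the API key as the README describes:
    an empty API_KEY field falls back to the OPENAI_API_KEY environment variable."""
    with open(path, "r", encoding="utf-8") as f:
        config = json.load(f)

    if not config.get("API_KEY"):
        config["API_KEY"] = os.environ.get("OPENAI_API_KEY", "")
    if not config["API_KEY"]:
        raise ValueError("No API key found in config.json or OPENAI_API_KEY.")

    # Only models marked `available` should be offered in the UI.
    available = [name for name, m in config["model"].items() if m.get("available")]
    return config, available


if __name__ == "__main__":
    cfg, models = load_config()
    print("Configured models:", models)
```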
OpenCodeInterpreter/data_collection/Local-Code-Interpreter/README_CN.md
ADDED
@@ -0,0 +1,140 @@
**Read in other language: [English](README.md)**

# Local Code Interpreter
A local version of OpenAI's ChatGPT Code Interpreter (Advanced Data Analysis).

## Introduction

OpenAI's ChatGPT Code Interpreter (now renamed Advanced Data Analysis) is a powerful AI tool. However, because it runs code in an online sandbox, it comes with several restrictions, such as missing packages, slow uploads, a 100 MB limit on uploaded files, and a 120-second cap on code execution. For this reason we built the Local Code Interpreter, which lets you run the code ChatGPT generates on your own device, in your own Python environment, removing the restrictions of the original interpreter.
![notebook_gif_demo](example_img/save_to_notebook_demo.gif)

## Advantages

- **Custom environment**: Run code in your local environment, with all the dependencies you need correctly installed.

- **Seamless experience**: No more 100 MB file limits or network issues. With the local interpreter, everything stays under your control.

- **GPT-3.5 available**: The official Code Interpreter can only be used with GPT-4, but here you can switch freely between GPT-3.5 and GPT-4, even within a single conversation.

- **Better data security**: Code runs locally and files never have to be uploaded, improving data security.

- **Jupyter support**: The program can save the code and conversation history to a Jupyter notebook file for later use.

## Notes
Running AI-generated code that has not been reviewed by a human on your own device is not safe. Before launching this program you should take precautions, such as using a virtual machine, to protect your device and data. All consequences of using this program are your own responsibility.

## Usage

### Installation

1. Clone this repository
   ```shell
   git clone https://github.com/MrGreyfun/Local-Code-Interpreter.git
   cd Local-Code-Interpreter
   ```

2. Install the dependencies. The program has been tested on Windows 10 and CentOS Linux 7.8 with Python 3.9.16. Required packages and versions:
   ```text
   Jupyter Notebook 6.5.4
   gradio 3.39.0
   openai 0.27.8
   ansi2html 1.8.0
   ```
   Other systems or package versions may also work. Note that the `openai` package should not be upgraded to the latest `1.x` series, which has been rewritten and is incompatible with older versions.
   You can install the required packages directly with:
   ```shell
   pip install -r requirements.txt
   ```
   If you are not familiar with Python, you can install with the following command instead, which additionally installs commonly used Python data-analysis libraries:
   ```shell
   pip install -r requirements_full.txt
   ```
### Configuration

1. Create a `config.json` file in the `src` directory, following the examples provided in the `config_example` directory.

2. Configure your API key in the `config.json` file.

Please note:
1. **Set `model_name` correctly**
   The program relies on the function-calling capability of the `0613` and later models, which include:
   - `gpt-3.5-turbo-0613` (and its 16K version)
   - `gpt-3.5-turbo-1106`
   - `gpt-4-0613` (and its 32K version)
   - `gpt-4-1106-preview`

   Older models will not work. Note that `gpt-4-vision-preview` likewise does not support function calling and therefore cannot be set as the `GPT-4` model.

   For Azure OpenAI users:
   - Set `model_name` to your model's deployment name.
   - Make sure the deployed model is the `0613` version or later.

2. **API version settings**
   If you use the Azure OpenAI service, set `API_VERSION` to `2023-07-01-preview` in the `config.json` file; other API versions do not support function calling.

3. **Vision model settings**
   Although the `gpt-4-vision-preview` model does not support function calling, we have still implemented image input in a non-end-to-end way. To use image input, set `gpt-4-vision-preview` as the `GPT-4V` model and set its `available` field to `true`. When image input is not needed, you can set `available` to `false`; this removes the image-related system prompts and reduces your API costs.
   ![vision_demo](example_img/vision_example.jpg)
4. **Model context window settings**
   The `model_context_window` field records the context window length of each model. When a conversation exceeds the model's context window limit, the program uses this information to compress the conversation.
   Azure OpenAI users need to add the context window information manually, keyed by the model's deployment name, in the following format:
   ```json
   "<model deployment name>": <context window length (integer)>
   ```
   Likewise, when OpenAI releases new models, you can add their context window information manually in the same format. (We will keep this file updated, but not necessarily promptly.)

5. **Configuring the key via environment variables**
   If you prefer not to store your API key in the `config.json` file, you can set the key through an environment variable instead:
   - Set `API_KEY` in `config.json` to an empty string:
     ```json
     "API_KEY": ""
     ```
   - Before running the program, set the environment variable `OPENAI_API_KEY` to your API key:
     - Windows:
       ```shell
       set OPENAI_API_KEY=<your API key>
       ```
     - Linux:
       ```shell
       export OPENAI_API_KEY=<your API key>
       ```

## Getting Started

1. Enter the `src` directory.
   ```shell
   cd src
   ```

2. Run the following command:
   ```shell
   python web_ui.py
   ```

3. Open the link printed in the terminal in your browser to start using the local code interpreter.

4. Add the `-n` or `--notebook` option to save the conversation to a Jupyter notebook.
   By default the notebook file is saved in the working directory; you can add a path to save it elsewhere.
   ```shell
   python web_ui.py -n <path_to_notebook>
   ```

## Example

An example of performing a linear-regression task with this program:

1. Upload a data file and ask the model to perform linear regression on the data:
   ![Example 1](example_img/1.jpg)

2. The generated code hits an error during execution:
   ![Example 2](example_img/2.jpg)

3. ChatGPT automatically inspects the data format and fixes the bug:
   ![Example 3](example_img/3.jpg)

4. The fixed code runs successfully:
   ![Example 4](example_img/4.jpg)

5. The final result meets the requirements:
   ![Example 5](example_img/5.jpg)
   ![Example 6](example_img/6.jpg)
OpenCodeInterpreter/data_collection/Local-Code-Interpreter/config_example/config.azure.example.json
ADDED
@@ -0,0 +1,24 @@
{
    "API_TYPE": "azure",
    "API_base": "<YOUR-API-ENDPOINT>",
    "API_VERSION": "2023-12-01-preview",
    "API_KEY": "<YOUR-API-KEY>",
    "model": {
        "GPT-3.5": {
            "model_name": "<YOUR-DEPLOYMENT-NAME>",
            "available": true
        },
        "GPT-4": {
            "model_name": "<YOUR-DEPLOYMENT-NAME>",
            "available": true
        },
        "GPT-4V": {
            "model_name": "<YOUR-DEPLOYMENT-NAME>",
            "available": true
        }
    },
    "model_context_window": {
        "<YOUR-DEPLOYMENT-NAME1>": <contex_window (integer)>,
        "<YOUR-DEPLOYMENT-NAME2>": <contex_window (integer)>
    }
}
OpenCodeInterpreter/data_collection/Local-Code-Interpreter/config_example/config.example.json
ADDED
@@ -0,0 +1,32 @@
{
    "API_TYPE": "open_ai",
    "API_base": "https://api.openai.com/v1",
    "API_VERSION": null,
    "API_KEY": "<YOUR-API-KEY>",
    "model": {
        "GPT-3.5": {
            "model_name": "gpt-3.5-turbo-0613",
            "available": true
        },
        "GPT-4": {
            "model_name": "gpt-4-0613",
            "available": true
        },
        "GPT-4V": {
            "model_name": "gpt-4-vision-preview",
            "available": true
        }
    },
    "model_context_window": {
        "gpt-3.5-turbo": 4096,
        "gpt-3.5-turbo-16k": 16385,
        "gpt-3.5-turbo-0613": 4096,
        "gpt-3.5-turbo-1106": 16385,
        "gpt-4": 8192,
        "gpt-4-32k": 32768,
        "gpt-4-0613": 8192,
        "gpt-4-32k-0613": 32768,
        "gpt-4-1106-preview": 128000,
        "gpt-4-vision-preview": 128000
    }
}
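The `model_context_window` block above is the data the interpreter consults when a conversation outgrows a model's context window, as described in the Local-Code-Interpreter README. The sketch below shows one simplified way such slicing can work, assuming `tiktoken` (listed in requirements.txt) and ignoring per-message token overhead; the actual logic in `src/` may differ, and the `src/config.json` path is the location the README suggests.

```python
import json

import tiktoken


def truncate_history(messages, model_name, config_path="src/config.json", margin=512):
    """Drop the oldest messages until the conversation fits within the model's
    context window from `model_context_window`, leaving `margin` tokens for the reply.
    Simplified sketch; Azure deployment names may need tiktoken.get_encoding("cl100k_base")."""
    with open(config_path, "r", encoding="utf-8") as f:
        window = json.load(f)["model_context_window"][model_name]

    enc = tiktoken.encoding_for_model(model_name)

    def count(msgs):
        return sum(len(enc.encode(m["content"])) for m in msgs)

    while len(messages) > 1 and count(messages) > window - margin:
        messages = messages[1:]  # drop the oldest turn first
    return messages
```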
OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/1.jpg
ADDED
OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/2.jpg
ADDED
OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/3.jpg
ADDED
OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/4.jpg
ADDED