alKoGolik committed on
Commit
6fcd376
•
1 Parent(s): 8f42101

Upload 210 files

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +3 -0
  2. .gitignore +48 -0
  3. Llama2-Code-Interpreter/.gitignore +48 -0
  4. Llama2-Code-Interpreter/README.md +131 -0
  5. Llama2-Code-Interpreter/assets/TSLA_90days.png +0 -0
  6. Llama2-Code-Interpreter/assets/logo.png +0 -0
  7. Llama2-Code-Interpreter/assets/logo2.png +0 -0
  8. Llama2-Code-Interpreter/assets/president_code.gif +0 -0
  9. Llama2-Code-Interpreter/assets/president_code.png +0 -0
  10. Llama2-Code-Interpreter/assets/result_nvidia_chart.gif +3 -0
  11. Llama2-Code-Interpreter/assets/tok_hist.png +0 -0
  12. Llama2-Code-Interpreter/chatbot.py +238 -0
  13. Llama2-Code-Interpreter/code_interpreter/BaseCodeInterpreter.py +59 -0
  14. Llama2-Code-Interpreter/code_interpreter/GPTCodeInterpreter.py +234 -0
  15. Llama2-Code-Interpreter/code_interpreter/GPTCodeInterpreterDataCollect.py +271 -0
  16. Llama2-Code-Interpreter/code_interpreter/JuypyterClient.py +75 -0
  17. Llama2-Code-Interpreter/code_interpreter/LlamaCodeInterpreter.py +286 -0
  18. Llama2-Code-Interpreter/code_interpreter/RetrospectiveGPTCodeInterpreter.py +472 -0
  19. Llama2-Code-Interpreter/code_interpreter/__pycache__/JuypyterClient.cpython-311.pyc +0 -0
  20. Llama2-Code-Interpreter/code_interpreter/__pycache__/LlamaCodeInterpreter.cpython-311.pyc +0 -0
  21. Llama2-Code-Interpreter/code_interpreter/llama_hf.py +101 -0
  22. Llama2-Code-Interpreter/eval/eval.md +0 -0
  23. Llama2-Code-Interpreter/eval/gsm8k.py +115 -0
  24. Llama2-Code-Interpreter/eval/human_eval.py +289 -0
  25. Llama2-Code-Interpreter/eval/inference.py +204 -0
  26. Llama2-Code-Interpreter/finetuning/__pycache__/conversation_template.cpython-311.pyc +0 -0
  27. Llama2-Code-Interpreter/finetuning/codellama_wrapper.py +21 -0
  28. Llama2-Code-Interpreter/finetuning/conversation_template.py +80 -0
  29. Llama2-Code-Interpreter/finetuning/train.py +336 -0
  30. Llama2-Code-Interpreter/prompt/__init__.py +0 -0
  31. Llama2-Code-Interpreter/prompt/gpt4_prompt.py +277 -0
  32. Llama2-Code-Interpreter/requirements.txt +32 -0
  33. Llama2-Code-Interpreter/utils/__pycache__/special_tok_llama2.cpython-311.pyc +0 -0
  34. Llama2-Code-Interpreter/utils/check_nb_out.py +20 -0
  35. Llama2-Code-Interpreter/utils/check_nb_plot_img_out.py +81 -0
  36. Llama2-Code-Interpreter/utils/cleaner.py +28 -0
  37. Llama2-Code-Interpreter/utils/const.py +314 -0
  38. Llama2-Code-Interpreter/utils/convert_llama_weights_to_hf.py +375 -0
  39. Llama2-Code-Interpreter/utils/special_tok_llama2.py +14 -0
  40. OpenCodeInterpreter/LICENSE +201 -0
  41. OpenCodeInterpreter/README.md +83 -0
  42. OpenCodeInterpreter/data_collection/Local-Code-Interpreter/LICENSE +201 -0
  43. OpenCodeInterpreter/data_collection/Local-Code-Interpreter/README.md +143 -0
  44. OpenCodeInterpreter/data_collection/Local-Code-Interpreter/README_CN.md +140 -0
  45. OpenCodeInterpreter/data_collection/Local-Code-Interpreter/config_example/config.azure.example.json +24 -0
  46. OpenCodeInterpreter/data_collection/Local-Code-Interpreter/config_example/config.example.json +32 -0
  47. OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/1.jpg +0 -0
  48. OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/2.jpg +0 -0
  49. OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/3.jpg +0 -0
  50. OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/4.jpg +0 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/result_nvidia_chart.gif filter=lfs diff=lfs merge=lfs -text
+ Llama2-Code-Interpreter/assets/result_nvidia_chart.gif filter=lfs diff=lfs merge=lfs -text
+ OpenCodeInterpreter/data_collection/Local-Code-Interpreter/example_img/save_to_notebook_demo.gif filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,48 @@
+ # Ignore .ckpt files
+ ckpt
+
+ # Ignore Python compiled files
+ __pycache__/
+ *.py[cod]
+
+ # Ignore Python virtual environment
+ venv/
+
+ # Ignore Jupyter notebook checkpoints
+ .ipynb_checkpoints/
+ .git/
+ .vscode/
+
+ # Ignore .DS_Store on MacOS
+ .DS_Store
+
+ rilab_key.txt
+ gpt4_custom_code_interpreter/rilab_key.txt
+ openai_api_key.txt
+
+ gpt4_custom_code_interpreter/
+ tmp/
+ output/
+ wandb/
+
+ utils/const.py
+ utils/hf_model_upload.py
+ gpt_data_gen/
+ *.json
+ *.txt
+ *.sh
+ *.pt
+ *.pth
+ *.ckpt
+ *.tokenizer
+
+ # eval data
+ eval/ds1000_data
+ eval/grade-school-math
+
+ # gradio features
+ chatbot_feat.py
+ chatbot_feat2.py
+ gradio_test.py
Llama2-Code-Interpreter/.gitignore ADDED
@@ -0,0 +1,48 @@
+ # Ignore .ckpt files
+ ckpt
+
+ # Ignore Python compiled files
+ __pycache__/
+ *.py[cod]
+
+ # Ignore Python virtual environment
+ venv/
+
+ # Ignore Jupyter notebook checkpoints
+ .ipynb_checkpoints/
+ .git/
+ .vscode/
+
+ # Ignore .DS_Store on MacOS
+ .DS_Store
+
+ rilab_key.txt
+ gpt4_custom_code_interpreter/rilab_key.txt
+ openai_api_key.txt
+
+ gpt4_custom_code_interpreter/
+ tmp/
+ output/
+ wandb/
+
+ utils/const.py
+ utils/hf_model_upload.py
+ gpt_data_gen/
+ *.json
+ *.txt
+ *.sh
+ *.pt
+ *.pth
+ *.ckpt
+ *.tokenizer
+
+ # eval data
+ eval/ds1000_data
+ eval/grade-school-math
+
+ # gradio features
+ chatbot_feat.py
+ chatbot_feat2.py
+ gradio_test.py
Llama2-Code-Interpreter/README.md ADDED
@@ -0,0 +1,131 @@
+ <p align="center" width="100%">
+ <img src="/assets/logo2.png" alt="llama2 code interpreter icon" style="width: 200px; height:200px; display: block; margin: auto; border-radius: 50%;">
+ </p>
+
+
+ # Llama2 Code Interpreter
+
+ <p align="center">
+ 🤗 <a href="https://huggingface.co/Seungyoun/codellama-7b-instruct-pad" target="_blank">CodeLlama 7B Finetuned Model (HF)</a>
+ </p>
+
+
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/release/python-390/)
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+ This project allows an LLM to generate code, execute it, receive feedback, debug, and answer questions based on the whole process. It is designed to be intuitive and versatile, capable of dealing with multiple languages and frameworks.
+
+ [The purpose and direction of the project](https://github.com/SeungyounShin/Llama2-Code-Interpreter/wiki)
+
+ ## Quick Start
+
+ **Run the Gradio App**:
+ ```bash
+ python3 chatbot.py --path Seungyoun/codellama-7b-instruct-pad
+ ```
+
+ ## News
+
+ - 🔥🔥🔥[2023/08/27] We're thrilled to announce that our **[🤗 Llama2 Code Interpreter-7B](https://huggingface.co/Seungyoun/codellama-7b-instruct-pad) (Finetuned from [CodeLlama-7B-Instruct](https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf))** model achieved a remarkable **70.12 pass@1** on the [HumanEval benchmark](https://github.com/openai/human-eval).
+
+
+ **HumanEval**
+
+ | Model | Score (pass@1) |
+ |-------------------------------|--------|
+ | CodeLlama instruct 7B | 34.8% |
+ | CodeLlama instruct 7B - finetuned | 70.12% |
+
+ **GSM8K**
+
+ | Model | Score |
+ |-------------------------------|--------|
+ | Code Llama 7B | 13% |
+ | Code Llama 13B | 20.8% |
+ | CodeLlama instruct 7B - finetuned | 28% |
+
+
+ ## 🌟 Key Features
+
+ - [x] 🚀 **Code Generation and Execution**: Llama2 generates code, automatically identifies the code blocks in its output, and executes them.
+ - [x] Monitors and retains Python variables that were used in previously executed code blocks.
+ - [x] 🌟 At the moment, my focus is on "Data development for GPT-4 code interpretation" and "Enhancing the model using this data". For more details, check out the [feat/finetuning branch](https://github.com/SeungyounShin/Llama2-Code-Interpreter/tree/feat/finetuning) in our repository.
+ - [x] 🌟 CodeLlama support: [CodeLlama](https://github.com/facebookresearch/codellama)
+
+ ## Examples
+
+ ---
+ <div align="center">
+
+ ***Llama2 in Action***
+
+ <p align="center" width="100%">
+ <img src="assets/result_nvidia_chart.gif" alt="example1_president_search_with_code" style="width: 600px; display: block; margin: auto; border-radius: 50%;">
+ </p>
+
+ </div>
+
+ In the GIF, Llama2 is seen in action. A user types in the request: `Plot Nvidia 90 days chart.` Llama2, an advanced code interpreter finetuned on a select dataset, swiftly queries `Yahoo Finance`. Moments later, it fetches the latest Nvidia stock prices from the past 90 days. Using `Matplotlib`, Llama2 then generates a clear and detailed stock price chart for Nvidia, showcasing its performance over the given period.
+
+
+
+ ## Installation
+
+ 1. **Clone the Repository (if you haven't already)**:
+ ```bash
+ git clone https://github.com/SeungyounShin/Llama2-Code-Interpreter.git
+ cd Llama2-Code-Interpreter
+ ```
+
+ 2. **Install the required dependencies:**
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ ---
+
+ ### Run App with GPT-4 Finetuned Llama Model
+
+ To start interacting with Llama2 via the Gradio UI using `codellama-7b-instruct-pad`, follow the steps below:
+
+ 1. **Run the Gradio App**:
+ ```bash
+ python3 chatbot.py --path Seungyoun/codellama-7b-instruct-pad
+ ```
+
+ For those who want to use other models:
+
+ ### General Instructions to Run App
+
+ To start interacting with Llama2 via the Gradio UI using other models:
+
+ 1. **Run the Command**:
+ ```bash
+ python3 chatbot.py --model_path <your-model-path>
+ ```
+
+ Replace `<your-model-path>` with the path to the model file you wish to use. A recommended model for chat interactions is `meta-llama/Llama-2-13b-chat`.
+
+ ## Contributions
+
+ Contributions, issues, and feature requests are welcome! Feel free to check the [issues page](https://github.com/SeungyounShin/Llama2-Code-Interpreter/issues).
+
+ ## License
+
+ Distributed under the MIT License. See `LICENSE` for more information.
+
+ ## Contact
+
+ Seungyoun, Shin - 2022021568@korea.ac.kr
+
+ ## Acknowledgement
+
+ Here are some relevant and related projects that have contributed to the development of this work:
+
+ 1. **llama2** : [GitHub Repository](https://github.com/facebookresearch/llama)
+ 2. **yet-another-gpt-tutorial** : [GitHub Repository](https://github.com/sjchoi86/yet-another-gpt-tutorial/tree/main)
+
+ These projects have been instrumental in providing valuable insights and resources, and their contributions are highly appreciated.
+
+ ---
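The README above describes the "Plot Nvidia 90 days chart" example shown in the GIF. For reference, a minimal standalone sketch of the same pipeline, assuming the third-party `yfinance` and `matplotlib` packages (the package choice is an assumption; at runtime the model writes its own variant of this code):

```python
# Hypothetical reproduction of the "Plot Nvidia 90 days chart" request.
# Assumes `pip install yfinance matplotlib`; not part of this commit.
import datetime as dt

import matplotlib.pyplot as plt
import yfinance as yf

# Fetch roughly the last 90 days of NVDA daily prices from Yahoo Finance.
end = dt.date.today()
start = end - dt.timedelta(days=90)
data = yf.download("NVDA", start=start, end=end)

# Plot the closing price over the period.
plt.figure(figsize=(10, 5))
plt.plot(data.index, data["Close"])
plt.title("NVDA closing price, last 90 days")
plt.xlabel("Date")
plt.ylabel("Price (USD)")
plt.tight_layout()
plt.show()
```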
Llama2-Code-Interpreter/assets/TSLA_90days.png ADDED
Llama2-Code-Interpreter/assets/logo.png ADDED
Llama2-Code-Interpreter/assets/logo2.png ADDED
Llama2-Code-Interpreter/assets/president_code.gif ADDED
Llama2-Code-Interpreter/assets/president_code.png ADDED
Llama2-Code-Interpreter/assets/result_nvidia_chart.gif ADDED

Git LFS Details

  • SHA256: e0a0b39e470967a50cf90e2ebd448c297e582f3b673f290cc990b886f9fb8002
  • Pointer size: 132 Bytes
  • Size of remote file: 1.12 MB
Llama2-Code-Interpreter/assets/tok_hist.png ADDED
Llama2-Code-Interpreter/chatbot.py ADDED
@@ -0,0 +1,238 @@
+ import gradio as gr
+ import random
+ import time, os
+ import copy
+ import re
+
+ import torch
+ from rich.console import Console
+ from rich.table import Table
+ from datetime import datetime
+
+ from threading import Thread
+ from typing import Optional
+ from transformers import TextIteratorStreamer
+
+ from utils.special_tok_llama2 import (
+     B_CODE,
+     E_CODE,
+     B_RESULT,
+     E_RESULT,
+     B_INST,
+     E_INST,
+     B_SYS,
+     E_SYS,
+     DEFAULT_PAD_TOKEN,
+     DEFAULT_BOS_TOKEN,
+     DEFAULT_EOS_TOKEN,
+     DEFAULT_UNK_TOKEN,
+     IGNORE_INDEX,
+ )
+
+ from finetuning.conversation_template import (
+     json_to_code_result_tok_temp,
+     msg_to_code_result_tok_temp,
+ )
+
+ import warnings
+
+ warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+
+
+ from code_interpreter.LlamaCodeInterpreter import LlamaCodeInterpreter
+
+
+ class StreamingLlamaCodeInterpreter(LlamaCodeInterpreter):
+     streamer: Optional[TextIteratorStreamer] = None
+
+     # overwrite generate function
+     @torch.inference_mode()
+     def generate(
+         self,
+         prompt: str = "[INST]\n###User : hi\n###Assistant :",
+         max_new_tokens=512,
+         do_sample: bool = True,
+         use_cache: bool = True,
+         top_p: float = 0.95,
+         temperature: float = 0.1,
+         top_k: int = 50,
+         repetition_penalty: float = 1.0,
+     ) -> str:
+         # Get the model and tokenizer, and tokenize the user text.
+
+         self.streamer = TextIteratorStreamer(
+             self.tokenizer, skip_prompt=True, timeout=5
+         )
+
+         input_prompt = copy.deepcopy(prompt)
+         inputs = self.tokenizer([prompt], return_tensors="pt")
+         input_tokens_shape = inputs["input_ids"].shape[-1]
+
+         eos_token_id = self.tokenizer.convert_tokens_to_ids(DEFAULT_EOS_TOKEN)
+         e_code_token_id = self.tokenizer.convert_tokens_to_ids(E_CODE)
+
+         kwargs = dict(
+             **inputs,
+             max_new_tokens=max_new_tokens,
+             do_sample=do_sample,
+             top_p=top_p,
+             temperature=temperature,
+             use_cache=use_cache,
+             top_k=top_k,
+             repetition_penalty=repetition_penalty,
+             eos_token_id=[
+                 eos_token_id,
+                 e_code_token_id,
+             ],  # Stop generation at either EOS or E_CODE token
+             streamer=self.streamer,
+         )
+
+         thread = Thread(target=self.model.generate, kwargs=kwargs)
+         thread.start()
+
+         return ""
+
+
+ def change_markdown_image(text: str):
+     modified_text = re.sub(r"!\[(.*?)\]\(\'(.*?)\'\)", r"![\1](/file=\2)", text)
+     return modified_text
+
+
+ def gradio_launch(model_path: str, load_in_4bit: bool = True, MAX_TRY: int = 5):
+     with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
+         chatbot = gr.Chatbot(height=820, avatar_images="./assets/logo2.png")
+         msg = gr.Textbox()
+         clear = gr.Button("Clear")
+
+         interpreter = StreamingLlamaCodeInterpreter(
+             model_path=model_path, load_in_4bit=load_in_4bit
+         )
+
+         def bot(history):
+             user_message = history[-1][0]
+
+             interpreter.dialog.append({"role": "user", "content": user_message})
+
+             print(f"###User : [bold]{user_message}[bold]")
+             # print(f"###Assistant : ")
+
+             # setup
+             HAS_CODE = False  # For now
+             INST_END_TOK_FLAG = False
+             full_generated_text = ""
+             prompt = interpreter.dialog_to_prompt(dialog=interpreter.dialog)
+             start_prompt = copy.deepcopy(prompt)
+             prompt = f"{prompt} {E_INST}"
+
+             _ = interpreter.generate(prompt)
+             history[-1][1] = ""
+             generated_text = ""
+             for character in interpreter.streamer:
+                 history[-1][1] += character
+                 generated_text += character
+                 yield history
+
+             full_generated_text += generated_text
+             HAS_CODE, generated_code_block = interpreter.extract_code_blocks(
+                 generated_text
+             )
+
+             attempt = 1
+             while HAS_CODE:
+                 if attempt > MAX_TRY:
+                     break
+                 # if there is no code, nothing has to be executed
+
+                 # refine code block for history
+                 history[-1][1] = (
+                     history[-1][1]
+                     .replace(f"{B_CODE}", "\n```python\n")
+                     .replace(f"{E_CODE}", "\n```\n")
+                 )
+                 history[-1][1] = change_markdown_image(history[-1][1])
+                 yield history
+
+                 # replace unknown tokens with ''
+                 generated_code_block = generated_code_block.replace(
+                     "<unk>_", ""
+                 ).replace("<unk>", "")
+
+                 (
+                     code_block_output,
+                     error_flag,
+                 ) = interpreter.execute_code_and_return_output(
+                     f"{generated_code_block}"
+                 )
+                 code_block_output = interpreter.clean_code_output(code_block_output)
+                 generated_text = (
+                     f"{generated_text}\n{B_RESULT}\n{code_block_output}\n{E_RESULT}\n"
+                 )
+                 full_generated_text += (
+                     f"\n{B_RESULT}\n{code_block_output}\n{E_RESULT}\n"
+                 )
+
+                 # append code output
+                 history[-1][1] += f"\n```RESULT\n{code_block_output}\n```\n"
+                 history[-1][1] = change_markdown_image(history[-1][1])
+                 yield history
+
+                 prompt = f"{prompt} {generated_text}"
+
+                 _ = interpreter.generate(prompt)
+                 for character in interpreter.streamer:
+                     history[-1][1] += character
+                     generated_text += character
+                     history[-1][1] = change_markdown_image(history[-1][1])
+                     yield history
+
+                 HAS_CODE, generated_code_block = interpreter.extract_code_blocks(
+                     generated_text
+                 )
+
+                 if generated_text.endswith("</s>"):
+                     break
+
+                 attempt += 1
+
+             interpreter.dialog.append(
+                 {
+                     "role": "assistant",
+                     "content": generated_text.replace("<unk>_", "")
+                     .replace("<unk>", "")
+                     .replace("</s>", ""),
+                 }
+             )
+
+             print("----------\n" * 2)
+             print(interpreter.dialog)
+             print("----------\n" * 2)
+
+             return history[-1][1]
+
+         def user(user_message, history):
+             return "", history + [[user_message, None]]
+
+         msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+             bot, chatbot, chatbot
+         )
+         clear.click(lambda: None, None, chatbot, queue=False)
+
+     demo.queue()
+     demo.launch()
+
+
+ if __name__ == "__main__":
+     import argparse
+
+     parser = argparse.ArgumentParser(description="Process path for LLAMA2_FINETUNED.")
+     parser.add_argument(
+         "--path",
+         type=str,
+         required=True,
+         help="Path to the finetuned LLAMA2 model.",
+         default="./output/llama-2-7b-codellama-ci",
+     )
+     args = parser.parse_args()
+
+     gradio_launch(model_path=args.path, load_in_4bit=True)
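The core trick in `StreamingLlamaCodeInterpreter.generate` is the `TextIteratorStreamer` pattern: `model.generate` runs in a background thread while the caller iterates over decoded text chunks. A minimal self-contained sketch of that pattern, using a small illustrative model id rather than the finetuned checkpoint:

```python
# Minimal sketch of the threaded streaming pattern used above.
# The "gpt2" model id is illustrative; any causal LM works the same way.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer(["The quick brown fox"], return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, timeout=5)

# generate() blocks, so it runs in a worker thread; the streamer then
# yields decoded text chunks on the main thread as they are produced.
thread = Thread(
    target=model.generate,
    kwargs=dict(**inputs, max_new_tokens=32, streamer=streamer),
)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()
```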
Llama2-Code-Interpreter/code_interpreter/BaseCodeInterpreter.py ADDED
@@ -0,0 +1,59 @@
+ import json
+ import os
+ import sys
+ import time
+ import re
+ from pathlib import Path
+ from typing import List, Literal, Optional, Tuple, TypedDict, Dict
+
+ prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ sys.path.append(prj_root_path)
+
+ import torch
+ import transformers
+ from transformers import LlamaForCausalLM, LlamaTokenizer
+
+ import nbformat
+
+ # from nbconvert.preprocessors import ExecutePreprocessor
+ # from nbconvert.preprocessors.execute import CellExecutionError
+
+ from utils.const import *
+ from utils.cleaner import clean_error_msg
+ from colorama import init, Fore, Style
+ from rich.markdown import Markdown
+ import base64
+
+ import openai
+ from retrying import retry
+ import logging
+ from termcolor import colored
+ from code_interpreter.JuypyterClient import JupyterNotebook
+
+
+ class BaseCodeInterpreter:
+     def __init__(self):
+         self.dialog = [
+             {
+                 "role": "system",
+                 "content": CODE_INTERPRETER_SYSTEM_PROMPT,
+             },
+             # {"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
+             # {"role": "assistant", "content": "I think I need to use BeautifulSoup to find the current Korean president,"}
+         ]
+
+         self.nb = JupyterNotebook()
+
+     @staticmethod
+     def extract_code_blocks(text: str):
+         pattern = r"```(?:python\n)?(.*?)```"  # Match optional 'python\n' but don't capture it
+         code_blocks = re.findall(pattern, text, re.DOTALL)
+         return [block.strip() for block in code_blocks]
+
+     @staticmethod
+     def parse_last_answer(text: str) -> str:
+         return text.split(E_INST)[-1]
+
+     def execute_code_and_return_output(self, code_str: str):
+         outputs, error_flag = self.nb.add_and_run(code_str)
+         return outputs, error_flag
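As a quick illustration of the `extract_code_blocks` regex above (a usage sketch, not part of the commit): fenced blocks are returned without the fences, and the optional `python` language tag is dropped.

```python
text = (
    "Sure, here is the code:\n"
    "```python\n"
    "print('hi')\n"
    "```\n"
    "Done."
)
# extract_code_blocks is a @staticmethod, so it can be called on the class.
print(BaseCodeInterpreter.extract_code_blocks(text))
# -> ["print('hi')"]
```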
Llama2-Code-Interpreter/code_interpreter/GPTCodeInterpreter.py ADDED
@@ -0,0 +1,234 @@
+ import json
+ import os
+ import sys
+ import time
+ import re
+ from pathlib import Path
+ from typing import List, Literal, Optional, Tuple, TypedDict, Dict
+
+ # Get the path from environment variable
+ prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ sys.path.append(prj_root_path)
+ from code_interpreter.JuypyterClient import JupyterNotebook
+ from code_interpreter.BaseCodeInterpreter import BaseCodeInterpreter
+ from utils.const import *
+ from prompt.gpt4_prompt import CODE_INTERPRETER_SYSTEM_PROMPT
+
+ # from prompt.gpt4_prompt import CODE_INTERPRETER_SYSTEM_PROMPT
+ from colorama import init, Fore, Style
+ from rich.markdown import Markdown
+ import base64
+
+ import openai
+ from retrying import retry
+ import logging
+ from termcolor import colored
+
+ # load the API key from the key file
+ with open("./openai_api_key.txt") as f:
+     OPENAI_API_KEY = f.read()
+ openai.api_key = OPENAI_API_KEY
+ from utils.cleaner import clean_error_msg
+
+
+ def remove_string(s):
+     pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}:.*LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n"
+     return re.sub(pattern, "", s)
+
+
+ def clean_the_dialog(dialog, question):
+     question_idx = 0
+     for idx, item in enumerate(dialog):
+         if item["content"] == question:
+             question_idx = idx
+
+     filtered_dialog = dialog[question_idx:]
+
+     user_qinit_dict = filtered_dialog[0]
+     answer_fuse_str = "\n".join([i["content"].strip() for i in filtered_dialog[1::2]])
+
+     final_dialog_dict = [
+         {"role": "user", "content": user_qinit_dict["content"]},
+         {"role": "assistant", "content": answer_fuse_str},
+     ]
+
+     return final_dialog_dict
+
+
+ class GPTCodeInterpreter(BaseCodeInterpreter):
+     def __init__(self, model="gpt-4"):
+         self.model = model
+         self.dialog = [
+             # {"role": "system", "content": CODE_INTERPRETER_SYSTEM_PROMPT },
+             {
+                 "role": "system",
+                 "content": CODE_INTERPRETER_SYSTEM_PROMPT,
+             },
+             # {"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
+             # {"role": "assistant", "content": "I think I need to use BeautifulSoup to find the current Korean president,"}
+         ]
+
+         # self.dialog += few_shot_4
+         self.response = None
+
+         assert os.path.isfile(
+             "./openai_api_key.txt"
+         ), "The openai_api_key.txt file could not be found. Please make sure it is in the same directory as this script, and that it contains your OpenAI API key."
+
+         # load the API key from the key file
+         with open("./openai_api_key.txt") as f:
+             OPENAI_API_KEY = f.read()
+         openai.api_key = OPENAI_API_KEY
+
+         self.nb = JupyterNotebook()
+         out = self.nb.add_and_run(TOOLS_CODE)  # tool import
+
+     def get_response_content(self):
+         if self.response:
+             return self.response["choices"][0]["message"]["content"]
+         else:
+             return None
+
+     @retry(
+         stop_max_attempt_number=7,
+         wait_exponential_multiplier=1000,
+         wait_exponential_max=10000,
+     )
+     def ChatCompletion(self):
+         try:
+             self.response = openai.ChatCompletion.create(
+                 model=self.model, messages=self.dialog, temperature=0.2, top_p=0.9
+             )
+         except Exception as e:
+             print(f"error during OpenAI API call: {e}")
+
+     def close(self):
+         """
+         Close the Jupyter notebook backing this class instance.
+         """
+         self.nb.close()
+
+     def save_dialog(self, path: str = "./output/dialog.json"):
+         with open(path, "w") as f:
+             json.dump(self.dialog, f)
+         print(f" ++Dialog saved to [{path}]")
+
+     def chat(
+         self,
+         user_message: str,
+         VERBOSE: bool = False,
+         MAX_TRY: int = 6,
+         code_exec_prefix: str = "",
+         feedback_prompt: str = "",
+         append_result: bool = True,
+     ):
+         self.dialog.append({"role": "user", "content": user_message})
+
+         code_block_output = ""
+         attempt = 0
+         img_data = None
+
+         if VERBOSE:
+             print(
+                 "###User : " + Fore.BLUE + Style.BRIGHT + user_message + Style.RESET_ALL
+             )
+             print("\n###Assistant : ")
+
+         for i in range(MAX_TRY):
+             # GPT response
+             self.ChatCompletion()
+
+             # Get code block
+             generated_text = self.get_response_content()
+             generated_code_blocks = self.extract_code_blocks(generated_text)
+             # execute code
+             if len(generated_code_blocks) > 0:
+                 # Find the position of the first code block in the last answer
+                 first_code_block_pos = (
+                     generated_text.find(generated_code_blocks[0])
+                     if generated_code_blocks
+                     else -1
+                 )
+                 text_before_first_code_block = (
+                     generated_text
+                     if first_code_block_pos == -1
+                     else generated_text[:first_code_block_pos]
+                 )
+                 if VERBOSE:
+                     print(Fore.GREEN + text_before_first_code_block + Style.RESET_ALL)
+                 if VERBOSE:
+                     print(
+                         Fore.YELLOW
+                         + generated_code_blocks[0]
+                         + "\n```\n"
+                         + Style.RESET_ALL
+                     )
+                 code_block_output, error_flag = self.execute_code_and_return_output(
+                     generated_code_blocks[0]
+                 )
+
+                 code_block_output = f"{code_block_output}"
+
+                 if code_block_output is not None:
+                     code_block_output = code_block_output.strip()
+
+                 code_block_output = remove_string(code_block_output)
+                 if len(code_block_output) > 500:
+                     code_block_output = (
+                         code_block_output[:200] + "⋯(skip)⋯" + code_block_output[-200:]
+                     )
+                 code_block_output_str = f"\n```RESULT\n{code_block_output}\n```\n"
+                 if append_result:
+                     gen_final = f"{text_before_first_code_block}{generated_code_blocks[0]}\n```{code_block_output_str}"
+                     if VERBOSE:
+                         print(
+                             Fore.LIGHTBLACK_EX + code_block_output_str + Style.RESET_ALL
+                         )
+                 else:
+                     gen_final = (
+                         f"{text_before_first_code_block}{generated_code_blocks[0]}\n```"
+                     )
+
+                 self.dialog.append(
+                     {
+                         "role": "assistant",
+                         "content": gen_final,
+                     }
+                 )
+
+                 if len(feedback_prompt) < 5:
+                     feedback_dict = {
+                         "role": "user",
+                         "content": "Keep going. if you think debugging tell me where you got wrong and better code.\nNeed conclusion to question only text (Do not leave result part alone).\nif doesn't need to generated anything then just say <done>",
+                     }
+                 else:
+                     feedback_dict = {
+                         "role": "user",
+                         "content": f"{feedback_prompt}",
+                     }
+
+                 self.dialog.append(feedback_dict)
+
+             else:
+                 if "<done>" in generated_text:
+                     generated_text = generated_text.split("<done>")[0].strip()
+
+                 if len(generated_text) <= 0:
+                     break
+
+                 if VERBOSE:
+                     print(Fore.GREEN + generated_text + Style.RESET_ALL)
+
+                 self.dialog.append(
+                     {
+                         "role": "assistant",
+                         "content": f"{generated_text}",
+                     }
+                 )
+                 break
+
+         self.dialog = [self.dialog[0]] + clean_the_dialog(
+             self.dialog, question=user_message
+         )  # delete retrospections after generation step
+
+         return self.dialog[-1]
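The `@retry` decorator on `ChatCompletion` comes from the `retrying` package; in isolation it behaves like this sketch (the function name is illustrative):

```python
from retrying import retry

@retry(
    stop_max_attempt_number=7,          # give up after 7 attempts
    wait_exponential_multiplier=1000,   # wait 2**n * 1000 ms between attempts
    wait_exponential_max=10000,         # but never more than 10 s per wait
)
def flaky_api_call():
    # Any exception raised here triggers another attempt with
    # exponential backoff, until the limits above are reached.
    ...
```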
Llama2-Code-Interpreter/code_interpreter/GPTCodeInterpreterDataCollect.py ADDED
@@ -0,0 +1,271 @@
+ import json
+ import os, sys
+ import time
+ import re
+ from pathlib import Path
+ from typing import List, Literal, Optional, Tuple, TypedDict, Dict
+
+ # Get the path from environment variable
+ prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ sys.path.append(prj_root_path)
+ from code_interpreter.JuypyterClient import JupyterNotebook
+ from code_interpreter.BaseCodeInterpreter import BaseCodeInterpreter
+ from utils.const import *
+ from colorama import init, Fore, Style
+ from rich.markdown import Markdown
+ import base64
+
+ import openai
+ from retrying import retry
+ import logging
+ from termcolor import colored
+
+ # load the API key from the key file
+ with open("./openai_api_key.txt") as f:
+     OPENAI_API_KEY = f.read()
+ openai.api_key = OPENAI_API_KEY
+ from utils.cleaner import clean_error_msg
+ from prompt.gpt4_prompt import *
+
+
+ def remove_string(s):
+     pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{6}:.*LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64\n"
+     return re.sub(pattern, "", s)
+
+
+ def gen_questions(prefix="What is 55th fibonacci number?"):
+     response = openai.ChatCompletion.create(
+         model="gpt-4",
+         messages=[
+             {
+                 "role": "system",
+                 "content": "You are teacherGPT, You need to generate only questions(to student not the explanation and solution) based on student history. \n\nGive him only one question.\n\nAlso remember that student can use code. ",
+             },
+             {
+                 "role": "user",
+                 "content": f"{prefix}\nmore harder one but not the similar domain of above.",
+             },
+         ],
+         temperature=0.1,
+         max_tokens=300,
+         top_p=1,
+         frequency_penalty=0,
+         presence_penalty=0,
+     )
+     return response["choices"][0]["message"]["content"]
+
+
+ def save_dialog(dialog, base_path: str = f"{prj_root_path}/gpt_data_gen"):
+     file_number = 0
+     while True:
+         # Construct the path
+         file_name = f"{file_number}.json"
+         full_path = os.path.join(base_path, file_name)
+
+         # Check if the file already exists
+         if not os.path.exists(full_path):
+             # If not, save the file
+             with open(full_path, "w") as f:
+                 json.dump(dialog, f)
+             print(f"Dialog saved to {full_path}")
+             break
+         else:
+             # If the file does exist, increment the file number and try again
+             file_number += 1
+
+
+ def clean_the_dialog(dialog, question):
+     question_idx = 0
+     for idx, item in enumerate(dialog):
+         if item["content"] == question:
+             question_idx = idx
+
+     filtered_dialog = dialog[question_idx:]
+
+     user_qinit_dict = filtered_dialog[0]
+     answer_fuse_str = "\n".join([i["content"].strip() for i in filtered_dialog[1::2]])
+
+     final_dialog_dict = [
+         {"role": "user", "content": user_qinit_dict["content"]},
+         {"role": "assistant", "content": answer_fuse_str},
+     ]
+
+     return final_dialog_dict
+
+
+ class GPTCodeInterpreter(BaseCodeInterpreter):
+     def __init__(self, model="gpt-4"):
+         self.model = model
+         self.dialog = [
+             # {"role": "system", "content": CODE_INTERPRETER_SYSTEM_PROMPT },
+             {
+                 "role": "system",
+                 "content": CODE_INTERPRETER_SYSTEM_PROMPT + "\n" + extra_prompt,
+             },
+             # {"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
+             # {"role": "assistant", "content": "I think I need to use BeautifulSoup to find the current Korean president,"}
+         ]
+
+         self.dialog += few_shot_1
+         # self.dialog += few_shot_4
+         self.response = None
+
+         assert os.path.isfile(
+             "./openai_api_key.txt"
+         ), "The openai_api_key.txt file could not be found. Please make sure it is in the same directory as this script, and that it contains your OpenAI API key."
+
+         # load the API key from the key file
+         with open("./openai_api_key.txt") as f:
+             OPENAI_API_KEY = f.read()
+         openai.api_key = OPENAI_API_KEY
+
+         self.nb = JupyterNotebook()
+         out = self.nb.add_and_run(TOOLS_CODE)  # tool import
+
+     def get_response_content(self):
+         if self.response:
+             return self.response["choices"][0]["message"]["content"]
+         else:
+             return None
+
+     @retry(
+         stop_max_attempt_number=7,
+         wait_exponential_multiplier=1000,
+         wait_exponential_max=10000,
+     )
+     def ChatCompletion(self):
+         try:
+             self.response = openai.ChatCompletion.create(
+                 model=self.model, messages=self.dialog, temperature=0.1, top_p=1.0
+             )
+         except Exception as e:
+             print(f"error during OpenAI API call: {e}")
+
+     def chat(self, user_message: str, VERBOSE: bool = False, MAX_RETRY: int = 6):
+         self.dialog.append({"role": "user", "content": user_message})
+
+         code_block_output = ""
+         attempt = 0
+         img_data = None
+
+         if VERBOSE:
+             print(
+                 "###User : " + Fore.BLUE + Style.BRIGHT + user_message + Style.RESET_ALL
+             )
+             print("\n###Assistant : ")
+
+         for i in range(MAX_RETRY):
+             # GPT response
+             self.ChatCompletion()
+
+             # Get code block
+             generated_text = self.get_response_content()
+             generated_code_blocks = self.extract_code_blocks(generated_text)
+             # execute code
+             if len(generated_code_blocks) > 0:
+                 # Find the position of the first code block in the last answer
+                 first_code_block_pos = (
+                     generated_text.find(generated_code_blocks[0])
+                     if generated_code_blocks
+                     else -1
+                 )
+                 text_before_first_code_block = (
+                     generated_text
+                     if first_code_block_pos == -1
+                     else generated_text[:first_code_block_pos]
+                 )
+                 if VERBOSE:
+                     print(Fore.GREEN + text_before_first_code_block + Style.RESET_ALL)
+                 if VERBOSE:
+                     print(
+                         Fore.YELLOW
+                         + generated_code_blocks[0]
+                         + "\n```\n"
+                         + Style.RESET_ALL
+                     )
+                 code_block_output, error_flag = self.execute_code_and_return_output(
+                     generated_code_blocks[0]
+                 )
+
+                 code_block_output = f"{code_block_output}"
+
+                 if code_block_output is not None:
+                     code_block_output = code_block_output.strip()
+
+                 code_block_output = remove_string(code_block_output)
+                 if len(code_block_output) > 500:
+                     code_block_output = (
+                         code_block_output[:200] + "⋯(skip)⋯" + code_block_output[-200:]
+                     )
+                 code_block_output_str = f"\n```RESULT\n{code_block_output}\n```\n"
+                 if VERBOSE:
+                     print(Fore.LIGHTBLACK_EX + code_block_output_str + Style.RESET_ALL)
+                     # markdown = Markdown(code_block_output_str); print(markdown)
+
+                 gen_final = f"{text_before_first_code_block}{generated_code_blocks[0]}\n```{code_block_output_str}"
+
+                 self.dialog.append(
+                     {
+                         "role": "assistant",
+                         "content": f"{text_before_first_code_block}{generated_code_blocks[0]}\n```{code_block_output_str}",
+                     }
+                 )
+
+                 self.dialog.append(
+                     {
+                         "role": "user",
+                         "content": "Keep going. if you think debugging generate code. need conclusion to question only text (Do not leave result part alone). Doesn't need to generated anything then just say <done>",
+                     }
+                 )
+
+             else:
+                 if "<done>" in generated_text:
+                     generated_text = generated_text.split("<done>")[0].strip()
+
+                 if len(generated_text) <= 0:
+                     break
+
+                 if VERBOSE:
+                     print(Fore.GREEN + generated_text + Style.RESET_ALL)
+
+                 self.dialog.append(
+                     {
+                         "role": "assistant",
+                         "content": f"{generated_text}",
+                     }
+                 )
+                 break
+
+         return self.dialog[-1]
+
+
+ if __name__ == "__main__":
+     import random
+
+     SEED_TASK = [
+         # "Resize this image to 512x512\nUser Uploaded File : './tmp/img.png'",
+         "Write a Python script that retrieves Google Trends data for a given keyword and stock price data for a specific company over the same timeframe, normalizes both datasets to the same scale, and then plots them on the same graph to analyze potential correlations.",
+         "Could you conduct a frequency analysis on Apple's stock price to determine any cyclic patterns that occur on a weekly, monthly, or quarterly basis?",
+     ]
+
+     questions = SEED_TASK
+
+     from tqdm import tqdm
+
+     for i in tqdm(range(150000)):
+         interpreter = GPTCodeInterpreter()
+
+         question = questions[i]
+         output = interpreter.chat(user_message=question, VERBOSE=True, MAX_RETRY=5)
+
+         sample = clean_the_dialog(interpreter.dialog, question)
+
+         save_dialog(sample)
+
+         # q1, q2, q3 = random.sample(questions, k=3)
+         # question = gen_questions(prefix=f"{q1}\n{q2}\n{q3}")
+         # questions.append(question)
+
+         del interpreter
+
+         print(f"new question :: {question}")
Llama2-Code-Interpreter/code_interpreter/JuypyterClient.py ADDED
@@ -0,0 +1,75 @@
+ from jupyter_client import KernelManager
+ import threading
+ import re
+
+
+ class JupyterNotebook:
+     def __init__(self):
+         self.km = KernelManager()
+         self.km.start_kernel()
+         self.kc = self.km.client()
+
+     def clean_output(self, outputs):
+         outputs_only_str = list()
+         for i in outputs:
+             if type(i) == dict:
+                 if "text/plain" in list(i.keys()):
+                     outputs_only_str.append(i["text/plain"])
+             elif type(i) == str:
+                 outputs_only_str.append(i)
+             elif type(i) == list:
+                 error_msg = "\n".join(i)
+                 error_msg = re.sub(r"\x1b\[.*?m", "", error_msg)
+                 outputs_only_str.append(error_msg)
+
+         return "\n".join(outputs_only_str).strip()
+
+     def add_and_run(self, code_string):
+         # This inner function will be executed in a separate thread
+         def run_code_in_thread():
+             nonlocal outputs, error_flag
+
+             # Execute the code and get the execution count
+             msg_id = self.kc.execute(code_string)
+
+             while True:
+                 try:
+                     msg = self.kc.get_iopub_msg(timeout=20)
+
+                     msg_type = msg["header"]["msg_type"]
+                     content = msg["content"]
+
+                     if msg_type == "execute_result":
+                         outputs.append(content["data"])
+                     elif msg_type == "stream":
+                         outputs.append(content["text"])
+                     elif msg_type == "error":
+                         error_flag = True
+                         outputs.append(content["traceback"])
+
+                     # If the execution state of the kernel is idle, it means the cell finished executing
+                     if msg_type == "status" and content["execution_state"] == "idle":
+                         break
+                 except Exception:
+                     break
+
+         outputs = []
+         error_flag = False
+
+         # Start the thread to run the code
+         thread = threading.Thread(target=run_code_in_thread)
+         thread.start()
+
+         # Wait for 10 seconds for the thread to finish
+         thread.join(timeout=10)
+
+         # If the thread is still alive after 10 seconds, it's a timeout
+         if thread.is_alive():
+             outputs = ["Timeout after 10 seconds"]
+             error_flag = True
+
+         return self.clean_output(outputs), error_flag
+
+     def close(self):
+         """Shutdown the kernel."""
+         self.km.shutdown_kernel()
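A minimal usage sketch of the client above (assumes a working local IPython kernel, e.g. `pip install jupyter_client ipykernel`; not part of the commit):

```python
nb = JupyterNotebook()

# Cells run in one persistent kernel, so state survives across calls.
out, err = nb.add_and_run("x = 21 * 2")
out, err = nb.add_and_run("print(x)")
print(out)   # "42"
print(err)   # False

nb.close()
```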
Llama2-Code-Interpreter/code_interpreter/LlamaCodeInterpreter.py ADDED
@@ -0,0 +1,286 @@
+ import sys
+ import os
+
+ prj_root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ sys.path.append(prj_root_path)
+
+ from code_interpreter.JuypyterClient import JupyterNotebook
+ from code_interpreter.BaseCodeInterpreter import BaseCodeInterpreter
+ from utils.const import *
+
+ from typing import List, Literal, Optional, Tuple, TypedDict, Dict
+ from colorama import init, Fore, Style
+ import copy
+ import re
+
+ import torch
+ import transformers
+ from transformers import LlamaForCausalLM, LlamaTokenizer
+ from peft import PeftModel
+
+
+ sys.path.append(os.path.dirname(__file__))
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+ from finetuning.conversation_template import msg_to_code_result_tok_temp
+ from utils.special_tok_llama2 import (
+     B_CODE,
+     E_CODE,
+     B_RESULT,
+     E_RESULT,
+     B_INST,
+     E_INST,
+     B_SYS,
+     E_SYS,
+     DEFAULT_PAD_TOKEN,
+     DEFAULT_BOS_TOKEN,
+     DEFAULT_EOS_TOKEN,
+     DEFAULT_UNK_TOKEN,
+     IGNORE_INDEX,
+ )
+
+ import warnings
+
+ warnings.filterwarnings("ignore", category=UserWarning, module="transformers")
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+
+
+ class LlamaCodeInterpreter(BaseCodeInterpreter):
+     def __init__(
+         self,
+         model_path: str,
+         load_in_8bit: bool = False,
+         load_in_4bit: bool = False,
+         peft_model: Optional[str] = None,
+     ):
+         # build tokenizer
+         self.tokenizer = LlamaTokenizer.from_pretrained(
+             model_path,
+             padding_side="right",
+             use_fast=False,
+         )
+
+         # Handle special tokens
+         special_tokens_dict = dict()
+         if self.tokenizer.pad_token is None:
+             special_tokens_dict["pad_token"] = DEFAULT_PAD_TOKEN  # 32000
+         if self.tokenizer.eos_token is None:
+             special_tokens_dict["eos_token"] = DEFAULT_EOS_TOKEN  # 2
+         if self.tokenizer.bos_token is None:
+             special_tokens_dict["bos_token"] = DEFAULT_BOS_TOKEN  # 1
+         if self.tokenizer.unk_token is None:
+             special_tokens_dict["unk_token"] = DEFAULT_UNK_TOKEN
+
+         self.tokenizer.add_special_tokens(special_tokens_dict)
+         self.tokenizer.add_tokens(
+             [B_CODE, E_CODE, B_RESULT, E_RESULT, B_INST, E_INST, B_SYS, E_SYS],
+             special_tokens=True,
+         )
+
+         self.model = LlamaForCausalLM.from_pretrained(
+             model_path,
+             device_map="auto",
+             load_in_4bit=load_in_4bit,
+             load_in_8bit=load_in_8bit,
+             torch_dtype=torch.float16,
+         )
+
+         self.model.resize_token_embeddings(len(self.tokenizer))
+
+         if peft_model is not None:
+             # assign the PEFT-wrapped model back so the adapter is actually used
+             self.model = PeftModel.from_pretrained(self.model, peft_model)
+
+         self.model = self.model.eval()
+
+         self.dialog = [
+             {
+                 "role": "system",
+                 "content": CODE_INTERPRETER_SYSTEM_PROMPT + "\nUse code to answer",
+             },
+             # {"role": "user", "content": "How can I use BeautifulSoup to scrape a website and extract all the URLs on a page?"},
+             # {"role": "assistant", "content": "I think I need to use BeautifulSoup to find the current Korean president,"}
+         ]
+
+         self.nb = JupyterNotebook()
+         self.MAX_CODE_OUTPUT_LENGTH = 3000
+         out = self.nb.add_and_run(TOOLS_CODE)  # tool import
+         print(out)
+
+     def dialog_to_prompt(self, dialog: List[Dict]) -> str:
+         full_str = msg_to_code_result_tok_temp(dialog)
+
+         return full_str
+
+     @torch.inference_mode()
+     def generate(
+         self,
+         prompt: str = "[INST]\n###User : hi\n###Assistant :",
+         max_new_tokens=512,
+         do_sample: bool = True,
+         use_cache: bool = True,
+         top_p: float = 0.95,
+         temperature: float = 0.1,
+         top_k: int = 50,
+         repetition_penalty: float = 1.0,
+     ) -> str:
+         # Get the model and tokenizer, and tokenize the user text.
+
+         input_prompt = copy.deepcopy(prompt)
+         inputs = self.tokenizer([prompt], return_tensors="pt")
+         input_tokens_shape = inputs["input_ids"].shape[-1]
+
+         eos_token_id = self.tokenizer.convert_tokens_to_ids(DEFAULT_EOS_TOKEN)