Instructions for using 4ervonec19/SimpleTestGenerator with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- Transformers
How to use 4ervonec19/SimpleTestGenerator with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="4ervonec19/SimpleTestGenerator")

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("4ervonec19/SimpleTestGenerator")
model = AutoModelForCausalLM.from_pretrained("4ervonec19/SimpleTestGenerator")
```
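The pipeline can then be called on a function's source code. A minimal sketch (the sample function is hypothetical; the full prompt wrapper the model was fine-tuned with is shown in the inference script at the bottom of this page):

```python
# Hypothetical input function; wrapping it in the training prompt may improve results
function_source = "def add(a, b):\n\treturn a + b"

result = pipe(function_source, max_new_tokens=128)
print(result[0]["generated_text"])
```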
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use 4ervonec19/SimpleTestGenerator with vLLM:
Install from pip and serve the model:
```shell
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "4ervonec19/SimpleTestGenerator"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "4ervonec19/SimpleTestGenerator",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
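Since the server exposes an OpenAI-compatible API, you can also call it from Python. A minimal sketch, assuming the server above is running on localhost:8000 and the `openai` package is installed:

```python
from openai import OpenAI

# Point the client at the local vLLM server; the API key is unused but required
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

completion = client.completions.create(
    model="4ervonec19/SimpleTestGenerator",
    prompt="Once upon a time,",
    max_tokens=512,
    temperature=0.5,
)
print(completion.choices[0].text)
```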
Use Docker

```shell
docker model run hf.co/4ervonec19/SimpleTestGenerator
```
- SGLang
How to use 4ervonec19/SimpleTestGenerator with SGLang:
Install from pip and serve the model:
```shell
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "4ervonec19/SimpleTestGenerator" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "4ervonec19/SimpleTestGenerator",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
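The same endpoint can be hit from Python with plain HTTP. A minimal sketch, assuming the SGLang server above is running on localhost:30000 and `requests` is installed:

```python
import requests

# POST to the OpenAI-compatible completions endpoint shown in the curl call above
resp = requests.post(
    "http://localhost:30000/v1/completions",
    json={
        "model": "4ervonec19/SimpleTestGenerator",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5,
    },
)
print(resp.json()["choices"][0]["text"])
```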
Use Docker images

```shell
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
  --model-path "4ervonec19/SimpleTestGenerator" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "4ervonec19/SimpleTestGenerator",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use 4ervonec19/SimpleTestGenerator with Docker Model Runner:
```shell
docker model run hf.co/4ervonec19/SimpleTestGenerator
```
The full inference script below wraps the fine-tuned GPT-BigCode (SantaCoder) checkpoint in a small PyTorch module:

```python
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM

import logging
logging.getLogger("transformers").setLevel(logging.ERROR)

import warnings
warnings.filterwarnings("ignore")

# Prompt wrapper prepended to every input function (one-shot style instruction)
prompt_string = "Generate test cases for a given Python function using its source code. The output should be a series of assert statements that verify the function's correctness. \n\nExample:\n\nInput:\n\ndef add(a, b):\n\treturn a + b\n \n\nOutput:\n\ndef test_add(a, b):\n\n\tassert add(1, 2) == 3\n\tassert add(-1, 1) == 0\n\tassert add(0, 0) == 0\n\tassert add(1.5, 2.5) == 4.0\n\nMy Input:\n\n"

max_length = 512
params_inference = {
    'max_new_tokens': 512,
    'do_sample': True,
    'top_k': 100,
    'top_p': 0.85,
}
num_lines = 5
gpt2_name = "bigcode/gpt_bigcode-santacoder"
saved_model_path = '/Users/chervonikov_alexey/Desktop/result_gpt_bigcode_15_epochs/model_val_loss.pth'  # adjust to your local checkpoint
device = "cuda" if torch.cuda.is_available() else "cpu"
class LargeCodeModelGPTBigCode(nn.Module):
    '''Wrapper class for the large language model that processes input code.'''
    def __init__(self, gpt2_name=gpt2_name,
                 prompt_string=prompt_string,
                 params_inference=params_inference,
                 max_length=max_length,
                 device=device,
                 saved_model_path=saved_model_path,
                 num_lines=num_lines,
                 flag_hugging_face=False,
                 flag_pretrained=False):
        '''
        Class constructor; initializes the model.
        Parameters:
        -gpt2_name: model link on HuggingFace (default: "bigcode/gpt_bigcode-santacoder")
        -prompt_string: additional prompt wrapper that helps the model understand the task (default: prompt_string)
        -params_inference: inference parameters (used as self.gpt2.generate(**inputs, **inference_params))
        -max_length: maximum number of tokens in a sequence (default: 512)
        -device: device (default: cuda or cpu)
        -saved_model_path: path to the fine-tuned model
        -num_lines: number of output lines to keep (the model does not always stop generating on its own)
        -flag_hugging_face: flag for loading from HuggingFace (default: False)
        -flag_pretrained: flag for initializing the model with fine-tuned weights
        '''
        super().__init__()
        self.new_special_tokens = ['<FUNC_TOKEN>',
                                   '<INFO_TOKEN>',
                                   '<CLS_TOKEN>',
                                   '<AST_TOKEN>',
                                   '<DESCRIPTION_TOKEN>',
                                   '<COMMENTS_TOKEN>']
        self.special_tokens_dict = {
            'additional_special_tokens': self.new_special_tokens
        }
        self.tokenizerGPT = AutoTokenizer.from_pretrained(gpt2_name, padding_side='left')
        self.tokenizerGPT.add_special_tokens({'pad_token': '<PAD>'})
        self.tokenizerGPT.add_special_tokens(self.special_tokens_dict)
        self.gpt2 = AutoModelForCausalLM.from_pretrained(gpt2_name)
        # Resize embeddings to account for the newly added special tokens
        self.gpt2.resize_token_embeddings(len(self.tokenizerGPT))
        self.max_length = max_length
        self.inference_params = params_inference
        self.additional_prompt = prompt_string
        self.pretrained_path = saved_model_path
        self.device = device
        self.num_lines = num_lines
        if flag_pretrained and not flag_hugging_face:
            if self.device == "cuda":
                self.load_state_dict(torch.load(self.pretrained_path))
            else:
                self.load_state_dict(torch.load(self.pretrained_path,
                                                map_location=torch.device('cpu')))
    # forward call
    def forward(self, input_ids, attention_mask, response_ids):
        '''
        Forward call method.
        Parameters:
        -input_ids: input tokens
        -attention_mask: attention mask
        -response_ids: labels
        Returns:
        -result of the forward call
        '''
        gpt2_outputs = self.gpt2(input_ids=input_ids,
                                 attention_mask=attention_mask,
                                 labels=response_ids)
        return gpt2_outputs
    def decode_sequence(self, tokens_ids, tokenizer):
        '''
        Decodes a sequence of tokens.
        Parameters:
        -tokens_ids: token sequence
        -tokenizer: tokenizer
        Returns:
        -code_decoded: decoded sequence
        '''
        code_decoded = tokenizer.decode(tokens_ids, skip_special_tokens=True)
        return code_decoded
    def remove_before_substring(self, text, substring="My Output:"):
        '''
        Helper utility that strips everything before the marker.
        Parameters:
        -text: string
        -substring: substring (default: "My Output:")
        Returns:
        -text: updated text
        '''
        index = text.find(substring)
        if index != -1:
            # Return the part of the string starting from the substring
            return text[index:]
        return text
    def extract_text_between_markers(self, text, start_marker, end_marker):
        '''
        Utility for processing the input string (extracts the clean input function).
        Parameters:
        -text: input string
        -start_marker: start marker
        -end_marker: end marker
        Returns:
        -filtered text
        '''
        start_index = text.find(start_marker)
        end_index = text.find(end_marker)
        if start_index == -1 or end_index == -1 or start_index >= end_index:
            return None
        return text[start_index + len(start_marker):end_index].strip()
    def input_inference(self, code_text):
        '''
        Runs inference on an input code string.
        Parameters:
        -code_text: string with code
        Returns:
        -dict: {
            'input_function': input_function,
            'generated_output': output_string
        }
        Dictionary with the input function and the generated output.
        '''
        model_input = self.additional_prompt + code_text + "\n\nMy Output:\n\n"

        def encode_text(text, tokenizer=self.tokenizerGPT):
            encoding = tokenizer.encode_plus(
                text,
                add_special_tokens=True,
                max_length=self.max_length,
                padding='max_length',
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt',
            )
            input_ids_code_text = encoding['input_ids'].flatten()
            attention_mask_code_text = encoding['attention_mask'].flatten()
            return input_ids_code_text, attention_mask_code_text

        input_ids_focal_method, attention_mask_focal_method = encode_text(model_input)
        self.eval()
        with torch.no_grad():
            inputs = {'input_ids': input_ids_focal_method.unsqueeze(0).to(self.device),
                      'attention_mask': attention_mask_focal_method.unsqueeze(0).to(self.device)}
            input_function = self.decode_sequence(tokens_ids=inputs['input_ids'][0],
                                                  tokenizer=self.tokenizerGPT)
            input_function = self.extract_text_between_markers(input_function,
                                                               'My Input:',
                                                               'My Output:')
            output = self.gpt2.generate(**inputs,
                                        **self.inference_params)
            output_string = self.decode_sequence(tokens_ids=output[0],
                                                 tokenizer=self.tokenizerGPT)
            # Keep only the generated part: drop everything before "My Output:"
            output_string = self.remove_before_substring(output_string) \
                .replace('My Output:', "") \
                .strip()
            # The model does not always stop on its own, so keep only the first num_lines lines
            output_string = "\n".join(output_string.splitlines()[:self.num_lines])
        return {
            'input_function': input_function,
            'generated_output': output_string
        }
if __name__ == "__main__":
    CodeModel = LargeCodeModelGPTBigCode(gpt2_name, flag_pretrained=True)
```
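Putting it together, generation looks like the sketch below. The sample function is hypothetical; `flag_pretrained=True` assumes the fine-tuned checkpoint exists at `saved_model_path`, so pass `flag_pretrained=False` to fall back to the base SantaCoder weights:

```python
model = LargeCodeModelGPTBigCode(flag_pretrained=False)

# Hypothetical input function whose tests we want to generate
sample_function = "def multiply(a, b):\n\treturn a * b"

result = model.input_inference(sample_function)
print(result['input_function'])    # the cleaned-up input function
print(result['generated_output'])  # generated assert statements
```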