Instructions to use microsoft/Dayhoff-170m-GR with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use microsoft/Dayhoff-170m-GR with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="microsoft/Dayhoff-170m-GR")

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("microsoft/Dayhoff-170m-GR")
model = AutoModelForCausalLM.from_pretrained("microsoft/Dayhoff-170m-GR")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use microsoft/Dayhoff-170m-GR with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "microsoft/Dayhoff-170m-GR"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "microsoft/Dayhoff-170m-GR",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

docker model run hf.co/microsoft/Dayhoff-170m-GR

SGLang

How to use microsoft/Dayhoff-170m-GR with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "microsoft/Dayhoff-170m-GR" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "microsoft/Dayhoff-170m-GR",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "microsoft/Dayhoff-170m-GR" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "microsoft/Dayhoff-170m-GR",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Docker Model Runner
How to use microsoft/Dayhoff-170m-GR with Docker Model Runner:
```
docker model run hf.co/microsoft/Dayhoff-170m-GR
```

Dayhoff-170m-GR / tokenizers.py

sarahalamdari

Add files using upload-large-folder tool

5e3e614 verified 11 months ago

raw

history blame

4.25 kB

	from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
	from typing import List, Optional, Union
	import os

	# Full default vocabulary. ProteinTokenizer maps each character to its
	# position in this string, so the character order defines the token ids.
	UL_ALPHABET_PLUS = "ACDEFGHIKLMNPQRSTVWYBZXJOU-*#@!/[]{}"
	# Residue letters plus the gap character (default for `all_aas`).
	MSA_AAS = "ACDEFGHIKLMNPQRSTVWYBZXJOU-"

	# Single-character special symbols used as ProteinTokenizer defaults.
	START = "@"    # bos_token
	STOP = "*"     # eos_token
	SEP = "/"      # sep_token
	MASK = "#"     # mask_token
	MSA_PAD = "!"  # pad_token
	GAP = "-"      # gap_token

	# Bracketing symbols; not referenced elsewhere in this file.
	# NOTE(review): presumably AL/UL mean aligned/unaligned segments - confirm.
	START_AL = "["
	END_AL = "]"
	START_UL = "{"
	END_UL = "}"

	class ProteinTokenizer(PreTrainedTokenizer):
	    """Character-level tokenizer for Hugging Face transformers.

	    Every character of the input text is one token, and a token's id is the
	    position of that character in ``protein_alphabet``. Only single
	    sequences are supported: the sequence-pair hooks raise
	    ``NotImplementedError`` when a second sequence is supplied.
	    """

	    def __init__(
	        self,
	        protein_alphabet: str = UL_ALPHABET_PLUS,
	        model_max_length: int = 2048,
	        pad_token=MSA_PAD,
	        mask_token=MASK,
	        all_aas=MSA_AAS,
	        gap_token=GAP,
	        bos_token=START,
	        eos_token=STOP,
	        sep_token=SEP,
	        **kwargs
	    ):
	        """Build the character vocabulary and register the special tokens.

	        Args:
	            protein_alphabet: Characters of the vocabulary; a character's
	                index in this sequence is its token id.
	            model_max_length (int): Model maximum sequence length.
	            pad_token: Padding token (str or AddedToken).
	            mask_token: Mask token (str or AddedToken).
	            all_aas: Characters stored, as a list, on ``self.all_aas``.
	            gap_token: Gap token, stored as given on ``self.gap_token``.
	            bos_token: Beginning-of-sequence token (str or AddedToken).
	            eos_token: End-of-sequence token (str or AddedToken).
	            sep_token: Separator token (str or AddedToken).
	            **kwargs: Forwarded unchanged to ``PreTrainedTokenizer.__init__``.
	        """
	        # Vocabulary tables are set up before super().__init__(), as in the
	        # original ordering.
	        # NOTE(review): the base initializer may query get_vocab() while
	        # registering special tokens - keep this order unless confirmed safe.
	        self.alphabet = list("".join(protein_alphabet))
	        self.all_aas = list("".join(all_aas))
	        self.a_to_i = {u: i for i, u in enumerate(self.alphabet)}
	        self.i_to_a = {i: u for i, u in enumerate(self.alphabet)}
	        # Kept exactly as passed in; the base class is not told about it.
	        self.gap_token = gap_token

	        # Wrap plain-string special tokens so surrounding whitespace is
	        # never stripped; values that are not str are passed through as-is.
	        bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
	        eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
	        sep_token = AddedToken(sep_token, lstrip=False, rstrip=False) if isinstance(sep_token, str) else sep_token
	        mask_token = AddedToken(mask_token, lstrip=False, rstrip=False) if isinstance(mask_token, str) else mask_token
	        pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token

	        super().__init__(
	            pad_token=pad_token,
	            mask_token=mask_token,
	            eos_token=eos_token,
	            bos_token=bos_token,
	            sep_token=sep_token,
	            model_max_length=model_max_length,
	            **kwargs
	        )

	    @property
	    def vocab_size(self):
	        """Number of characters in the alphabet."""
	        return len(self.alphabet)

	    @property
	    def gap_token_id(self):
	        """Id of the gap token, resolved through ``convert_tokens_to_ids``."""
	        return self.convert_tokens_to_ids(self.gap_token)

	    def get_vocab(self):
	        """Return a character -> id mapping.

	        A copy is returned so that callers mutating the result cannot
	        corrupt the tokenizer's own lookup table.
	        """
	        return dict(self.a_to_i)

	    def _tokenize(self, text: str) -> List[str]:
	        """Split ``text`` into its individual characters."""
	        return list(text)

	    def _convert_token_to_id(self, token) -> int:
	        """Look up the id of ``token``; raises ``KeyError`` if it is unknown."""
	        return self.a_to_i[token]

	    def _convert_id_to_token(self, index) -> str:
	        """Look up the character for ``index``; raises ``KeyError`` if unknown."""
	        return self.i_to_a[index]

	    def convert_tokens_to_string(self, tokens):
	        """Concatenate ``tokens`` with no separator."""
	        return "".join(tokens)

	    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
	        """Return ``token_ids_0`` unchanged; no special tokens are added.

	        Raises:
	            NotImplementedError: If ``token_ids_1`` is given.
	        """
	        if token_ids_1 is not None:
	            raise NotImplementedError("This tokenizer does not support two sequences")
	        return token_ids_0

	    def get_special_tokens_mask(
	        self,
	        token_ids_0: List[int],
	        token_ids_1: Optional[List[int]] = None,
	        already_has_special_tokens: bool = False,
	    ) -> List[int]:
	        """Return the special-token mask for a single sequence.

	        When ``already_has_special_tokens`` is true the base-class
	        implementation is used. Otherwise the mask is all zeros, one entry
	        per id in ``token_ids_0``.

	        Raises:
	            NotImplementedError: If ``token_ids_1`` is given and
	                ``already_has_special_tokens`` is false.
	        """
	        if already_has_special_tokens:
	            return super().get_special_tokens_mask(
	                token_ids_0=token_ids_0,
	                token_ids_1=token_ids_1,
	                already_has_special_tokens=True,
	            )

	        if token_ids_1 is not None:
	            raise NotImplementedError("This tokenizer does not support two sequences")

	        return [0] * len(token_ids_0)

	    def create_token_type_ids_from_sequences(
	        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
	    ) -> List[int]:
	        """Return token type ids: a 0 for every id in ``token_ids_0``.

	        Raises:
	            NotImplementedError: If ``token_ids_1`` is given.
	        """
	        if token_ids_1 is not None:
	            raise NotImplementedError("This tokenizer does not support two sequences")
	        return [0] * len(token_ids_0)

	    def save_pretrained(self, save_directory: Union[str, os.PathLike], **kwargs):
	        """Delegate to the base class and return whatever it returns.

	        The base-class result is passed back to the caller rather than
	        being discarded.
	        """
	        return super().save_pretrained(save_directory, **kwargs)

	    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None):
	        """Write no vocabulary file and return an empty tuple."""
	        return ()