primer-llm-embedding / python /code /save_qwen_embeddings.py
hesamation's picture
Remove unnecessary files and update .gitignore to exclude new artifacts. Added 'embeddings_qwen.pth' to .gitignore.
4d72731
raw
history blame contribute delete
638 Bytes
from pathlib import Path

import torch
from transformers import AutoTokenizer, AutoModel
# Script: load a pre-trained checkpoint, pull out its input-embedding layer,
# and save that layer's state dict to disk.

# Hugging Face model id; tokenizer and model are loaded from the same checkpoint.
tokenizer_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
model_name = tokenizer_name

# Output location, relative to the current working directory. Built from path
# components instead of a backslash-separated string so it resolves to the
# same nested location on Windows, Linux and macOS (a literal
# "python\code\files\..." string is a single odd filename on POSIX).
output_path = Path("python") / "code" / "files" / "embeddings_qwen.pth"

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
# NOTE(review): the tokenizer is not used again in this script, and the model's
# embedding matrix is not resized after this call, so the saved weights do not
# reflect the added '[PAD]' token -- confirm whether this line is still needed.
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Load the pre-trained model
model = AutoModel.from_pretrained(model_name)

# Extract the embeddings layer
embeddings = model.get_input_embeddings()

# Print out the embeddings
print(f"Extracted Embeddings Layer for {model_name}: {embeddings}")

# Save the embeddings layer. Create the target directory first so the script
# does not fail at the very end when the folder is missing.
output_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(embeddings.state_dict(), output_path)