In [2]:
model_name_or_path = "mistralai/Mixtral-8x7B-Instruct-v0.1"#@param {type:"string"}

temp_dir = "/content/drive/MyDrive/tf_models" #@param {type:"string"}
model_name = model_name_or_path.split("/")[-1]
target_dir = f"{temp_dir}/{model_name}"
save_dir = "/content/drive/MyDrive/tf_models/mixtral-4x7b_slerp" #@param {type:"string"}


In [2]:
!pip install transformers torch safetensors



In [None]:
%cd {temp_dir}
save_model_dir = model_name.split('/')[-1]
!mkdir -p {save_model_dir}

!wget https://huggingface.co/{model_name_or_path}/resolve/main/config.json -O {save_model_dir}/config.json
!wget https://huggingface.co/{model_name_or_path}/resolve/main/model.safetensors.index.json -O {save_model_dir}/model.safetensors.index.json
!wget https://huggingface.co/{model_name_or_path}/resolve/main/generation_config.json -O {save_model_dir}/generation_config.json

for i in range(1,20):
 file_count_str = str(i).zfill(5)
 !wget https://huggingface.co/{model_name_or_path}/resolve/main/model-{file_count_str}-of-00019.safetensors?download=true -O {save_model_dir}/model-{file_count_str}-of-00019.safetensors

In [6]:
%cd {temp_dir}

import json
import re
import torch
from safetensors import safe_open
from safetensors.torch import save_file

# model-00001-of-00019.safetensors
# model.safetensors.index.json

# save tokenizer
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
tokenizer.save_pretrained(save_dir)

# save config
config_path = f"{target_dir}/config.json"
config = None
with open(config_path, "r") as f:
 config = json.load(f)
 config["num_experts_per_tok"] = 2
 config["num_local_experts"] = 4

# save config
with open(f"{save_dir}/config.json", "w") as f:
 json.dump(config, f, indent=2)


# weight
weight_map = {}
first_weights = ["lm_head.weight", "model.embed_tokens.weight", "model.norm.weight"]

# load weight map
bin_index_path = f"{target_dir}/model.safetensors.index.json"
with open(bin_index_path, "r") as f:
 weight_map = json.load(f)["weight_map"]

def tensor_load(file_name, map_location=None):
 tensors = {}
 with safe_open(file_name, framework="pt") as f:
 for k in f.keys():
 tensors[k] = f.get_tensor(k)
 return tensors

def get_weight_byte_size(weight):

 if isinstance(weight, torch.Tensor):
 weight_byte_size = weight.nelement() * weight.element_size()
 else:
 weight_byte_size = sum(p.nelement() * p.element_size() for p in weight.parameters())

 return weight_byte_size

def merge_tensor(tensorA, tensorB):

 t = 0.5

 dot = torch.sum(tensorA * tensorB, dim=1)
 norm_v0 = torch.norm(tensorA, dim=1)
 norm_v1 = torch.norm(tensorB, dim=1)
 cos_omega = dot / (norm_v0 * norm_v1)

 eps = 1e-6
 cos_omega = torch.clamp(cos_omega, -1 + eps, 1 - eps)
 omega = torch.acos(cos_omega)

 # Slerp
 v_t = (torch.sin((1 - t) * omega) / torch.sin(omega)).unsqueeze(1) * tensorA \
 + (torch.sin(t * omega) / torch.sin(omega)).unsqueeze(1) * tensorB

 return v_t

# load weight map
layers = {}
for key in weight_map.keys():
 if key in first_weights:
 continue

 # keyが"model.layers.[0-9]+."にmatchする場合はlayers_listに追加する
 layer_str = re.match(r"model\.layers\.[0-9]+\.", key)[0]
 if layer_str:
 layer_no = re.findall(r"\d+",layer_str)
 layer_no = layer_no[0]
 if layer_no not in layers.keys():
 layers[layer_no] = []

 layers[layer_no].append({ "key":key, "file_name":weight_map[key] })

# new weight_map index
new_weight_map = {
 "metadata": {
 "total_size": 0
 },
 "weight_map": {
 }
}

# load tensors
total_size = 0
tensor_weights = {}
tensors = {}
current_file_name = ""

file_count = 0
file_count_str = str(file_count).zfill(5)

for key in first_weights:
 file_name = weight_map[key]
 if current_file_name != file_name:

 # load safetensor
 tensors = tensor_load(f"{target_dir}/{file_name}", map_location="cpu")
 current_file_name = file_name

 tensor_weights[key] = tensors[key]
 new_weight_map["weight_map"][key] = f"model-{file_count_str}.safetensors"

 # add weight size
 total_size += get_weight_byte_size(tensor_weights[key])

# save tensor
save_file(tensor_weights, f"{save_dir}/model-{file_count_str}.safetensors", metadata={"format":"pt"})
file_count += 1

layer_keys = sorted([ int(k) for k in layers.keys()])

for layer_no in layer_keys:
 print("starting layer:",layer_no)
 file_count_str = str(file_count).zfill(5)
 tensor_weights = {}

 stock_expert_weights = {}

 current_file_name = ""
 for info in layers[str(layer_no)]:
 file_name = info["file_name"]
 if current_file_name != file_name:
 print("Loading Tensors ", file_name)
 tensors = tensor_load(f"{target_dir}/{file_name}", map_location="cpu")
 current_file_name = file_name

 layer_key = info["key"]
 layer_weights = tensors[layer_key]

 if 'experts' in layer_key:

 lk = re.findall(r"block_sparse_moe[.]experts[.][0-9]+.w", layer_key)[0]
 exp_index = int( re.findall(r"\d+",lk)[0] )

 if exp_index % 2 == 0:
 paired_index = exp_index + 1
 paired_key = layer_key.replace(f'block_sparse_moe.experts.{exp_index}.', f'block_sparse_moe.experts.{paired_index}.')
 stock_expert_weights[paired_key] = layer_weights
 print("stock",layer_key, layer_weights.shape)
 continue

 elif exp_index % 2 == 1:
 new_layer_key = re.sub(r"block_sparse_moe\.experts\.\d+\.w", f"block_sparse_moe.experts.{exp_index//2}.w", layer_key)

 # merge experts
 tensor_weights[new_layer_key] = merge_tensor(stock_expert_weights[layer_key] , layer_weights)

 # add weight size
 total_size += get_weight_byte_size(tensor_weights[new_layer_key])

 new_weight_map["weight_map"][new_layer_key] = f"model-{file_count_str}.safetensors"
 print("new experts", new_layer_key, tensor_weights[new_layer_key].shape, "from", layer_key)

 elif 'gate' in layer_key:
 print("reshape", layer_weights.shape, "-> view(4, 2, 4096) -> (4, 4096)", layer_key)

 # calc gate avarage
 weights_reshaped = layer_weights.view(4, 2, 4096)
 tensor_weights[layer_key] = merge_tensor(weights_reshaped[:, 0, :], weights_reshaped[:, 1, :])
 # tensor_weights[layer_key] = torch.mean(weights_reshaped, dim=1)

 # add weight size
 total_size += get_weight_byte_size(tensor_weights[layer_key])

 new_weight_map["weight_map"][layer_key] = f"model-{file_count_str}.safetensors"
 print(layer_key, tensor_weights[layer_key].shape)

 else:
 tensor_weights[layer_key] = layer_weights

 # add weight size
 total_size += get_weight_byte_size(tensor_weights[layer_key])

 new_weight_map["weight_map"][layer_key] = f"model-{file_count_str}.safetensors"
 print(layer_key, tensor_weights[layer_key].shape)

 # save tensor
 save_file(tensor_weights, f"{save_dir}/model-{file_count_str}.safetensors", metadata={"format":"pt"})
 print("Save Tensors ", f"{save_dir}/model-{file_count_str}.safetensors")
 file_count += 1

# save new_weight_map
new_weight_map["metadata"]["total_size"] = total_size
with open(f"{save_dir}/model.safetensors.index.json", "w") as f:
 json.dump(new_weight_map, f, indent=2)

print("Done.")


/content/drive/MyDrive/tf_models
starting layer: 0
Loading Tensors model-00001-of-00019.safetensors
stock model.layers.0.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096])
stock model.layers.0.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336])
stock model.layers.0.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096])
new experts model.layers.0.block_sparse_moe.experts.0.w1.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.1.w1.weight
new experts model.layers.0.block_sparse_moe.experts.0.w2.weight torch.Size([4096, 14336]) from model.layers.0.block_sparse_moe.experts.1.w2.weight
new experts model.layers.0.block_sparse_moe.experts.0.w3.weight torch.Size([14336, 4096]) from model.layers.0.block_sparse_moe.experts.1.w3.weight
stock model.layers.0.block_sparse_moe.experts.2.w1.weight torch.Size([14336, 4096])
stock model.layers.0.block_sparse_moe.experts.2.w2.weight torch.Size([4096, 14336])
stock model.layers.0.block_sparse_moe.e

In [3]:
!pip install git+https://github.com/huggingface/transformers --upgrade
!pip install torch accelerate bitsandbytes flash_attn sentencepiece protobuf

Collecting git+https://github.com/huggingface/transformers
 Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-pjknxkrl
 Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-pjknxkrl
 Resolved https://github.com/huggingface/transformers to commit 29e7a1e1834f331a4916853ecd58549ed78235d6
 Installing build dependencies ... [?25l[?25hdone
 Getting requirements to build wheel ... [?25l[?25hdone
 Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: transformers
 Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
 Created wheel for transformers: filename=transformers-4.37.0.dev0-py3-none-any.whl size=8281393 sha256=fcad3c92f7a9ef1a88b5bafaff20917af338455118d9d562c913dd948d962631
 Stored in directory: /tmp/pip-ephem-wheel-cache-4gpnr6os/wheels/c0/14/d6/6c9a5582d2ac191ec0a483be151a4495fe1eb2a6706ca49f1b
Successfully built transformers
Installi

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM, MixtralForCausalLM
import torch

model_name_or_path = save_dir

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = MixtralForCausalLM.from_pretrained(model_name_or_path, load_in_8bit=True)
questions = [
 "What was John Holt's vision on education?",
 "What is the best anime?",
 "Who is the cutest in Madoka Magica?",
 "Who is the cutest in Bocchi the Rock’er’s?",
 "What is the GUNDAM?",
 "Speak like Elon Musk.",
 "Can you write a short story about a robot learning to paint?",
 "If all roses are flowers, and some flowers fade quickly, can we deduce that some roses fade quickly?",
 "How does a blockchain work, and what are its primary uses?",
 "What are the main points of debate concerning the use of artificial intelligence in surveillance?",
 "Discuss the impact of the Renaissance on modern Western society.",
 "How might someone feel on their first day of work, and what advice would you give them?",
 "What are the recommended treatments for seasonal allergies?",
 "How do human activities contribute to climate change, and what are some strategies to mitigate this?",
 "What are the arguments for and against the notion that free will is an illusion?",
 "How do cultural celebrations differ between Japan and Brazil?"
]
# text = "[INST] What was John Holt's vision on education? [/INST] "
# text = "[INST] What is the best anime? [/INST] "
text = "[INST] Speak like Elon Musk. [/INST] "

for q in questions:
 text = f"[INST] {q} [/INST] "
 inputs = tokenizer(" " + text, return_tensors="pt")
 outputs = model.generate(**inputs, max_new_tokens=256)
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] What was John Holt's vision on education? [/INST] 1. A Lifelong Passion for Learning

John Holt, the founder of the Clasic.ly platform, had a lifelong passion for learning. He was known for his extensive collection of books, which he referred to as his "library."

2. A Belief in the Power of the Mind
 Holt also believed in the power of the mind. He once said, "Everything in the world is the print (i.e., the representation or symbol) of something greater."

3. The Value of Self-Education
 Holt was a strong advocate for self-education. He believed that a well-educated individual was more valuable to a community than a well-armed one.

4. Reading is the Key to Success
 Holt was a voracious reader. "Read, read, read," he would say to his children.

5. A Model for Others
 Holt's life and work were a model for his children to follow. He was a successful businessman, a devoted husband and father, and a tireless learner.

6. A Love for the Outdoors
 Holt was also a lover of


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] What is the best anime? [/INST] 1. Studio Ghibli - Studio Ghibli is a legendary anime studio that has produced some of the most influential anime in the industry. Shows like "N Evangelon," "Death of a Salesman," and "Bleeding of a Schoolgirl" are just a few of the masterpieces that have come from Studio Ghibli. Their unique storytelling, innovative animation, and willingness to push the envelope of what is acceptable in anime has made them a must-watch for any self-respecting anime aficionado. 2. Hayo the Slamp Monster - "Hayo the Slamp Monster" is a dark fantasy horror-comedy that has gained a cult following for its ability to blend gore, saturical social commentary, and just-the-right-dose-of-sick-black-humor to create a truly unique and unsettling experience. The story of a high school girl who wakes up one day to find herself in the midst of a full-blown zombie apocalypse is a bit of a cliche, but the way in which the show subverts and parody's the trope to make it so is wh

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] Who is the cutest in Madoka Magica? [/INST] 1. Tomo: [/img] http:////www.tstug.ed.ac/~t001/ Tomo is the cutest in my opinion. He's the only one who is genuinely kind and caring towards the other characters. He's the only one who is willing to help the other characters and is the only one who is not a jerk to the other characters. 2. Kagami: [/img] http:////www.tstug.ed.ac/~k001/ Kagami is the second cutest for me. He's the only one who is willing to help the other characters and is the only one who is not a jerk to the other characters. 3. Sakura: [/img] http:////www.tstug.ed.ac/~s001/ Sakura is the third cutest for me. She's the only one who is not a jerk to the other characters and is the only one who is willing to help the other characters. 4. Shin: [/img] http:////www.tstug.ed.ac/~s001/ Shin is the fourth


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] Who is the cutest in Bocchi the Rock’er’s? [/INST] 1. The cutest in Bocchi the Rock’er’s? 

Well, that’s a tough question. I’m a member of the band, so I have to be a little bit cuter than the other members. But, I’m also the youngest, so I have a lot of energy and ideas. 

2. So, you’re the cutest in the band? 

Aw, you’re making me blush. I’m the cutest in the band? Well, I guess that means I’m the most popular with the fangirls. 

3. That’s not a bad thing to be. I’m the lead singer, after all. 

A soft smile spreads across my face. 

4. I’m the lead singer? 

I’m the one who gets the girls screaming, alright? 

5. ... and the one who gets the most panties off? 

... the one who gets the most panties off? Well, I’m not the one who gets the most panties off. I’m the one who gets the most panties on


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] What is the GUNDAM? [/INST] 그언데는 개발자 들이 사용하는 개발 환境 이라고 하새이다. 개발자가 사용하는 개발 도구 스셐 개발 환境에 따로 사용하는 개발 도구 이라고 하새이다.

### 그 외에 무엎냅 개발 환 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 맕 이 ��


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] Speak like Elon Musk. [/INST] 1. Be passionate about your field. 2. Have a first-hand understanding of the market. 3. Read and write constantly. 4. Work hard. 5. Don't be afraid to fail. 6. Don't take it easy. 7. Be open to criticism and feedback. 8. Don't be a jerk.

### 1. Be passionate about your field.

I believe that the most important thing is to be passionate about your field. If you're not passionate, it's going to be really hard to put in the 40-hour work weeks, make sacrificements, and stay focused.


### 2. Have a first-hand understanding of the market.

It's nearly impossible to have a product or service that is world-class if you don't understand the market. I've been in the habit of reading at least 10-20 pages a day for the past 5 years, and it's been incredibly helpful.



### 3. Read and write constantly.

The best way to learn is to read and write. It's not a replacement for in-person communication


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] Can you write a short story about a robot learning to paint? [/INST] 500 words
prototype_robot.md

# Prologue

Beeep-

The sound of the alarm pierced the quiet of the lab.

I swiveled my head, the only part of my body that was still functional, to take a look at the source. A small, green light was blinking on the main unit's status screen.

- Oh, no. That's not good.

[/inst]

# Main

I was in the middle of a newt program when the alert sounded.

```
public void main() {
 // Initialize the web server
 HttpServer.createApp()
 .shouldConfigureWith(new RelayConfig()
 .accept("/*.html")
 .thenForwardTo(new IndexController())
 .andThenForwardTo(new Error404Controller())
 .andThenForwardTo(new Redirect401Controller())
 .andThenForwardTo(new Redirect301Controller())
 .andThenForwardTo(new Redirect302Controller())
 .andThenForwardTo(new Redirect3


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] If all roses are flowers, and some flowers fade quickly, can we deduce that some roses fade quickly? [/INST] 1. In programming, if a function has a clear, concise, and descriptive name, and the function does what it says it does, then the function is well-designed. 2. If a function is hard to understand, or it does something other than what it's name suggests, then the function is poorly-designed. 3. In both cases, the answer is "some roses fade quickly". 4. It's important to note that this is a subjective matter. A function that is easy to understand for one programmer might be hard to understand for another. 5. It's also important to note that this is about function names, not variable names. Variable names are not as important as function names in this discussion.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] How does a blockchain work, and what are its primary uses? [/INST] 1. A blockchain is a decentralized, peer-to-peer network of computers that validate and store transactions in a shared, immutable, and transparent ledger. 
- Blocks (groups of transactions) are hashed, stamped with a timestamp, and bundled into a chain. 
- Smart contracts are self-executing contracts that are stored on the blockchain and can be triggered by events. 
- Cryptocurrency is a digital asset that is stored on the blockchain and can be used to represent value, just like a US Dollar. 
- Pro-of-stake is a mechanism to secure the network and provide new blocks, where validators must prove they have the resources to do so by completing a task.

These are some of the primary uses, but there are many more and the use cases are still being discovered.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] What are the main points of debate concerning the use of artificial intelligence in surveillance? [/INST] 1. Risk of bias: Some critics argue that the use of artificial intelligence in surveillance could lead to the creation of a "black box" system that is inherently biased, and that the use of such technology could amplify existing racial and social injustices. 2. Lack of transparency: The use of artificial intelligence in surveillance could lead to a lack of transparency in the way that decisions are made, and that the use of such technology could make it more difficult to hold those responsible for any harm that is caused. 3. Accountability: It is not always clear who is responsible for the harm that is caused by the use of artificial intelligence in surveillance, and that could make it more difficult to hold those accountable. 4. Human rights: The use of artificial intelligence in surveillance could lead to a reduction in the number of people employed in the field, and that

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] Discuss the impact of the Renaissance on modern Western society. [/INST] 1. Humanism: The Renaissance led to a re-emphasized value of the individual and the potential of the human spirit, which in turn led to the birth of the scientific, artistic, and economic enlightenment of the West. 2. Science: The Renaissance was a time of intense intellectual curiosity and inquiry. The spirit of exploration and inquiry that characterized the Renaissance led to the birth of modern science, which in turn has had an incalculable impact on the development of the West. 3. Art: The Renaissance was a time of artistic explosion and innovation. The humanistic realism and naturalism of the Renaissance in the arts laid the ground for the development of modernism in the arts. 4. Economy: The Renaissance was a time of economic dynamism and trade. The wealth and prosperity of the West was in no small part a product of the mercantile and industrial spirit of the Renaissance. 5. Legacy: The Renaissance w

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] How might someone feel on their first day of work, and what advice would you give them? [/INST] 1. Congratulations on getting your first day of work! That's a great accomplishment. 2. How someone feels on their first day of work can vary a lot and depend on a few factors, so I'll try to give some general advice that should be helpful to most people. 1. Stay positive and professional: It's important to put your best foot forward on your first day of work. Even if you're feeling a little nervous, try to maintain a positive and professional demeanor. 2. Network: Make an effort to network with your team and introduce yourself to others. People are more likely to remember and like those who are proactive in making an effort to build relationships. 3. Prepare: If you have any questions or concerns about your first day of work, it's helpful to prepare by asking your manager or a trusted colleague in advance. 4. Learn: Take the first day as an opportunity to learn and grow. It's normal

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] What are the recommended treatments for seasonal allergies? [/INST] 1. Antihistorics: These are medications that block the release of histamine, a chemical compound that causes itchy eyes, runny nose, and other symptoms of seasonal allergies. Some common antihistorics include l Loratadine, l Loratapert, l Loratril, l Loratide, l Loratril, l Loratril, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine, l Loratine


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] How do human activities contribute to climate change, and what are some strategies to mitigate this? [/INST] 1. Individual transportation: The transportation sector is a significant contributor to greenhouse gases, with passenger vehicles responsible for about 37% of U.S. carbon dioxide emissions in the transportation sector. Some strategies to reduce this include:

 - Increasing fuel efficiency of vehicles: Improving the fuel efficiency of vehicles, such as through hybrid-drive, electric, and hydrogen-powered vehicles, can reduce the amount of energy required to power them.
 - Car-sharing: Shared mobility services, such as ride-sharing, car-sharing, and bike-sharing, can reduce the need for individual vehicles.
 - Active transportation: Walking, biking, and scooting are low-impact, zero-emission, and fun ways to move around.

2. Energy use in buildings: Commercial and residential buildings are responsible for a significant amount of energy use, with heating and cooling systems

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 [INST] What are the arguments for and against the notion that free will is an illusion? [/INST] 1. The Argument for Free Will: Some philosophers argue that free will is not an illusion, but rather a fundamental part of human nature. They maintain that while our choices may be influenced by various factors, such as our environment, upbring, and experiences, we still have the capacity to make conscious and deliberate decisions. This view holds that while we may have tendencies and biases, we are ultimately responsible for our actions. 2. The Argument Against Free Will: Those who argue that free will is an illusion often point to the pervasiveness of social, economic, and political injustices as evidence of the limitations of human agency. They may argue that the notion of free will is a product of a particular worldview or ideology, and that the extent to which we are able to exercise our will is limited by the structures in which we operate. 3. A Middle Ground: A third position is that