File size: 3,801 Bytes
70dbc50 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
---
license: apache-2.0
base_model: amazingvince/Not-WizardLM-2-7B
pipeline_tag: text-generation
---
# QuantFactory/Not-WizardLM-2-7B-GGUF
This is quantized version of [amazingvince/Not-WizardLM-2-7B]() created using llama.cpp
# Model Description
<a href="https://colab.research.google.com/gist/pszemraj/d3d74ceab942722b49188606785e2bfd/not-wizardlm-2-7b-inference.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
Included is code ripped from fastchat with the expected chat templating.
Also wiz.pdf is a pdf of the github blog showing the apache 2 release.
Link to wayback machine included: https://web.archive.org/web/20240415221214/https://wizardlm.github.io/WizardLM2/
## example
```python
import dataclasses
from enum import auto, Enum
from typing import List, Tuple, Any
class SeparatorStyle(Enum):
"""Different separator style."""
SINGLE = auto()
TWO = auto()
@dataclasses.dataclass
class Conversation:
"""A class that keeps all conversation history."""
system: str
roles: List[str]
messages: List[List[str]]
offset: int
sep_style: SeparatorStyle = SeparatorStyle.SINGLE
sep: str = "###"
sep2: str = None
# Used for gradio server
skip_next: bool = False
conv_id: Any = None
def get_prompt(self):
if self.sep_style == SeparatorStyle.SINGLE:
ret = self.system
for role, message in self.messages:
if message:
ret += self.sep + " " + role + ": " + message
else:
ret += self.sep + " " + role + ":"
return ret
elif self.sep_style == SeparatorStyle.TWO:
seps = [self.sep, self.sep2]
ret = self.system + seps[0]
for i, (role, message) in enumerate(self.messages):
if message:
ret += role + ": " + message + seps[i % 2]
else:
ret += role + ":"
return ret
else:
raise ValueError(f"Invalid style: {self.sep_style}")
def append_message(self, role, message):
self.messages.append([role, message])
def to_gradio_chatbot(self):
ret = []
for i, (role, msg) in enumerate(self.messages[self.offset:]):
if i % 2 == 0:
ret.append([msg, None])
else:
ret[-1][-1] = msg
return ret
def copy(self):
return Conversation(
system=self.system,
roles=self.roles,
messages=[[x, y] for x, y in self.messages],
offset=self.offset,
sep_style=self.sep_style,
sep=self.sep,
sep2=self.sep2,
conv_id=self.conv_id)
def dict(self):
return {
"system": self.system,
"roles": self.roles,
"messages": self.messages,
"offset": self.offset,
"sep": self.sep,
"sep2": self.sep2,
"conv_id": self.conv_id,
}
conv = Conversation(
system="A chat between a curious user and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the user's questions.",
roles=("USER", "ASSISTANT"),
messages=[],
offset=0,
sep_style=SeparatorStyle.TWO,
sep=" ",
sep2="</s>",
)
conv.append_message(conv.roles[0], "Why would Microsoft take this down?")
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
result = model.generate(**inputs, max_new_tokens=1000)
generated_ids = result[0]
generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
print(generated_text)
``` |