---
license: apache-2.0
language:
- fa
---

### Installing Libraries

Make sure the following libraries are installed:

* `pip install -q sentencepiece`
* `pip install -q transformers`
* `pip install -q accelerate`
* `pip install --upgrade -q bitsandbytes`

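### Usage

The snippet below loads the model with Transformers and generates a completion for a Persian prompt (the example asks how long a giraffe lives).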
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "Neurai/llama7b"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    # load_in_8bit=True,  # optional 8-bit quantization via bitsandbytes
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    device_map="auto",
)
model.eval()
print("model loaded")

# Persian prompt: "How many years does a giraffe live?"
SYS_PROMPT = "زرافه چند سال عمر میکند؟"


def response_generate(input_prompt):
    # Tokenize and move the inputs to the same device as the model
    input_ids = tokenizer(input_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            inputs=input_ids["input_ids"],
            attention_mask=input_ids["attention_mask"],
            do_sample=True,
            temperature=0.3,
            top_k=50,
            top_p=0.9,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
            # LLaMA tokenizers often define no pad token; fall back to EOS
            pad_token_id=tokenizer.pad_token_id
            if tokenizer.pad_token_id is not None
            else tokenizer.eos_token_id,
        )
    # Decode the full sequence (prompt + generated tokens) back to text
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]


print(response_generate(SYS_PROMPT))
```
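
Since `bitsandbytes` is installed above and the loading code hints at 8-bit support via the commented-out `load_in_8bit=True`, the model can also be loaded in 8-bit to reduce GPU memory use. A minimal sketch, assuming a `transformers` version recent enough to ship `BitsAndBytesConfig`:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 8-bit quantization via bitsandbytes; trades a little accuracy for memory
quant_config = BitsAndBytesConfig(load_in_8bit=True)

model_8bit = AutoModelForCausalLM.from_pretrained(
    "Neurai/llama7b",
    quantization_config=quant_config,
    low_cpu_mem_usage=True,
    device_map="auto",
)
```

The quantized model can be used with the same `response_generate` function defined above, since inputs are moved to `model.device` rather than a hard-coded GPU.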