abrahammg committed
Commit
547548c
1 Parent(s): 2316149

Update README.md

Files changed (1)
  1. README.md +35 -9
README.md CHANGED
@@ -31,20 +31,46 @@ To use this model, follow the example code provided below. Ensure you have the n
 
```bash
pip install transformers
+ pip install bitsandbytes
+ pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
+ pip install llmtuner
```

- ### Installation
+ ### Test the model

```bash
- from transformers import AutoModelForCausalLM, AutoTokenizer
+ from llmtuner import ChatModel
+ from llmtuner.extras.misc import torch_gc
+
+ chat_model = ChatModel(dict(
+     model_name_or_path="unsloth/llama-3-8b-Instruct-bnb-4bit",  # use the bnb-4bit-quantized Llama-3-8B-Instruct model
+     adapter_name_or_path="model",  # load the saved LoRA adapters
+     finetuning_type="lora",  # same as the one used in training
+     template="llama3",  # same as the one used in training
+     quantization_bit=4,  # load the 4-bit quantized model
+     use_unsloth=True,  # use UnslothAI's LoRA optimization for 2x faster generation
+ ))
+
+ messages = []
+ while True:
+     query = input("\nUser: ")
+     if query.strip() == "exit":
+         break

- model_name = "abrahammg/Llama3-8B-Galician-Chat"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name)
+     if query.strip() == "clear":
+         messages = []
+         torch_gc()
+         print("History has been removed.")
+         continue

- text = "Enter some text in Galician here."
- inputs = tokenizer(text, return_tensors="pt")
- outputs = model.generate(**inputs)
+     messages.append({"role": "user", "content": query})  # add the query to the message history
+     print("Assistant: ", end="", flush=True)
+     response = ""
+     for new_text in chat_model.stream_chat(messages):  # stream the generated text
+         print(new_text, end="", flush=True)
+         response += new_text
+     print()
+     messages.append({"role": "assistant", "content": response})  # add the response to the message history

- print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ torch_gc()
```
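As a quick sanity check of the new instructions, the interactive loop above can be reduced to a one-shot script. The sketch below reuses the exact `ChatModel` configuration from the diff and relies only on `stream_chat`, which the loop already uses; the file name and the Galician prompt are illustrative assumptions, not part of the commit.

```python
# chat_once.py -- minimal one-shot variant of the README's chat loop (illustrative, not from the commit).
from llmtuner import ChatModel

# Same configuration as in the README diff above.
chat_model = ChatModel(dict(
    model_name_or_path="unsloth/llama-3-8b-Instruct-bnb-4bit",
    adapter_name_or_path="model",
    finetuning_type="lora",
    template="llama3",
    quantization_bit=4,
    use_unsloth=True,
))

# A sample Galician prompt (illustrative): "Hello! How are you?"
messages = [{"role": "user", "content": "Ola! Como estás?"}]

# stream_chat yields incremental text chunks; joining them gives the full reply
# without the interactive prompt handling.
reply = "".join(chat_model.stream_chat(messages))
print(reply)
```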