rojasdiego committed on
Commit
5595090
verified
1 Parent(s): 1ab8d81

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +19 -37
README.md CHANGED
@@ -33,45 +33,27 @@ pip install peft transformers jinja2==3.1.0
33
  Here’s a sample code snippet to load and interact with the model:
34
 
35
  ```python
 
36
  import torch
37
- from peft import PeftModel
38
- from transformers import AutoModelForCausalLM, AutoTokenizer
39
 
40
- # Load the base model and tokenizer
41
- model = AutoModelForCausalLM.from_pretrained(
42
- "meta-llama/Llama-3.1-8B-Instruct", torch_dtype=torch.bfloat16
 
 
 
 
43
  )
44
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
45
-
46
- # Load the fine-tuned model using LORA
47
- model = PeftModel.from_pretrained(
48
- model,
49
- "rojas-diego/Meta-Llama-3.1-8B-Instruct-Apple-MLX",
50
- ).to("cuda")
51
-
52
- # Define input using a chat template with a system prompt and user query
53
- ids = tokenizer.apply_chat_template(
54
- [
55
- {
56
- "role": "system",
57
- "content": "You are a helpful AI coding assistant with expert knowledge of Apple's latest machine learning framework: MLX. You can help answer questions about MLX, provide code snippets, and help debug code.",
58
- },
59
- {
60
- "role": "user",
61
- "content": "How do you transpose a matrix in MLX?",
62
- },
63
- ],
64
- tokenize=True,
65
- add_generation_prompt=True,
66
- return_tensors="pt",
67
- ).to("cuda")
68
-
69
- # Generate and print the response
70
- print(
71
- tokenizer.decode(
72
- model.generate(input_ids=ids, max_new_tokens=256, temperature=0.5).tolist()[0][
73
- len(ids) :
74
- ]
75
- )
76
  )
 
 
77
  ```
 
33
  Here’s a sample code snippet to load and interact with the model:
34
 
35
  ```python
36
+ import transformers
37
  import torch
 
 
38
 
39
+ model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
40
+
41
+ pipeline = transformers.pipeline(
42
+ "text-generation",
43
+ model=model_id,
44
+ model_kwargs={"torch_dtype": torch.bfloat16},
45
+ device_map="auto",
46
  )
47
+
48
+ messages = [
49
+ {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
50
+ {"role": "user", "content": "Who are you?"},
51
+ ]
52
+
53
+ outputs = pipeline(
54
+ messages,
55
+ max_new_tokens=256,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  )
57
+ print(outputs[0]["generated_text"][-1])
58
+
59
  ```