robertgshaw2
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -32,7 +32,7 @@ Convert with the `convert.py` script in this repo:
|
|
32 |
|
33 |
```bash
|
34 |
python3 convert.py --model-id "TheBloke/Llama-2-7B-Chat-GPTQ" --save-path "./marlin-model" --do-generation
|
35 |
-
```
|
36 |
|
37 |
### Run Model
|
38 |
|
@@ -47,8 +47,7 @@ model_path = "./marlin-model"
|
|
47 |
model = load_model(model_path).to("cuda")
|
48 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
49 |
|
50 |
-
|
51 |
-
# Run inference to confirm it is working.
|
52 |
inputs = tokenizer("My favorite song is", return_tensors="pt")
|
53 |
inputs = {k: v.to("cuda") for k, v in inputs.items()}
|
54 |
outputs = model.generate(**inputs, max_new_tokens=50, do_sample=False)
|
|
|
32 |
|
33 |
```bash
|
34 |
python3 convert.py --model-id "TheBloke/Llama-2-7B-Chat-GPTQ" --save-path "./marlin-model" --do-generation
|
35 |
+
```
|
36 |
|
37 |
### Run Model
|
38 |
|
|
|
47 |
model = load_model(model_path).to("cuda")
|
48 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
49 |
|
50 |
+
# Generate text.
|
|
|
51 |
inputs = tokenizer("My favorite song is", return_tensors="pt")
|
52 |
inputs = {k: v.to("cuda") for k, v in inputs.items()}
|
53 |
outputs = model.generate(**inputs, max_new_tokens=50, do_sample=False)
|