LeroyDyer committed
Commit f22092f
1 Parent(s): 5fe3b47

Update README.md

Files changed (1)
  1. README.md +40 -0
README.md CHANGED
@@ -47,4 +47,44 @@ merge_method: linear
  dtype: float16

  ```
+
+ %pip install llama-index-embeddings-huggingface
+ %pip install llama-index-llms-llama-cpp
+ !pip install llama-index
+
+ from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
+ from llama_index.llms.llama_cpp import LlamaCPP
+ from llama_index.llms.llama_cpp.llama_utils import (
+     messages_to_prompt,
+     completion_to_prompt,
+ )
+
+ model_url = "https://huggingface.co/LeroyDyer/Mixtral_BaseModel-gguf/resolve/main/mixtral_basemodel.q8_0.gguf"
+
+ llm = LlamaCPP(
+     # you can pass in the URL to a GGUF model to download it automatically
+     model_url=model_url,
+     # optionally, you can set the path to a pre-downloaded model instead of model_url
+     model_path=None,
+     temperature=0.1,
+     max_new_tokens=256,
+     # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
+     context_window=3900,
+     # kwargs to pass to __call__()
+     generate_kwargs={},
+     # kwargs to pass to __init__()
+     # set to at least 1 to use GPU
+     model_kwargs={"n_gpu_layers": 1},
+     # transform inputs into Llama2 format
+     messages_to_prompt=messages_to_prompt,
+     completion_to_prompt=completion_to_prompt,
+     verbose=True,
+ )
+
+ prompt = input("Enter your prompt: ")
+ response = llm.complete(prompt)
+ print(response.text)
+
  Needs quantizing to 4-bit etc.; the Q8_0 works well! (Untuned!)
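
The note above mentions quantizing the Q8_0 file down to 4-bit. A minimal sketch of that step, assuming a local llama.cpp checkout built from source (the tool is named `quantize` in older builds and `llama-quantize` in newer ones, and both file names below are hypothetical). Because the input here is already Q8_0 rather than an f16 original, the requantize flag is needed:

```
# Notebook-style shell escape, matching the !pip cells above.
# Assumes a local llama.cpp build; file names are hypothetical, and quality
# is generally better when quantizing from an f16 source than from Q8_0.
!./llama.cpp/llama-quantize --allow-requantize mixtral_basemodel.q8_0.gguf mixtral_basemodel.q4_k_m.gguf Q4_K_M
```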
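
The snippet installs `llama-index-embeddings-huggingface` and imports `SimpleDirectoryReader` and `VectorStoreIndex`, but never uses them. Below is a minimal sketch of how they would typically be wired together for retrieval-augmented queries, assuming a `./data` directory of documents and the `BAAI/bge-small-en-v1.5` embedding model (both illustrative choices, not part of this repo):

```
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Route all LlamaIndex calls through the LlamaCPP llm built above, with a
# local embedding model (the model choice here is an assumption).
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Load the documents in ./data (hypothetical path) and build an in-memory index.
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)

# Answer a question grounded in the indexed documents.
query_engine = index.as_query_engine()
print(query_engine.query("Summarize the key points of these documents."))
```

This keeps everything local: embeddings come from the HuggingFace model and generation comes from the GGUF file loaded by LlamaCPP.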