Update app.py
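Swaps the Mistral codestral-latest API backend for a locally loaded codellama/CodeLlama-7b-Instruct-hf, served through llama_index's HuggingFaceLLM wrapper with 4-bit NF4 quantization via BitsAndBytesConfig.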
app.py CHANGED
@@ -7,6 +7,10 @@ from llama_index.llms.openai import OpenAI
 from llama_index.core.llms import ChatMessage
 from llama_index.llms.anthropic import Anthropic
 from llama_index.llms.mistralai import MistralAI
+from transformers import AutoTokenizer
+import torch
+from llama_index.llms.huggingface import HuggingFaceLLM
+from transformers import BitsAndBytesConfig
 import nest_asyncio
 
 nest_asyncio.apply()
@@ -26,8 +30,11 @@ nest_asyncio.apply()
 # os.environ["ANTHROPIC_API_KEY"] = key
 
 # Mistral
+# key = os.getenv('MISTRAL_API_KEY')
+# os.environ["MISTRAL_API_KEY"] = key
+
+# Hugging Face token
 key = os.getenv('MISTRAL_API_KEY')
-os.environ["MISTRAL_API_KEY"] = key
 
 
 # Streamlit UI
@@ -80,7 +87,8 @@ if uploaded_files:
             ChatMessage(role="user", content=prompt),
         ]
         # resp = Anthropic(model=model).chat(response)
-        resp = MistralAI(model).chat(response)
+        # resp = MistralAI(model).chat(response)
+        resp = model.chat(response)
         return resp
 
     # Initialize session state for chat messages
@@ -110,9 +118,43 @@ if uploaded_files:
     # model = "claude-3-haiku-20240307"
    # model = "claude-3-sonnet-20240229"
    # model = "claude-3-opus-20240229"
-    model = "codestral-latest"
-
+    # model = "codestral-latest"
+
+    # Llama
+    model = "codellama/CodeLlama-7b-Instruct-hf"
+    tokenizer = AutoTokenizer.from_pretrained(
+        model,
+        token=key,
+    )
 
+    stopping_ids = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+    ]
+
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True,
+    )
+
+    llm = HuggingFaceLLM(
+        model_name=model,
+        model_kwargs={
+            "token": key,
+            # "torch_dtype": torch.bfloat16,  # swap in for quantization_config below to skip 4-bit loading
+            "quantization_config": quantization_config,
+        },
+        generate_kwargs={
+            "do_sample": True,
+            "temperature": 0.6,
+            "top_p": 0.9,
+        },
+        tokenizer_name=model,
+        tokenizer_kwargs={"token": key},
+        stopping_ids=stopping_ids,
+    )
 
     # Generation
     # resp = ollama.generate(model='codellama',
@@ -135,7 +177,7 @@ if uploaded_files:
 
     # print(prompt)
 
-    resp = res(prompt, model = model)
+    resp = res(prompt = prompt, model = llm)
     st.session_state.messages.append({"role": "assistant", "content": f"{resp}"})
     st.markdown(resp)
     # st.session_state.messages.append({"role": "assistant", "content": f"{resp['response']}"})
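Note for reviewers: the new backend is easiest to sanity-check outside Streamlit. The sketch below is a minimal, self-contained version of what this commit wires up, under two assumptions the diff does not make: the Hugging Face token is read from an HF_TOKEN environment variable (the diff reuses MISTRAL_API_KEY, which only authenticates if that variable happens to hold a Hugging Face token), and the stop list is trimmed to the EOS token, since "<|eot_id|>" is a Llama-3-style token that CodeLlama's tokenizer does not contain (convert_tokens_to_ids falls back to the unk id there). It needs a CUDA GPU with bitsandbytes installed.

import os

import torch
from transformers import AutoTokenizer, BitsAndBytesConfig
from llama_index.core.llms import ChatMessage
from llama_index.llms.huggingface import HuggingFaceLLM

# Assumed env var for the Hugging Face token (the diff itself reuses MISTRAL_API_KEY).
key = os.getenv("HF_TOKEN")

model = "codellama/CodeLlama-7b-Instruct-hf"
tokenizer = AutoTokenizer.from_pretrained(model, token=key)

# EOS alone is enough to stop CodeLlama; "<|eot_id|>" belongs to Llama-3 tokenizers.
stopping_ids = [tokenizer.eos_token_id]

# Load the 7B weights in 4-bit NF4 so they fit on a single consumer GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

llm = HuggingFaceLLM(
    model_name=model,
    tokenizer_name=model,
    model_kwargs={"token": key, "quantization_config": quantization_config},
    tokenizer_kwargs={"token": key},
    generate_kwargs={"do_sample": True, "temperature": 0.6, "top_p": 0.9},
    stopping_ids=stopping_ids,
)

# Same call shape as the app's res() helper: a list of ChatMessages in, one reply out.
messages = [
    ChatMessage(role="system", content="You are a coding assistant."),
    ChatMessage(role="user", content="Write a Python function that reverses a string."),
]
resp = llm.chat(messages)
print(resp.message.content)

Design-wise, 4-bit NF4 with double quantization shrinks the 7B weights to roughly 4 GB, which is what makes serving this model locally practical; generate_kwargs keeps the sampling setup from the diff (temperature 0.6, top_p 0.9).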