datawithsuman committed on
Commit 72cb286 · verified · 1 Parent(s): 9394139

Update app.py

Files changed (1): app.py (+47 -5)
app.py CHANGED

@@ -7,6 +7,10 @@ from llama_index.llms.openai import OpenAI
 from llama_index.core.llms import ChatMessage
 from llama_index.llms.anthropic import Anthropic
 from llama_index.llms.mistralai import MistralAI
+from transformers import AutoTokenizer
+import torch
+from llama_index.llms.huggingface import HuggingFaceLLM
+from transformers import BitsAndBytesConfig
 import nest_asyncio
 
 nest_asyncio.apply()
@@ -26,8 +30,11 @@ nest_asyncio.apply()
 # os.environ["ANTHROPIC_API_KEY"] = key
 
 # Mistral
+# key = os.getenv('MISTRAL_API_KEY')
+# os.environ["MISTRAL_API_KEY"] = key
+
+# Hugging Face token
 key = os.getenv('MISTRAL_API_KEY')
-os.environ["MISTRAL_API_KEY"] = key
 
 
 # Streamlit UI
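
Note: after this hunk the app still reads the token from the MISTRAL_API_KEY environment variable, even though the new comment says the value is now a Hugging Face token. A minimal sketch of a less confusing lookup, assuming a dedicated HF_TOKEN variable (that name is this note's assumption, not something the commit defines):

    import os

    # Prefer a dedicated HF_TOKEN variable; fall back to MISTRAL_API_KEY
    # so deployments configured for the old name keep working.
    key = os.getenv("HF_TOKEN") or os.getenv("MISTRAL_API_KEY")
    if key is None:
        raise RuntimeError("Set HF_TOKEN (or MISTRAL_API_KEY) before starting the app.")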
@@ -80,7 +87,8 @@ if uploaded_files:
             ChatMessage(role="user", content=prompt),
         ]
         # resp = Anthropic(model=model).chat(response)
-        resp = MistralAI(model).chat(response)
+        # resp = MistralAI(model).chat(response)
+        res = model.chat(response)
         return resp
 
     # Initialize session state for chat messages
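
Note: the added line assigns the chat result to res, but the function still ends with return resp, so resp is unbound on this path and the call will raise a NameError at runtime. A minimal sketch of the likely intended body, assuming model is the LLM object passed in:

    from llama_index.core.llms import ChatMessage

    def res(prompt, model):
        # Single-turn message list; the commit's fuller version may also
        # prepend a system message.
        response = [ChatMessage(role="user", content=prompt)]
        # Assign to `resp`, not `res`, so the existing `return resp` works.
        resp = model.chat(response)
        return resp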
@@ -110,9 +118,43 @@ if uploaded_files:
     # model = "claude-3-haiku-20240307"
     # model = "claude-3-sonnet-20240229"
     # model = "claude-3-opus-20240229"
-    model = "codestral-latest"
-
+    # model = "codestral-latest"
+
+    # Llama
+    model = "codellama/CodeLlama-7b-Instruct-hf"
+    tokenizer = AutoTokenizer.from_pretrained(
+        model,
+        token=key,
+    )
 
+    stopping_ids = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+    ]
+
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True,
+    )
+
+    llm = HuggingFaceLLM(
+        model_name=model,
+        model_kwargs={
+            "token": key,
+            # "torch_dtype": torch.bfloat16,  # comment this line and uncomment below to use 4bit
+            "quantization_config": quantization_config
+        },
+        generate_kwargs={
+            "do_sample": True,
+            "temperature": 0.6,
+            "top_p": 0.9,
+        },
+        tokenizer_name=model,
+        tokenizer_kwargs={"token": key},
+        stopping_ids=stopping_ids,
+    )
 
     # Generation
     # resp = ollama.generate(model='codellama',
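
Note: <|eot_id|> is the Llama 3 end-of-turn token. CodeLlama-7b-Instruct-hf uses the Llama 2 vocabulary, which does not define that token, so convert_tokens_to_ids is likely to hand back the unknown-token id (or None from a fast tokenizer) rather than a real stop id. A defensive sketch that keeps only ids that resolve:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
    # Keep only stop ids that actually exist in this tokenizer's vocabulary.
    stopping_ids = [tokenizer.eos_token_id]
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    if eot_id is not None and eot_id != tokenizer.unk_token_id:
        stopping_ids.append(eot_id)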
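
Note: as committed, the fp16 torch_dtype line is already commented out and quantization_config is the active path, so the model loads as 4-bit nf4 with float16 compute; the inline comment describes a switch that has already been made. For the weights alone that is roughly a 4x saving over fp16; a back-of-envelope sketch, assuming about 7B parameters:

    # Rough VRAM estimate for the weights only (KV cache and activations excluded).
    params = 7e9
    fp16_gb = params * 2 / 1e9     # 2 bytes per parameter -> ~14 GB
    nf4_gb = params * 0.5 / 1e9    # 4 bits per parameter  -> ~3.5 GB before overhead
    print(f"fp16: ~{fp16_gb:.0f} GB, nf4 4-bit: ~{nf4_gb:.1f} GB")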
@@ -135,7 +177,7 @@ if uploaded_files:
 
     # print(prompt)
 
-    resp = res(prompt, model = model)
+    resp = res(prompt = prompt, model = llm)
     st.session_state.messages.append({"role": "assistant", "content": f"{resp}"})
     st.markdown(resp)
     # st.session_state.messages.append({"role": "assistant", "content": f"{resp['response']}"})
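
Note: the call site now passes the locally loaded HuggingFaceLLM instance into res, which matches the model.chat(response) call inside it. Combined with the res/resp fix sketched above, usage reduces to an ordinary LlamaIndex chat call; a minimal sketch, assuming the llm built in this commit:

    from llama_index.core.llms import ChatMessage

    # Ask the locally loaded CodeLlama for a completion.
    messages = [ChatMessage(role="user", content="Write a function that reverses a string.")]
    resp = llm.chat(messages)
    print(resp)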
 