Gurnam-AI committed
Commit: 42ce8f1
Parent: da2edde

Update app.py

Files changed (1)
  app.py  +2 -76
app.py CHANGED
@@ -84,87 +84,13 @@ def load_db():
 # Initialize langchain LLM chain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     progress(0.1, desc="Initializing HF tokenizer...")
-    # HuggingFacePipeline uses local model
-    # Note: it will download model locally...
-    # tokenizer=AutoTokenizer.from_pretrained(llm_model)
-    # progress(0.5, desc="Initializing HF pipeline...")
-    # pipeline=transformers.pipeline(
-    #     "text-generation",
-    #     model=llm_model,
-    #     tokenizer=tokenizer,
-    #     torch_dtype=torch.bfloat16,
-    #     trust_remote_code=True,
-    #     device_map="auto",
-    #     # max_length=1024,
-    #     max_new_tokens=max_tokens,
-    #     do_sample=True,
-    #     top_k=top_k,
-    #     num_return_sequences=1,
-    #     eos_token_id=tokenizer.eos_token_id
-    # )
-    # llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature': temperature})
+

     # HuggingFaceHub uses HF inference endpoints
     progress(0.5, desc="Initializing HF Hub...")
     vertexai.init(project="imgcp-ff81e7053b072ce5", location="us-central1")
     llm = VertexAI(model_name="gemini-pro")
-    # Use of trust_remote_code as model_kwargs
-    # Warning: langchain issue
-    # URL: https://github.com/langchain-ai/langchain/issues/6080
-    # if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #         load_in_8bit = True,
-    #     )
-    # elif llm_model in ["HuggingFaceH4/zephyr-7b-gemma-v0.1","mosaicml/mpt-7b-instruct"]:
-    #     raise gr.Error("LLM model is too large to be loaded automatically on free inference endpoint")
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #     )
-    # elif llm_model == "microsoft/phi-2":
-    #     # raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #         trust_remote_code = True,
-    #         torch_dtype = "auto",
-    #     )
-    # elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
-    #         temperature = temperature,
-    #         max_new_tokens = 250,
-    #         top_k = top_k,
-    #     )
-    # elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
-    #     raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #     )
-    # else:
-    #     llm = HuggingFaceEndpoint(
-    #         repo_id=llm_model,
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-    #         # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
-    #         temperature = temperature,
-    #         max_new_tokens = max_tokens,
-    #         top_k = top_k,
-    #     )
+

     progress(0.75, desc="Defining buffer memory...")
     memory = ConversationBufferMemory(
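
The hunk above only shows the top of initialize_llmchain. As a minimal sketch of how the retained pieces (vertexai.init, VertexAI, ConversationBufferMemory) typically fit together in a langchain retrieval setup: the chain type, memory arguments, retriever call, helper name, and project ID below are assumptions for illustration, not taken from this commit.

    import vertexai
    from langchain_google_vertexai import VertexAI
    from langchain.memory import ConversationBufferMemory
    from langchain.chains import ConversationalRetrievalChain

    def build_qa_chain(vector_db, temperature=0.7, max_tokens=1024, top_k=3):
        # Select the Vertex AI project and region (placeholder project ID)
        vertexai.init(project="your-gcp-project", location="us-central1")
        # Gemini Pro served through Vertex AI; generation parameters mirror the UI sliders
        llm = VertexAI(
            model_name="gemini-pro",
            temperature=temperature,
            max_output_tokens=max_tokens,
            top_k=top_k,
        )
        # Buffer memory so follow-up questions keep the running chat history
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            return_messages=True,
        )
        # Retrieval-augmented conversational chain over the existing vector store
        return ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vector_db.as_retriever(),
            memory=memory,
            return_source_documents=True,
        )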