arjunanand13 committed
Commit d9ff90d
1 Parent(s): 4841c33

Update app.py

Files changed (1):
  1. app.py +14 -14
app.py CHANGED
@@ -48,11 +48,11 @@ device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 
 
 model_config = transformers.AutoConfig.from_pretrained(
-    self.model_id,
+    model_id,
     # use_auth_token=hf_auth
 )
 model = transformers.AutoModelForCausalLM.from_pretrained(
-    self.model_id,
+    model_id,
     trust_remote_code=True,
     config=model_config,
     quantization_config=bnb_config,
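
The fix in this hunk: app.py loads the model at module level, not inside a class, so self.model_id raises a NameError at import time; the commit drops the self. prefix. A minimal sketch of the corrected setup, assuming a hypothetical model_id (the real value is defined elsewhere in app.py and is not part of this diff):

import transformers

model_id = 'meta-llama/Llama-2-7b-chat-hf'  # hypothetical; app.py defines its own

# Module-level code has no `self`, so the bare name is the correct reference.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    # use_auth_token=hf_auth  # only needed for gated or private checkpoints
)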
@@ -60,7 +60,7 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
 tokenizer = transformers.AutoTokenizer.from_pretrained(
-    self.model_id,
+    model_id,
     # use_auth_token=hf_auth
 )
 generate_text = transformers.pipeline(
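
The load call passes quantization_config=bnb_config, which is defined earlier in app.py and not shown in this diff. A typical 4-bit bitsandbytes setup (an assumption, not the app's confirmed values) looks like:

import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4 bits on load
    bnb_4bit_quant_type='nf4',              # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for matmuls at runtime
)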
@@ -92,17 +92,17 @@ class StopOnTokens(StoppingCriteria):
 stopping_criteria = StoppingCriteriaList([StopOnTokens()])
 
 
-generate_text = transformers.pipeline(
-    model=model,
-    tokenizer=tokenizer,
-    return_full_text=True,  # langchain expects the full text
-    task='text-generation',
-    # we pass model parameters here too
-    stopping_criteria=stopping_criteria,  # without this model rambles during chat
-    temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
-    max_new_tokens=512,  # max number of tokens to generate in the output
-    repetition_penalty=1.1  # without this output begins repeating
-)
+# generate_text = transformers.pipeline(
+#     model=model,
+#     tokenizer=tokenizer,
+#     return_full_text=True,  # langchain expects the full text
+#     task='text-generation',
+#     # we pass model parameters here too
+#     stopping_criteria=stopping_criteria,  # without this model rambles during chat
+#     temperature=0.1,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
+#     max_new_tokens=512,  # max number of tokens to generate in the output
+#     repetition_penalty=1.1  # without this output begins repeating
+# )
 
 llm = HuggingFacePipeline(pipeline=generate_text)
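
The hunk header shows this code sits below a class StopOnTokens(StoppingCriteria) definition whose body lies outside the diff context. A typical implementation (an assumption, sketched from the surrounding usage) halts generation once the last emitted token is a designated stop token:

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Hypothetical stop list; `tokenizer` is the one loaded earlier in app.py.
        stop_token_ids = [tokenizer.eos_token_id]
        # Halt as soon as the most recent token matches a stop id.
        return input_ids[0][-1].item() in stop_token_ids

stopping_criteria = StoppingCriteriaList([StopOnTokens()])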
 
 
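Net effect of the commit: app.py constructed generate_text twice, and the second construction (lines 95-105) silently replaced the first; commenting it out means the pipeline built at line 66 is the one HuggingFacePipeline wraps. A sketch of the surviving wiring, with parameter values taken from the commented-out block (the first pipeline's actual arguments are not visible in this diff, and the LangChain import path is an assumption):

from langchain.llms import HuggingFacePipeline

generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task='text-generation',
    return_full_text=True,                # langchain expects the full text
    stopping_criteria=stopping_criteria,  # without this the model rambles during chat
    temperature=0.1,                      # 'randomness' of outputs
    max_new_tokens=512,                   # cap on generated tokens
    repetition_penalty=1.1,               # without this output begins repeating
)
llm = HuggingFacePipeline(pipeline=generate_text)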