gemma-2-9b-it1

Runtime error

Leri777 committed on Oct 9, 2024

Commit

1742611

verified ·

1 Parent(s): 336f168

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -27,17 +27,13 @@ if torch.cuda.is_available():
     logger.debug("GPU is available. Proceeding with GPU setup.")
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto",
-        torch_dtype=torch.bfloat16,
     )
 else:
     logger.warning("GPU is not available. Proceeding with CPU setup.")
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto",
-        low_cpu_mem_usage=True,
-        use_auth_token=True,
-        use_auth_token=True,
     )
 model.eval()
@@ -56,7 +52,7 @@ pipe = pipeline(
 # Initialize HuggingFacePipeline model for LangChain
 chat_model = HuggingFacePipeline(pipeline=pipe)
-logger.debug("Model and tokenizer loaded successfully")
 # Define the conversation template for LangChain
 template = """<|im_start|>system

     logger.debug("GPU is available. Proceeding with GPU setup.")
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        device_map="auto", torch_dtype=torch.bfloat16,
     )
 else:
     logger.warning("GPU is not available. Proceeding with CPU setup.")
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        device_map="auto", low_cpu_mem_usage=True, use_auth_token=True,
     )
 model.eval()
 # Initialize HuggingFacePipeline model for LangChain
 chat_model = HuggingFacePipeline(pipeline=pipe)
 # Define the conversation template for LangChain
 template = """<|im_start|>system