QWEN-2.5-Coder-7B

Sleeping

Leri777 committed on Oct 9, 2024

Commit

796d807

verified ·

1 Parent(s): 3906cbc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -29,14 +29,31 @@ quantization_config = BitsAndBytesConfig(
     load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
 )
-# Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    device_map="auto",
-    quantization_config=quantization_config,
-    trust_remote_code=True,
-)
 # Create Hugging Face pipeline
 pipe = pipeline(
@@ -48,6 +65,7 @@ pipe = pipeline(
     top_k=50,
     top_p=0.9,
     repetition_penalty=1.2,
 )
 # Initialize HuggingFacePipeline model for LangChain
@@ -121,3 +139,5 @@ gr.Interface(
     title="Qwen2.5-Coder-7B-Instruct with LangChain",
     live=True,
 ).launch()

     load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
 )
+# Load tokenizer and model with GPU availability check
+def load_model():
+    if torch.cuda.is_available():
+        logger.debug("GPU is available. Proceeding with GPU setup.")
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            device_map="auto",
+            quantization_config=quantization_config,
+            trust_remote_code=True,
+        )
+        device = torch.device('cuda')
+    else:
+        logger.warning("GPU is not available. Proceeding with CPU setup.")
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            device_map="auto",
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+        )
+        device = torch.device('cpu')
+    return model, tokenizer, device
+model, tokenizer, device = load_model()
 # Create Hugging Face pipeline
 pipe = pipeline(
     top_k=50,
     top_p=0.9,
     repetition_penalty=1.2,
+    device=0 if torch.cuda.is_available() else -1,
 )
 # Initialize HuggingFacePipeline model for LangChain
     title="Qwen2.5-Coder-7B-Instruct with LangChain",
     live=True,
 ).launch()
+logger.debug("Chat interface initialized and launched")