Leri777 committed on
Commit
796d807
·
verified ·
1 Parent(s): 3906cbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -8
app.py CHANGED
@@ -29,14 +29,31 @@ quantization_config = BitsAndBytesConfig(
29
  load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
30
  )
31
 
32
- # Load tokenizer and model
33
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
34
- model = AutoModelForCausalLM.from_pretrained(
35
- MODEL_ID,
36
- device_map="auto",
37
- quantization_config=quantization_config,
38
- trust_remote_code=True,
39
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  # Create Hugging Face pipeline
42
  pipe = pipeline(
@@ -48,6 +65,7 @@ pipe = pipeline(
48
  top_k=50,
49
  top_p=0.9,
50
  repetition_penalty=1.2,
 
51
  )
52
 
53
  # Initialize HuggingFacePipeline model for LangChain
@@ -121,3 +139,5 @@ gr.Interface(
121
  title="Qwen2.5-Coder-7B-Instruct with LangChain",
122
  live=True,
123
  ).launch()
 
 
 
29
  load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
30
  )
31
 
32
# Load tokenizer and model with GPU availability check
def load_model():
    """Load the tokenizer and model, picking the right configuration for
    the available hardware.

    On a CUDA machine the model is loaded with the module-level 4-bit
    ``quantization_config`` (bitsandbytes quantization is GPU-only); on CPU
    it is loaded unquantized with ``low_cpu_mem_usage=True`` to reduce peak
    RAM during initialization.

    Returns:
        tuple: ``(model, tokenizer, device)`` where ``device`` is the
        ``torch.device`` the setup targeted (``cuda`` or ``cpu``).
    """
    # The tokenizer is identical in both branches, so load it exactly once.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

    # Shared kwargs for both setups; branch-specific options are added below.
    model_kwargs = {
        "device_map": "auto",
        "trust_remote_code": True,
    }
    if torch.cuda.is_available():
        logger.debug("GPU is available. Proceeding with GPU setup.")
        # 4-bit quantization requires a CUDA device, so only enable it here.
        model_kwargs["quantization_config"] = quantization_config
        device = torch.device('cuda')
    else:
        logger.warning("GPU is not available. Proceeding with CPU setup.")
        # Avoids materializing full-precision copies in RAM while loading.
        model_kwargs["low_cpu_mem_usage"] = True
        device = torch.device('cpu')

    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **model_kwargs)
    return model, tokenizer, device

model, tokenizer, device = load_model()
57
 
58
  # Create Hugging Face pipeline
59
  pipe = pipeline(
 
65
  top_k=50,
66
  top_p=0.9,
67
  repetition_penalty=1.2,
68
+ device=0 if torch.cuda.is_available() else -1,
69
  )
70
 
71
  # Initialize HuggingFacePipeline model for LangChain
 
139
  title="Qwen2.5-Coder-7B-Instruct with LangChain",
140
  live=True,
141
  ).launch()
142
+
143
+ logger.debug("Chat interface initialized and launched")