Aekanun committed on
Commit
f9d68b0
1 Parent(s): 79ec84c

fixing app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -19,7 +19,7 @@ processor = None
19
  if torch.cuda.is_available():
20
  torch.cuda.empty_cache()
21
  gc.collect()
22
- print("เคลียร์ CUDA cache เรียบร้อยแล้ว")
23
 
24
  # Login to Hugging Face Hub
25
  if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
@@ -32,12 +32,11 @@ def load_model_and_processor():
32
  """โหลดโมเดลและ processor"""
33
  global model, processor
34
  print("กำลังโหลดโมเดลและ processor...")
35
-
36
  try:
37
  # Model paths
38
  base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
39
  hub_model_path = "Aekanun/thai-handwriting-llm"
40
-
41
  # BitsAndBytes config
42
  bnb_config = BitsAndBytesConfig(
43
  load_in_4bit=True,
@@ -45,10 +44,11 @@ def load_model_and_processor():
45
  bnb_4bit_quant_type="nf4",
46
  bnb_4bit_compute_dtype=torch.bfloat16
47
  )
48
-
49
  # Load processor from base model
50
- processor = AutoProcessor.from_pretrained(base_model_path)
51
-
 
52
  # Load model from Hub
53
  print("กำลังโหลดโมเดลจาก Hub...")
54
  model = AutoModelForVision2Seq.from_pretrained(
@@ -56,10 +56,10 @@ def load_model_and_processor():
56
  device_map="auto",
57
  torch_dtype=torch.bfloat16,
58
  quantization_config=bnb_config,
59
- trust_remote_code=True
 
60
  )
61
  print("โหลดโมเดลสำเร็จ!")
62
-
63
  return True
64
  except Exception as e:
65
  print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
@@ -80,7 +80,7 @@ def process_handwriting(image):
80
  # Create prompt
81
  prompt = """Transcribe the Thai handwritten text from the provided image.
82
  Only return the transcription in Thai language."""
83
-
84
  # Create model inputs
85
  messages = [
86
  {
@@ -91,12 +91,12 @@ Only return the transcription in Thai language."""
91
  ],
92
  }
93
  ]
94
-
95
  # Process with model
96
  text = processor.apply_chat_template(messages, tokenize=False)
97
  inputs = processor(text=text, images=image, return_tensors="pt")
98
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
99
-
100
  # Generate
101
  with torch.no_grad():
102
  outputs = model.generate(
@@ -105,11 +105,10 @@ Only return the transcription in Thai language."""
105
  do_sample=False,
106
  pad_token_id=processor.tokenizer.pad_token_id
107
  )
108
-
109
  # Decode output
110
  transcription = processor.decode(outputs[0], skip_special_tokens=True)
111
  return transcription.strip()
112
-
113
  except Exception as e:
114
  return f"เกิดข้อผิดพลาด: {str(e)}"
115
 
@@ -125,7 +124,7 @@ if load_model_and_processor():
125
  description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
126
  examples=[["example1.jpg"], ["example2.jpg"]]
127
  )
128
-
129
  if __name__ == "__main__":
130
  demo.launch()
131
  else:
 
19
  if torch.cuda.is_available():
20
  torch.cuda.empty_cache()
21
  gc.collect()
22
+ print("เคลียร์ CUDA cache เรียบร้อยแล้ว")
23
 
24
  # Login to Hugging Face Hub
25
  if 'HUGGING_FACE_HUB_TOKEN' in os.environ:
 
32
  """โหลดโมเดลและ processor"""
33
  global model, processor
34
  print("กำลังโหลดโมเดลและ processor...")
 
35
  try:
36
  # Model paths
37
  base_model_path = "meta-llama/Llama-3.2-11B-Vision-Instruct"
38
  hub_model_path = "Aekanun/thai-handwriting-llm"
39
+
40
  # BitsAndBytes config
41
  bnb_config = BitsAndBytesConfig(
42
  load_in_4bit=True,
 
44
  bnb_4bit_quant_type="nf4",
45
  bnb_4bit_compute_dtype=torch.bfloat16
46
  )
47
+
48
  # Load processor from base model
49
+ print("กำลังโหลด processor...")
50
+ processor = AutoProcessor.from_pretrained(base_model_path, use_auth_token=True)
51
+
52
  # Load model from Hub
53
  print("กำลังโหลดโมเดลจาก Hub...")
54
  model = AutoModelForVision2Seq.from_pretrained(
 
56
  device_map="auto",
57
  torch_dtype=torch.bfloat16,
58
  quantization_config=bnb_config,
59
+ trust_remote_code=True,
60
+ use_auth_token=True
61
  )
62
  print("โหลดโมเดลสำเร็จ!")
 
63
  return True
64
  except Exception as e:
65
  print(f"เกิดข้อผิดพลาดในการโหลดโมเดล: {str(e)}")
 
80
  # Create prompt
81
  prompt = """Transcribe the Thai handwritten text from the provided image.
82
  Only return the transcription in Thai language."""
83
+
84
  # Create model inputs
85
  messages = [
86
  {
 
91
  ],
92
  }
93
  ]
94
+
95
  # Process with model
96
  text = processor.apply_chat_template(messages, tokenize=False)
97
  inputs = processor(text=text, images=image, return_tensors="pt")
98
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
99
+
100
  # Generate
101
  with torch.no_grad():
102
  outputs = model.generate(
 
105
  do_sample=False,
106
  pad_token_id=processor.tokenizer.pad_token_id
107
  )
108
+
109
  # Decode output
110
  transcription = processor.decode(outputs[0], skip_special_tokens=True)
111
  return transcription.strip()
 
112
  except Exception as e:
113
  return f"เกิดข้อผิดพลาด: {str(e)}"
114
 
 
124
  description="อัพโหลดรูปภาพลายมือเขียนภาษาไทยเพื่อแปลงเป็นข้อความ",
125
  examples=[["example1.jpg"], ["example2.jpg"]]
126
  )
127
+
128
  if __name__ == "__main__":
129
  demo.launch()
130
  else: