laserbeam2045 committed on
Commit
32dbfef
·
1 Parent(s): 2fc7e1b
Files changed (2) hide show
  1. app.py +3 -2
  2. requirements.txt +6 -2
app.py CHANGED
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
12
  app = FastAPI()
13
 
14
  # モデルロード
15
- model_name = "google/gemma-3-4b-it"
16
  try:
17
  logger.info(f"Loading model: {model_name}")
18
  processor = AutoProcessor.from_pretrained(model_name, token=os.getenv("HF_TOKEN"))
@@ -21,7 +21,8 @@ try:
21
  torch_dtype=torch.bfloat16,
22
  device_map="auto",
23
  token=os.getenv("HF_TOKEN"),
24
- low_cpu_mem_usage=True
 
25
  )
26
  logger.info("Model loaded successfully")
27
  except Exception as e:
 
12
  app = FastAPI()
13
 
14
  # モデルロード
15
+ model_name = "google/gemma-3-4b-it" # still the 4B instruction-tuned model; memory is reduced via load_in_4bit, not a smaller checkpoint
16
  try:
17
  logger.info(f"Loading model: {model_name}")
18
  processor = AutoProcessor.from_pretrained(model_name, token=os.getenv("HF_TOKEN"))
 
21
  torch_dtype=torch.bfloat16,
22
  device_map="auto",
23
  token=os.getenv("HF_TOKEN"),
24
+ low_cpu_mem_usage=True,
25
+ load_in_4bit=True # 量子化でメモリ節約
26
  )
27
  logger.info("Model loaded successfully")
28
  except Exception as e:
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
  huggingface_hub==0.25.2
2
- torch
3
- transformers
 
 
 
 
 
1
  huggingface_hub==0.25.2
2
+ torch==2.1.0+cpu
3
+ transformers==4.44.2
4
+ bitsandbytes==0.42.0
5
+ accelerate==0.26.1
6
+ fastapi==0.115.0
7
+ uvicorn==0.30.6