Huy committed on
Commit
3322f3c
·
1 Parent(s): 8fef1a7

Add download file from hf model

Browse files
Files changed (1) hide show
  1. app.py +18 -2
app.py CHANGED
@@ -15,10 +15,11 @@ from utils import load_tokenizer
15
 
16
  from llama_index.llms.gemini import Gemini
17
  from llama_index.core.tools import RetrieverTool
18
-
19
 
20
  GEMINI_API_KEY = os.getenv(key="GEMINI_API_KEY")
21
  QDRANT_API_KEY = os.getenv(key="QDRANT_API_KEY")
 
22
  device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
23
 
24
  async def initialize_model() -> Dict:
@@ -27,7 +28,22 @@ async def initialize_model() -> Dict:
27
  Returns:
28
 model_dict: Dict: Dictionary stores necessary models
29
  """
30
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  model = ColPali.from_pretrained(model_dir='./pretrained/colpaligemma-3b-mix-448-base', torch_dtype=torch.bfloat16)
32
  tokenizer = load_tokenizer(tokenizer_dir='./pretrained/colpaligemma-3b-mix-448-base')
33
  processor = ColPaliProcessor(tokenizer=tokenizer).from_pretrained(pretrained_dir='./pretrained/colpaligemma-3b-mix-448-base')
 
15
 
16
  from llama_index.llms.gemini import Gemini
17
  from llama_index.core.tools import RetrieverTool
18
+ from huggingface_hub import hf_hub_download
19
 
20
  GEMINI_API_KEY = os.getenv(key="GEMINI_API_KEY")
21
  QDRANT_API_KEY = os.getenv(key="QDRANT_API_KEY")
22
+ HF_TOKEN_KEY = os.getenv(key="HF_TOKEN_KEY")
23
  device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
24
 
25
  async def initialize_model() -> Dict:
 
28
  Returns:
29
 model_dict: Dict: Dictionary stores necessary models
30
  """
31
+ if not os.path.exists("./pretrained/colpaligemma-3b-mix-448-base"):
32
+ os.makedirs("./pretrained/colpaligemma-3b-mix-448-base", exist_ok=True)
33
+ files_to_download = ["adapter_model.safetensors",
34
+ "config.json",
35
+ "model-00001-of-00002.safetensors",
36
+ "model-00002-of-00002.safetensors",
37
+ "preprocessor_config.json",
38
+ "tokenizer.json",
39
+ "tokenizer.model",
40
+ "tokenizer_config.json"]
41
+ for file in files_to_download:
42
+ hf_hub_download(repo_id="dnnhhuy/colpaligemma-3b-mix-448-base",
43
+ filename=file,
44
+ token=HF_TOKEN_KEY,
45
+ local_dir="./pretrained/colpaligemma-3b-mix-448-base")
46
+
47
  model = ColPali.from_pretrained(model_dir='./pretrained/colpaligemma-3b-mix-448-base', torch_dtype=torch.bfloat16)
48
  tokenizer = load_tokenizer(tokenizer_dir='./pretrained/colpaligemma-3b-mix-448-base')
49
  processor = ColPaliProcessor(tokenizer=tokenizer).from_pretrained(pretrained_dir='./pretrained/colpaligemma-3b-mix-448-base')