Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -11,18 +11,13 @@ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENT
 
 parser = argparse.ArgumentParser()
 
-if torch.cuda.is_available():
-    device, dtype = "cuda", torch.float16
-else:
-    device, dtype = "cpu", torch.float32
-
 model_id = "vikhyatk/moondream2"
 revision = "2024-04-02"
 tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
 moondream = AutoModelForCausalLM.from_pretrained(
     model_id, trust_remote_code=True, revision=revision,
-    attn_implementation="flash_attention_2"
-)
+    attn_implementation="flash_attention_2", torch_dtype=torch.float16
+)
 moondream.eval()
 
 
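The change drops the manual device/dtype branch (and its CPU fallback) and instead passes the half-precision dtype directly to from_pretrained via torch_dtype, alongside the flash_attention_2 implementation, which only runs on CUDA. Below is a minimal sketch of how the updated loading code might be exercised, assuming the encode_image / answer_question helpers exposed by moondream2's remote code, an available CUDA device (ZeroGPU attaches one at runtime), and a placeholder image path:

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "vikhyatk/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision,
    attn_implementation="flash_attention_2", torch_dtype=torch.float16
)
moondream.eval()

# Assumption: a CUDA device is available; flash_attention_2 does not run on CPU.
moondream.to("cuda")

image = Image.open("example.jpg")          # placeholder path
enc_image = moondream.encode_image(image)  # remote-code helper from the model card
print(moondream.answer_question(enc_image, "Describe this image.", tokenizer))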