Update README.md
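Completes the truncated `OuteAI/Lite-Oute-2-Mamba2Attn-Base` model IDs in the README's example code.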
README.md
```diff
@@ -147,7 +147,7 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = AutoModelForCausalLM.from_pretrained(
-    "OuteAI/Lite-Oute-2-Mamba2Attn-
+    "OuteAI/Lite-Oute-2-Mamba2Attn-Base",
     # To allow custom modeling files
     trust_remote_code=True,

@@ -155,7 +155,7 @@ model = AutoModelForCausalLM.from_pretrained(
     # attn_implementation="flash_attention_2",
 )
 model.to(device)
-tokenizer = AutoTokenizer.from_pretrained("OuteAI/Lite-Oute-2-Mamba2Attn-
+tokenizer = AutoTokenizer.from_pretrained("OuteAI/Lite-Oute-2-Mamba2Attn-Base")

 def generate_response(message: str, temperature: float = 0.2, repetition_penalty: float = 1.12) -> str:
     # Convert message to PyTorch tensors
```