DeepMount00 committed
Commit 1ac23c7 • 1 Parent(s): 9a64687

Update app.py

Files changed (1)
  1. app.py +9 -3
app.py CHANGED
```diff
@@ -14,15 +14,19 @@ subprocess.run(
     shell=True,
 )
 
-# Constants and model initialization code remains the same
+
+DESCRIPTION = """\
+# Lexora 7B ITA 💬 🇮🇹
+"""
+
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-model_id = "DeepMount00/Lexora-Medium-7B"
 
-tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model_id = "DeepMount00/Lexora-Medium-7B"
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
@@ -30,8 +34,10 @@ model = AutoModelForCausalLM.from_pretrained(
     attn_implementation="flash_attention_2",
     trust_remote_code=True,
 )
+model.config.sliding_window = 4096
 model.eval()
 
+
 @spaces.GPU(duration=90)
 def generate(
     message: str,
```
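For reference, the model setup at the top of app.py after this commit is sketched below. This is not the full file: the install command inside the preceding subprocess.run(...) call is omitted, and the from_pretrained argument that sits between device_map and attn_implementation (outside the shown hunks) is left out. The imports are filled in under the assumption that the file uses the standard transformers loaders it references.

```python
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Title string for the Space UI (added in this commit).
DESCRIPTION = """\
# Lexora 7B ITA 💬 🇮🇹
"""

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# trust_remote_code lets the checkpoint ship custom model/tokenizer code;
# attn_implementation="flash_attention_2" requires the flash-attn package,
# presumably installed by the subprocess.run(...) call earlier in the file.
model_id = "DeepMount00/Lexora-Medium-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
)
# New in this commit: cap the attention sliding window at 4096 tokens,
# matching the default MAX_INPUT_TOKEN_LENGTH.
model.config.sliding_window = 4096
model.eval()
```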
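The diff shows only the signature of generate, wrapped in @spaces.GPU(duration=90) so the call is allocated a ZeroGPU worker for up to 90 seconds. Its body is not part of this commit; the block below is a minimal sketch of the streaming chat pattern such Spaces typically use, reusing model, tokenizer, and the constants from the sketch above. The chat_history and max_new_tokens parameters, the sampling settings, and the TextIteratorStreamer timeout are illustrative assumptions, not taken from the repository.

```python
from threading import Thread

import spaces  # provided by Hugging Face Spaces (ZeroGPU)
from transformers import TextIteratorStreamer


@spaces.GPU(duration=90)
def generate(
    message: str,
    chat_history: list,  # assumed: list of {"role": ..., "content": ...} dicts
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
):
    # Build the prompt with the model's chat template.
    conversation = list(chat_history) + [{"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    # Keep only the most recent MAX_INPUT_TOKEN_LENGTH tokens.
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)

    # Run generation in a background thread and stream tokens as they arrive.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
    )
    Thread(
        target=model.generate,
        kwargs=dict(
            input_ids=input_ids,
            streamer=streamer,
            max_new_tokens=min(max_new_tokens, MAX_MAX_NEW_TOKENS),
            do_sample=True,
        ),
    ).start()

    partial = []
    for chunk in streamer:
        partial.append(chunk)
        yield "".join(partial)
```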