DeepMount00 committed
Commit 1ac23c7 • 1 Parent(s): 9a64687

Update app.py

Files changed (1)
  1. app.py +9 -3
app.py CHANGED
```diff
@@ -14,15 +14,19 @@ subprocess.run(
     shell=True,
 )
 
-# Constants and model initialization code remains the same
+
+DESCRIPTION = """\
+# Lexora 7B ITA 💬 🇮🇹
+"""
+
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-model_id = "DeepMount00/Lexora-Medium-7B"
 
-tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model_id = "DeepMount00/Lexora-Medium-7B"
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True,)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
@@ -30,8 +34,10 @@ model = AutoModelForCausalLM.from_pretrained(
     attn_implementation="flash_attention_2",
     trust_remote_code=True,
 )
+model.config.sliding_window = 4096
 model.eval()
 
+
 @spaces.GPU(duration=90)
 def generate(
     message: str,
```
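For reference, the model setup at the top of app.py after this commit is sketched below. This is not the full file: the install command inside the preceding subprocess.run(...) call is omitted, and the from_pretrained argument that sits between device_map and attn_implementation (outside the shown hunks) is left out. The imports are filled in under the assumption that the file uses the standard transformers loaders it references.

```python
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Title string for the Space UI (added in this commit).
DESCRIPTION = """\
# Lexora 7B ITA 💬 🇮🇹
"""

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# trust_remote_code lets the checkpoint ship custom model/tokenizer code;
# attn_implementation="flash_attention_2" requires the flash-attn package,
# presumably installed by the subprocess.run(...) call earlier in the file.
model_id = "DeepMount00/Lexora-Medium-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    attn_implementation="flash_attention_2",
    trust_remote_code=True,
)
# New in this commit: cap the attention sliding window at 4096 tokens,
# matching the default MAX_INPUT_TOKEN_LENGTH.
model.config.sliding_window = 4096
model.eval()
```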
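The diff shows only the signature of generate, wrapped in @spaces.GPU(duration=90) so the call is allocated a ZeroGPU worker for up to 90 seconds. Its body is not part of this commit; the block below is a minimal sketch of the streaming chat pattern such Spaces typically use, reusing model, tokenizer, and the constants from the sketch above. The chat_history and max_new_tokens parameters, the sampling settings, and the TextIteratorStreamer timeout are illustrative assumptions, not taken from the repository.

```python
from threading import Thread

import spaces  # provided by Hugging Face Spaces (ZeroGPU)
from transformers import TextIteratorStreamer


@spaces.GPU(duration=90)
def generate(
    message: str,
    chat_history: list,  # assumed: list of {"role": ..., "content": ...} dicts
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
):
    # Build the prompt with the model's chat template.
    conversation = list(chat_history) + [{"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    # Keep only the most recent MAX_INPUT_TOKEN_LENGTH tokens.
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)

    # Run generation in a background thread and stream tokens as they arrive.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True
    )
    Thread(
        target=model.generate,
        kwargs=dict(
            input_ids=input_ids,
            streamer=streamer,
            max_new_tokens=min(max_new_tokens, MAX_MAX_NEW_TOKENS),
            do_sample=True,
        ),
    ).start()

    partial = []
    for chunk in streamer:
        partial.append(chunk)
        yield "".join(partial)
```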