Mikhil-jivus committed on
Commit
91b03f9
·
verified ·
1 Parent(s): dca4aa4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -1,10 +1,12 @@
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
- import os
 
4
 
5
  # Define the repository ID and access token
6
  repo_id = "Mikhil-jivus/Llama-32-3B-FineTuned"
7
- access_token = os.getenv('HF_TOKEN')
8
 
9
  # Load the tokenizer and model from the Hugging Face repository
10
  tokenizer = AutoTokenizer.from_pretrained(repo_id, use_auth_token=access_token)
@@ -32,6 +34,9 @@ def respond(
32
  input_text = system_message + " ".join([f"{msg['role']}: {msg['content']}" for msg in messages])
33
  input_ids = tokenizer.encode(input_text, return_tensors="pt")
34
 
 
 
 
35
  # Generate a response
36
  chat_history_ids = model.generate(
37
  input_ids,
@@ -40,6 +45,7 @@ def respond(
40
  top_p=top_p,
41
  pad_token_id=tokenizer.eos_token_id,
42
  do_sample=True,
 
43
  )
44
 
45
  # Decode the response
@@ -67,4 +73,4 @@ demo = gr.ChatInterface(
67
  )
68
 
69
  if __name__ == "__main__":
70
- demo.launch()
 
1
+ import os
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
+
5
+ access_token = os.getenv('HF_TOKEN')
6
 
7
  # Define the repository ID and access token
8
  repo_id = "Mikhil-jivus/Llama-32-3B-FineTuned"
9
+ access_token = "your_access_token_here"
10
 
11
  # Load the tokenizer and model from the Hugging Face repository
12
  tokenizer = AutoTokenizer.from_pretrained(repo_id, use_auth_token=access_token)
 
34
  input_text = system_message + " ".join([f"{msg['role']}: {msg['content']}" for msg in messages])
35
  input_ids = tokenizer.encode(input_text, return_tensors="pt")
36
 
37
+ # Create attention mask
38
+ attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
39
+
40
  # Generate a response
41
  chat_history_ids = model.generate(
42
  input_ids,
 
45
  top_p=top_p,
46
  pad_token_id=tokenizer.eos_token_id,
47
  do_sample=True,
48
+ attention_mask=attention_mask,
49
  )
50
 
51
  # Decode the response
 
73
  )
74
 
75
  if __name__ == "__main__":
76
+ demo.launch(share=True)