saffr0n committed on
Commit
273f526
1 Parent(s): 23244ab

Initialize Tamil chat app from llama-2-7b-chat space

Files changed (1)
  1. app.py +12 -13
app.py CHANGED
@@ -7,9 +7,9 @@ import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
-MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 1024
-MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+MAX_MAX_NEW_TOKENS = 1024
+DEFAULT_MAX_NEW_TOKENS = 512
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "1024"))
 
 DESCRIPTION = """\
 # Llama-2 7B Chat
@@ -29,22 +29,22 @@ As a derivate work of [Llama-2-7b-chat](https://huggingface.co/meta-llama/Llama-
 this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-7b-chat/blob/main/USE_POLICY.md).
 """
 
+SYSTEM_PROMPT = "நீங்கள் உதவிகரமான மற்றும் மரியாதைக்குரிய மற்றும் நேர்மையான AI உதவியாளர்."
+
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 
-
 if torch.cuda.is_available():
-    model_id = "meta-llama/Llama-2-7b-chat-hf"
+    model_id = "abhinand/tamil-llama-7b-instruct-v0.1"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
 
-
 @spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
-    system_prompt: str,
+    system_prompt: str = SYSTEM_PROMPT,
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
@@ -57,7 +57,6 @@ def generate(
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
-
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
@@ -127,11 +126,11 @@ chat_interface = gr.ChatInterface(
     ],
     stop_btn=None,
     examples=[
-        ["Hello there! How are you doing?"],
-        ["Can you explain briefly to me what is the Python programming language?"],
-        ["Explain the plot of Cinderella in a sentence."],
-        ["How many hours does it take a man to eat a Helicopter?"],
-        ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
+        ["வணக்கம், நீங்கள் யார்?"],
+        ["நான் பெரிய பணக்காரன் இல்லை, லேட்டஸ்ட் iPhone-இல் நிறைய பணம் செலவழிக்க வேண்டுமா?"],
+        ["பட்டியலை வரிசைப்படுத்த பைதான் செயல்பாட்டை எழுதவும்."],
+        ["சிவப்பும் மஞ்சளும் கலந்தால் என்ன நிறமாக இருக்கும்?"],
+        ["விரைவாக தூங்குவது எப்படி?"],
     ],
 )
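For readers who want to try the change outside the Space, below is a minimal standalone sketch of how the updated pieces fit together. The model id, the Tamil system prompt (roughly: "You are a helpful, respectful and honest AI assistant."), and the reduced token limits are the values from this diff; the `stream_reply` driver itself is illustrative rather than part of app.py, and it assumes the model's tokenizer ships a chat template that accepts a "system" message, as app.py's `generate` does.

```python
# A sketch of the post-commit flow, standalone (no Gradio/spaces):
# model id, system prompt, and token limits are the values from this diff;
# stream_reply() itself is illustrative, not part of app.py.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_INPUT_TOKEN_LENGTH = 1024   # was 4096 before this commit
DEFAULT_MAX_NEW_TOKENS = 512    # was 1024 before this commit

model_id = "abhinand/tamil-llama-7b-instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

# Roughly: "You are a helpful, respectful and honest AI assistant."
SYSTEM_PROMPT = "நீங்கள் உதவிகரமான மற்றும் மரியாதைக்குரிய மற்றும் நேர்மையான AI உதவியாளர்."


def stream_reply(message: str) -> None:
    # Build the conversation the way generate() does, with the new default
    # system prompt prepended (assumes the tokenizer's chat template
    # accepts a "system" role).
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": message},
    ]
    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
    # Trim from the left so only the most recent tokens survive, as app.py does.
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=DEFAULT_MAX_NEW_TOKENS,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # model.generate blocks, so run it in a thread and consume the streamer.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    for text in streamer:
        print(text, end="", flush=True)


stream_reply("வணக்கம், நீங்கள் யார்?")  # "Hello, who are you?"
```

For reference, the new example prompts translate roughly to: "Hello, who are you?", "I'm not very rich, should I spend a lot of money on the latest iPhone?", "Write a Python function to sort a list.", "What colour do you get if you mix red and yellow?", and "How can I fall asleep quickly?".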