Upto12forenglish committed on
Commit 7858f94
1 Parent(s): 9c0a1fc

Create app.py

Files changed (1)
app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
+ import spaces
+ import torch
+ import transformers
+
+ model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
+
+ # Build the text-generation pipeline once at startup. The model is loaded
+ # on CPU here; the @spaces.GPU decorator below requests a GPU for each
+ # decorated call when running on Hugging Face Spaces.
+ pipeline = transformers.pipeline(
+     "text-generation",
+     model=model_name,
+     model_kwargs={"torch_dtype": torch.bfloat16},
+     device="cpu",
+ )
+
+ @spaces.GPU
+ def chat_function(message, history, system_prompt, max_new_tokens, temperature):
+     # history is accepted to match the chat-UI callback signature, but it
+     # is not folded into the prompt here.
+     messages = [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": message},
+     ]
+     prompt = pipeline.tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True,
+     )
+     # Llama 3 marks the end of a turn with <|eot_id|>, so stop on either
+     # that token or the regular EOS token.
+     terminators = [
+         pipeline.tokenizer.eos_token_id,
+         pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+     ]
+     # The offset keeps the temperature strictly positive, since sampling
+     # with temperature 0.0 is invalid.
+     temp = temperature + 0.1
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_new_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temp,
+         top_p=0.9,
+     )
+     # Strip the echoed prompt so only the newly generated reply is returned.
+     return outputs[0]["generated_text"][len(prompt):]
+
+
+ message = "I has a question about grammar."  # example user message
+ history = [("Hi!", "I'm doing well, thanks for asking!")]
+ temperature = 0.7
+ max_new_tokens = 50
+ prompt = "Act as an English tutor. Always correct grammar and spelling mistakes. Always keep the conversation going by asking follow-up questions."
+
+ response = chat_function(message=message, history=history, system_prompt=prompt, max_new_tokens=max_new_tokens, temperature=temperature)
+
+ print(response)
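
Since this app.py is a Space entry point and chat_function follows Gradio's ChatInterface callback signature (message, history, plus extra inputs), a minimal sketch of how it could be wired to a chat UI is shown below. The gradio import, widget choices, and slider ranges are assumptions for illustration, not part of this commit:

import gradio as gr

demo = gr.ChatInterface(
    chat_function,
    additional_inputs=[
        gr.Textbox(
            "Act as an English tutor. Always correct grammar and spelling mistakes. Always keep the conversation going by asking follow-up questions.",
            label="System prompt",
        ),
        gr.Slider(minimum=50, maximum=512, value=50, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
    ],
)
demo.launch()

ChatInterface passes (message, history, *additional_inputs) to the callback, which is why the extra inputs are listed in the same order as chat_function's remaining parameters.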