Upto12forenglish committed
Commit
6d495b4
1 Parent(s): 871afa6

Update app.py

Files changed (1)
  1. app.py +47 -44
app.py CHANGED
@@ -1,53 +1,56 @@
- import os
- HF_TOKEN = os.getenv('HF_TOKEN')
- print("Token loaded")

  import transformers
  import torch

- # Set the device to CPU
- device = torch.device('cpu')

- model_id = "meta-llama/Meta-Llama-3-8B-Instruct/tree/main"

  pipeline = transformers.pipeline(
-     "text-generation",
-     model="meta-llama/Meta-Llama-3-8B-Instruct",
-     model_kwargs={"torch_dtype": torch.bfloat16},
-     device="cuda",
  )

- print("llama download successfully")
-
- messages = [
-     {
-         "role": "system",
-         "content": "You are an English tutor who teaches students English basics"
-     },
-     {
-         "role": "user",
-         "content": "Teach me present simple tense"
-     }
- ]
-
- prompt = pipeline.tokenizer.tokenizer.apply_chat_template(
-     messages,
-     tokenize=False,
-     add_generation_prompt=True,
- )
-
- terminators = [
-     pipeline.tokenizer.eos_token_id,
-     pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
- ]
-
- outputs = pipeline(
-     prompt,
-     max_new_tokens = 256,
-     eos_token_id = terminators,
-     do_sample = True,
-     temperature = 0.6,
-     top_p = 0.9,
- )
-
- print(outputsp[0]["generated_text"][len(prompt):])

+ #Loading the HF_TOKEN from the .env file
+ from dotenv import load_dotenv
+ load_dotenv()

  import transformers
  import torch
+ from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

+ #Loading llama3 model
+ local_model_path = "meta-llama\\Meta-Llama-3-8B-Instruct"

+ model = transformers.AutoModelForCausalLM.from_pretrained(local_model_path, torch_dtype=torch.bfloat16)
+ tokenizer = AutoTokenizer.from_pretrained(local_model_path, padding_side='left')

+ # Set up the pipeline
  pipeline = transformers.pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     device=0 if torch.cuda.is_available() else -1 # Use GPU if available
  )

+ def chat_function(message, history, system_prompt, max_new_tokens, temperature):
+     messages = [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": message},
+     ]
+     prompt = pipeline.tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+     terminators = [
+         pipeline.tokenizer.eos_token_id,
+         pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+     ]
+     temp = temperature + 0.1
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_new_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temp,
+         top_p=0.9,
+     )
+     return outputs[0]["generated_text"][len(prompt):]
+
+ message = "Hello, can you teach me past simple?"
+ history = [("Hi!", "I'm doing well, thanks for asking!")]
+ temperature = 0.7
+ max_new_tokens = 50
+ prompt = "Act as an english tutor. Always correct grammar and spelling mistakes. Always keep the conversation going by asking follow up questions"
+
+ response = chat_function(message=message, history=history, system_prompt=prompt, max_new_tokens=max_new_tokens, temperature=temperature)
+
+ print(response)
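
As committed, the new app.py loads the model from a Windows-style local path, and neither the `history` argument nor the imported `TextIteratorStreamer` is used. Below is a minimal self-contained sketch of the same chat flow, under assumptions that are not part of the commit: the gated Hub model `meta-llama/Meta-Llama-3-8B-Instruct` is downloaded directly using an `HF_TOKEN` from `.env`, and a CUDA device is preferred for bfloat16 inference when available. It mirrors the committed `chat_function` but is illustrative only, not the author's exact script.

# Hypothetical sketch: same chat flow as the commit, but pulling the model
# from the Hugging Face Hub instead of a local Windows path (assumption).
import os

import torch
import transformers
from dotenv import load_dotenv

load_dotenv()  # makes HF_TOKEN from .env visible to this process

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # Hub id, not a filesystem path

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device=0 if torch.cuda.is_available() else -1,  # GPU if available, else CPU
    token=os.getenv("HF_TOKEN"),  # needed because the Llama 3 repo is gated
)


def chat_function(message, system_prompt, max_new_tokens=256, temperature=0.7):
    """Build a Llama 3 chat prompt and return only the newly generated text."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=0.9,
    )
    # Strip the echoed prompt so only the assistant's reply is returned.
    return outputs[0]["generated_text"][len(prompt):]


if __name__ == "__main__":
    system_prompt = (
        "Act as an English tutor. Always correct grammar and spelling mistakes. "
        "Always keep the conversation going by asking follow-up questions."
    )
    print(chat_function("Hello, can you teach me past simple?", system_prompt))

To try the sketch, put a line such as HF_TOKEN=<your token> in a local .env file and run it as a plain Python script; on a CPU-only machine it will still run, just slowly.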