Upto12forenglish committed
Commit
6d495b4
1 Parent(s): 871afa6

Update app.py

Files changed (1)
  1. app.py +47 -44
app.py CHANGED
@@ -1,53 +1,56 @@
- import os
- HF_TOKEN = os.getenv('HF_TOKEN')
- print("Token loaded")

  import transformers
  import torch

- # Set the device to CPU
- device = torch.device('cpu')

- model_id = "meta-llama/Meta-Llama-3-8B-Instruct/tree/main"

  pipeline = transformers.pipeline(
-     "text-generation",
-     model="meta-llama/Meta-Llama-3-8B-Instruct",
-     model_kwargs={"torch_dtype": torch.bfloat16},
-     device="cuda",
  )

- print("llama download successfully")
-
- messages = [
-     {
-         "role": "system",
-         "content": "You are an English tutor who teaches students English basics"
-     },
-     {
-         "role": "user",
-         "content": "Teach me present simple tense"
-     }
- ]
-
- prompt = pipeline.tokenizer.tokenizer.apply_chat_template(
-     messages,
-     tokenize=False,
-     add_generation_prompt=True,
- )
-
- terminators = [
-     pipeline.tokenizer.eos_token_id,
-     pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
- ]
-
- outputs = pipeline(
-     prompt,
-     max_new_tokens = 256,
-     eos_token_id = terminators,
-     do_sample = True,
-     temperature = 0.6,
-     top_p = 0.9,
- )
-
- print(outputsp[0]["generated_text"][len(prompt):])

+ #Loading the HF_TOKEN from the .env file
+ from dotenv import load_dotenv
+ load_dotenv()

  import transformers
  import torch
+ from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM

+ #Loading llama3 model
+ local_model_path = "meta-llama\\Meta-Llama-3-8B-Instruct"

+ model = transformers.AutoModelForCausalLM.from_pretrained(local_model_path, torch_dtype=torch.bfloat16)
+ tokenizer = AutoTokenizer.from_pretrained(local_model_path, padding_side='left')

+ # Set up the pipeline
  pipeline = transformers.pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     device=0 if torch.cuda.is_available() else -1 # Use GPU if available
  )

+ def chat_function(message, history, system_prompt, max_new_tokens, temperature):
+     messages = [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": message},
+     ]
+     prompt = pipeline.tokenizer.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True
+     )
+     terminators = [
+         pipeline.tokenizer.eos_token_id,
+         pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+     ]
+     temp = temperature + 0.1
+     outputs = pipeline(
+         prompt,
+         max_new_tokens=max_new_tokens,
+         eos_token_id=terminators,
+         do_sample=True,
+         temperature=temp,
+         top_p=0.9,
+     )
+     return outputs[0]["generated_text"][len(prompt):]
+
+ message = "Hello, can you teach me past simple?"
+ history = [("Hi!", "I'm doing well, thanks for asking!")]
+ temperature = 0.7
+ max_new_tokens = 50
+ prompt = "Act as an english tutor. Always correct grammar and spelling mistakes. Always keep the conversation going by asking follow up questions"
+
+ response = chat_function(message=message, history=history, system_prompt=prompt, max_new_tokens=max_new_tokens, temperature=temperature)
+
+ print(response)
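
As committed, the new app.py loads the model from a Windows-style local path, and neither the `history` argument nor the imported `TextIteratorStreamer` is used. Below is a minimal self-contained sketch of the same chat flow, under assumptions that are not part of the commit: the gated Hub model `meta-llama/Meta-Llama-3-8B-Instruct` is downloaded directly using an `HF_TOKEN` from `.env`, and a CUDA device is preferred for bfloat16 inference when available. It mirrors the committed `chat_function` but is illustrative only, not the author's exact script.

# Hypothetical sketch: same chat flow as the commit, but pulling the model
# from the Hugging Face Hub instead of a local Windows path (assumption).
import os

import torch
import transformers
from dotenv import load_dotenv

load_dotenv()  # makes HF_TOKEN from .env visible to this process

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # Hub id, not a filesystem path

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device=0 if torch.cuda.is_available() else -1,  # GPU if available, else CPU
    token=os.getenv("HF_TOKEN"),  # needed because the Llama 3 repo is gated
)


def chat_function(message, system_prompt, max_new_tokens=256, temperature=0.7):
    """Build a Llama 3 chat prompt and return only the newly generated text."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temperature,
        top_p=0.9,
    )
    # Strip the echoed prompt so only the assistant's reply is returned.
    return outputs[0]["generated_text"][len(prompt):]


if __name__ == "__main__":
    system_prompt = (
        "Act as an English tutor. Always correct grammar and spelling mistakes. "
        "Always keep the conversation going by asking follow-up questions."
    )
    print(chat_function("Hello, can you teach me past simple?", system_prompt))

To try the sketch, put a line such as HF_TOKEN=<your token> in a local .env file and run it as a plain Python script; on a CPU-only machine it will still run, just slowly.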