hassanaliemon
/

bn_rag_llama3-8b

Question Answering

Transformers

Safetensors

Bengali

Inference Endpoints

Model card Files Files and versions Community

hassanaliemon committed on Jun 11, 2024

Commit

e863a21

verified ·

1 Parent(s): 3d3b5f3

Update README.md

Browse files

Files changed (1) hide show

README.md +27 -6

README.md CHANGED Viewed

@@ -12,12 +12,24 @@ pipeline_tag: question-answering
 You can use the model through the high-level pipeline helper or load it directly. The example below loads it directly:
 ```python
-# Load model directly
 from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained("hassanaliemon/bn_rag_llama3-8b")
-model = AutoModelForCausalLM.from_pretrained("hassanaliemon/bn_rag_llama3-8b")
-prompt = """Below is an instruction in Bengali language that describes a task, paired with an input also in Bengali language that provides further context. Write a response in Bengali language that appropriately completes the request.
 ### Instruction:
 {}
@@ -28,18 +40,27 @@ prompt = """Below is an instruction in Bengali language that describes a task, p
 ### Response:
 {}
 """
 def generate_response(question, context):
-    inputs = tokenizer([prompt.format(question, context, "")], return_tensors="pt").to("cuda")
     outputs = model.generate(**inputs, max_new_tokens=1024, use_cache=True)
     responses = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
     response_start = responses.find("### Response:") + len("### Response:")
     response = responses[response_start:].strip()
     return response
-# Example Usage:
 question = "ভারতীয় বাঙালি কথাসাহিত্যিক মহাশ্বেতা দেবীর মৃত্যু কবে হয় ?"
 context = "২০১৬ সালের ২৩ জুলাই হৃদরোগে আক্রান্ত হয়ে মহাশ্বেতা দেবী কলকাতার বেল ভিউ ক্লিনিকে ভর্তি হন। সেই বছরই ২৮ জুলাই একাধিক অঙ্গ বিকল হয়ে তাঁর মৃত্যু ঘটে। তিনি মধুমেহ, সেপ্টিসেমিয়া ও মূত্র সংক্রমণ রোগেও ভুগছিলেন।"
 answer = generate_response(question, context)
 print(answer)
 ```

 You can use the model through the high-level pipeline helper or load it directly. The example below loads it directly:
 ```python
+import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# Determine the device to use (GPU if available, else CPU)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load pre-trained model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained("hassanaliemon/bn_rag_llama3-8b")
+# model = AutoModelForCausalLM.from_pretrained("hassanaliemon/bn_rag_llama3-8b")
+model = AutoModelForCausalLM.from_pretrained(
+            "hassanaliemon/bn_rag_llama3-8b",
+            load_in_8bit=True,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+        )
+# Define the prompt template
+prompt = """এখানে একটি নির্দেশনা দেওয়া হলো, যা একটি কাজ সম্পন্ন করার উপায় বর্ণনা করে, এবং এর সাথে একটি ইনপুট দেওয়া হলো যা আরও প্রেক্ষাপট প্রদান করে। একটি উত্তর লিখুন যা অনুরোধটি সঠিকভাবে পূরণ করে। প্রসঙ্গ থেকে সুনির্দিষ্ট উত্তর দিন.
 ### Instruction:
 {}
 ### Response:
 {}
 """
 def generate_response(question, context):
+    # Tokenize the input and move tensors to the selected device
+    inputs = tokenizer([prompt.format(question, context, "")], return_tensors="pt").to(device)
+    # Generate the response
     outputs = model.generate(**inputs, max_new_tokens=1024, use_cache=True)
+    # Decode the generated text
     responses = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+    # Extract the response text
     response_start = responses.find("### Response:") + len("### Response:")
     response = responses[response_start:].strip()
     return response
+# Example Usage
 question = "ভারতীয় বাঙালি কথাসাহিত্যিক মহাশ্বেতা দেবীর মৃত্যু কবে হয় ?"
 context = "২০১৬ সালের ২৩ জুলাই হৃদরোগে আক্রান্ত হয়ে মহাশ্বেতা দেবী কলকাতার বেল ভিউ ক্লিনিকে ভর্তি হন। সেই বছরই ২৮ জুলাই একাধিক অঙ্গ বিকল হয়ে তাঁর মৃত্যু ঘটে। তিনি মধুমেহ, সেপ্টিসেমিয়া ও মূত্র সংক্রমণ রোগেও ভুগছিলেন।"
 answer = generate_response(question, context)
 print(answer)
+# মহাশ্বেতা দেবী ২০১৬ সালের ২৮ জুলাই মারা যান।
 ```