Update README.md
README.md CHANGED
@@ -18,66 +18,51 @@ pip install -qU transformers==4.36.2 datasets python-dotenv peft bitsandbytes
Before:

## Example Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Define the name of your fine-tuned model
finetuned_model = 'ruslanmv/Medical-Mixtral-7B-v2k'

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(finetuned_model, trust_remote_code=True)

# Load the model with the provided adapter configuration and weights
model_pretrained = AutoModelForCausalLM.from_pretrained(finetuned_model, trust_remote_code=True, torch_dtype=torch.float16)

messages = [
    {'role': 'user', 'content': 'What should I do to reduce my weight gained due to genetic hypothyroidism?'},
    {'role': 'assistant', 'content': ''},
]

input_ids = tokenizer.apply_chat_template(messages, return_tensors='pt').to('cuda')
outputs = model_pretrained.generate(input_ids, max_new_tokens=500)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
For GPUs:

```python
# Define the name of your fine-tuned model
finetuned_model = 'ruslanmv/{new_model}'

# Load fine-tuned model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=
    bnb_4bit_quant_type=
    bnb_4bit_compute_dtype=
    bnb_4bit_use_double_quant=
)
model_pretrained = AutoModelForCausalLM.from_pretrained(
    cache_dir=cache_dir
)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(finetuned_model,

def build_prompt(question):

question = "
prompt = build_prompt(question)
```
After:

## Example Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, logging, BitsAndBytesConfig
import os, torch

# Define the name of your fine-tuned model
finetuned_model = 'ruslanmv/Medical-Mixtral-7B-v2k'

# Load fine-tuned model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)
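# NF4 is the 4-bit "NormalFloat" data type introduced in the QLoRA paper:
# weights are stored in 4 bits while matmuls run in the bnb_4bit_compute_dtype
# (bfloat16 here), cutting weight memory to roughly a quarter of fp16.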
model_pretrained = AutoModelForCausalLM.from_pretrained(
    finetuned_model,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
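# Note: device_map="auto" lets the accelerate library place layers across the
# available GPUs (and CPU, if needed); the `accelerate` package must be installed.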

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(finetuned_model, trust_remote_code=True)

# Set pad_token_id to eos_token_id
model_pretrained.config.pad_token_id = tokenizer.eos_token_id

pipe = pipeline(task="text-generation", model=model_pretrained, tokenizer=tokenizer, max_length=500)

def build_prompt(question):
    prompt = f"[INST]@Enlighten. [/INST] {question}"
    return prompt

question = "Are my symptoms due to HIV infection? I had a high-risk exposure 15 months ago"
prompt = build_prompt(question)

# Generate text based on the prompt
result = pipe(prompt)[0]
generated_text = result['generated_text']

# Remove the prompt from the generated text
generated_text = generated_text.replace(prompt, "", 1).strip()

print(generated_text)
```
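
Once the pipeline is built, asking several questions is just a loop. Below is a minimal sketch assuming the `pipe` and `build_prompt` objects defined above; the `ask` helper is illustrative and not part of the model card:

```python
# Hypothetical convenience wrapper around the pipeline defined above.
def ask(question):
    prompt = build_prompt(question)
    # A text-generation pipeline returns a list of dicts with 'generated_text'
    result = pipe(prompt)[0]['generated_text']
    # Strip the echoed prompt so only the model's answer remains
    return result.replace(prompt, "", 1).strip()

for q in [
    'What should I do to reduce my weight gained due to genetic hypothyroidism?',
    'Are my symptoms due to HIV infection? I had a high-risk exposure 15 months ago',
]:
    print(ask(q))
```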