sounar committed on
Commit 2e8ea21
1 Parent(s): eedf7ab

Update app.py

Files changed (1):
  app.py +31 -18
app.py CHANGED
@@ -1,23 +1,36 @@
- import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  import torch
-
- # Load the model
  model_name = "ruslanmv/Medical-Llama3-8B"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
+ device_map = 'auto'
+ bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
+ model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, trust_remote_code=True, use_cache=False, device_map=device_map)
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token

- def generate_response(question):
-     inputs = tokenizer(question, return_tensors="pt").input_ids.to(model.device)
-     outputs = model.generate(inputs, max_new_tokens=256)
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+ def askme(question):
+     sys_message = '''
+     You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and
+     provide an informative answer. If you don't know the answer to a specific medical inquiry, advise seeking professional help.
+     '''
+     # Create messages structured for the chat template
+     messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": question}]
+
+     # Apply the chat template and tokenize the full prompt
+     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+     outputs = model.generate(**inputs, max_new_tokens=100, use_cache=True)
+
+     # Extract and return the generated text, removing the prompt
+     response_text = tokenizer.batch_decode(outputs)[0].strip()
+     answer = response_text.split('<|im_start|>assistant')[-1].strip()
+     return answer
+ # Example usage
+ # - Context: First describe your problem.
+ # - Question: Then ask your question.

- iface = gr.Interface(
-     fn=generate_response,
-     inputs="text",
-     outputs="text",
-     title="Medical Query Assistant",
-     description="Ask medical questions and receive AI-powered answers.",
- )
+ question = '''I'm a 35-year-old male and for the past few months, I've been experiencing fatigue,
+ increased sensitivity to cold, and dry, itchy skin.
+ Could these symptoms be related to hypothyroidism?
+ If so, what steps should I take to get a proper diagnosis and discuss treatment options?'''

- iface.launch(share=True)
+ print(askme(question))
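
Two runtime notes on the new code. The 4-bit BitsAndBytesConfig path together with the hard-coded .to("cuda") means this revision needs the bitsandbytes package and a CUDA GPU. Also, splitting the decoded text on '<|im_start|>assistant' assumes the tokenizer ships a ChatML-style chat template; with a different template the split would leave the prompt in the answer. A template-agnostic sketch (my suggestion, not part of this commit) decodes only the tokens generated after the prompt:

# Hypothetical alternative (not in this commit): strip the prompt by token
# position rather than by splitting on a template-specific marker string.
prompt_len = inputs["input_ids"].shape[-1]  # number of prompt tokens
answer = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()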
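
This commit also deletes the gr.Interface wrapper, so app.py no longer serves a web UI; it only prints the answer to one hard-coded question. If the UI is still wanted, the removed interface can be pointed at the new askme function. A minimal sketch reusing the names from the previous revision (an assumption on my part, not something this commit adds):

# Hypothetical (not part of this commit): re-attach the Gradio UI removed above.
import gradio as gr

iface = gr.Interface(
    fn=askme,  # the quantized-model pipeline defined above
    inputs="text",
    outputs="text",
    title="Medical Query Assistant",
    description="Ask medical questions and receive AI-powered answers.",
)

iface.launch(share=True)  # share=True mirrors the previous revision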