sounar committed · verified
Commit 72ff248 · 1 parent: 4646254

Update app.py

Files changed (1):
  app.py  +40 -52
app.py CHANGED
@@ -1,69 +1,57 @@
- import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
- import os
 
  # Retrieve the token from environment variables
- api_token = os.getenv("HF_TOKEN").strip()
 
- # Model name
- model_name = "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1"
 
- # Load the Hugging Face model and tokenizer with required arguments
- tokenizer = AutoTokenizer.from_pretrained(
-     model_name,
-     token=api_token,        # Authenticate with Hugging Face token
-     trust_remote_code=True  # Allow custom code from the repository
  )
 
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     token=api_token,
      trust_remote_code=True,
-     device_map="auto",         # Efficient device allocation
-     torch_dtype=torch.float16  # Mixed precision for faster inference
  )
 
- # Define the function to process user input
- def generate_response(input_text):
-     try:
-         # Tokenize the input text
-         inputs = tokenizer(input_text, return_tensors="pt")
-
-         # Ensure input tensor is sent to the same device as the model
-         input_ids = inputs["input_ids"].to(model.device)
-
-         # Add batch dimension (if missing)
-         if len(input_ids.shape) == 1:           # If shape is (seq_len,)
-             input_ids = input_ids.unsqueeze(0)  # Add batch dimension: (1, seq_len)
-
-         # Generate a response using the model
-         outputs = model.generate(
-             input_ids,
-             max_length=256,          # Limit the output length
-             num_return_sequences=1,  # Generate a single response
-             temperature=0.7,         # Adjust for creativity vs. determinism
-             top_p=0.9,               # Nucleus sampling
-             top_k=50                 # Top-k sampling
-         )
 
-         # Decode and return the generated text
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         return response
 
-     except Exception as e:
-         # Return error details in case of failure
-         return f"Error: {str(e)}"
 
- # Create a Gradio interface
  iface = gr.Interface(
-     fn=generate_response,
-     inputs="text",
-     outputs="text",
-     title="ContactDoctor Medical Assistant",
-     description="Provide input symptoms or queries and get AI-powered medical advice."
  )
 
- # Launch the Gradio app
  if __name__ == "__main__":
      iface.launch()
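
Review note on the removed version: model.generate() is called with temperature, top_p, and top_k, but do_sample is never set, so transformers decodes greedily and ignores those sampling arguments (recent versions emit a warning about this). max_length=256 also counts the prompt tokens; max_new_tokens bounds only the generated text. A minimal sketch of a corrected call, keeping the values from the removed code:

    # Sketch only (not part of this commit): enable sampling explicitly.
    outputs = model.generate(
        input_ids,
        do_sample=True,        # required for temperature/top_p/top_k to apply
        max_new_tokens=256,    # limits new tokens only, unlike max_length
        num_return_sequences=1,
        temperature=0.7,
        top_p=0.9,
        top_k=50,
    )

Separately, os.getenv("HF_TOKEN") returns None when the variable is unset, so the .strip() call in the removed version can raise AttributeError before the app starts.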
 
  # Retrieve the token from environment variables
+ #api_token = os.getenv("HF_TOKEN").strip()
 
+ import torch
+ from PIL import Image
+ from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
+ import gradio as gr
 
+ # Load the model and tokenizer
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype=torch.float16,
  )
 
+ model = AutoModel.from_pretrained(
+     "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
+     quantization_config=bnb_config,
+     device_map="auto",
+     torch_dtype=torch.float16,
      trust_remote_code=True,
+     attn_implementation="flash_attention_2",
  )
 
+ tokenizer = AutoTokenizer.from_pretrained(
+     "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
+     trust_remote_code=True
+ )
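
Review note on the new loader: 4-bit NF4 quantization requires the bitsandbytes package and a CUDA device, and attn_implementation="flash_attention_2" additionally requires the separately installed flash-attn package; if either is missing, from_pretrained() raises at startup. A minimal sketch of a guarded load, assuming transformers >= 4.36 (where the built-in "sdpa" attention is available as a fallback):

    # Sketch only (not part of this commit): fall back to PyTorch's
    # scaled-dot-product attention when flash-attn is not installed.
    try:
        import flash_attn  # noqa: F401 -- needs a recent NVIDIA GPU
        attn_impl = "flash_attention_2"
    except ImportError:
        attn_impl = "sdpa"

    model = AutoModel.from_pretrained(
        "ContactDoctor/Bio-Medical-MultiModal-Llama-3-8B-V1",
        quantization_config=bnb_config,
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True,
        attn_implementation=attn_impl,
    )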
 
+ # Define the function to handle the input
+ def process_input(image, question):
+     image = Image.open(image).convert("RGB")
+     msgs = [{'role': 'user', 'content': [image, question]}]
+     res = model.chat(image=image, msgs=msgs, tokenizer=tokenizer, sampling=True, temperature=0.95, stream=True)
 
+     generated_text = ""
+     for new_text in res:
+         generated_text += new_text
+     return generated_text
 
+ # Gradio interface
  iface = gr.Interface(
+     fn=process_input,
+     inputs=[
+         gr.Image(type="file", label="Upload Image"),
+         gr.Textbox(lines=2, label="Question")
+     ],
+     outputs=gr.Textbox(label="Generated Response"),
+     title="BioMedical MultiModal Llama",
+     description="Upload an image and ask a medical question."
  )
 
  if __name__ == "__main__":
      iface.launch()
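
Review note on the new interface: recent Gradio releases accept only "numpy", "pil", and "filepath" for gr.Image's type parameter, so type="file" raises a ValueError when the Interface is built. With type="pil" the handler receives a PIL image directly and the Image.open() call can be dropped. And since model.chat(..., stream=True) already returns a generator, the handler can yield partial text so the UI streams the reply. A minimal sketch under those assumptions:

    # Sketch only (not part of this commit): type="pil" plus a generator
    # handler, whose yielded values gr.Interface renders incrementally.
    def process_input(image, question):
        image = image.convert("RGB")  # already a PIL.Image with type="pil"
        msgs = [{'role': 'user', 'content': [image, question]}]
        res = model.chat(image=image, msgs=msgs, tokenizer=tokenizer,
                         sampling=True, temperature=0.95, stream=True)
        generated_text = ""
        for new_text in res:
            generated_text += new_text
            yield generated_text  # partial output updates the textbox live

    iface = gr.Interface(
        fn=process_input,
        inputs=[gr.Image(type="pil", label="Upload Image"),
                gr.Textbox(lines=2, label="Question")],
        outputs=gr.Textbox(label="Generated Response"),
        title="BioMedical MultiModal Llama",
        description="Upload an image and ask a medical question.",
    )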