Muhammadidrees committed (verified)
Commit 429fe72 · 1 Parent(s): 87b06f0

Update app.py

Files changed (1)
  1. app.py +120 -100
app.py CHANGED
@@ -1,100 +1,120 @@
- # chat.py
- import os
- import gc
- import torch
- from transformers import LlamaTokenizer, LlamaForCausalLM
-
- # =============================
- # Configuration
- # =============================
- MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
- MAX_NEW_TOKENS = 200
- TEMPERATURE = 0.5
- TOP_K = 50
- REPETITION_PENALTY = 1.1
-
- # Detect device
- device = "cuda" if torch.cuda.is_available() else "cpu"
- print(f"Loading model from {MODEL_PATH} on {device}...")
-
- # =============================
- # Load Tokenizer and Model
- # =============================
- tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
-
- model = LlamaForCausalLM.from_pretrained(
-     MODEL_PATH,
-     device_map="auto",          # automatically dispatch weights to GPU
-     torch_dtype=torch.float16,  # half precision for faster inference
-     low_cpu_mem_usage=True      # optimize CPU memory
- )
-
- # DO NOT call model.to(device) when using device_map="auto"
- generator = model.generate
- print("✅ Model loaded successfully!\n")
-
- # =============================
- # Chat History
- # =============================
- history = ["ChatDoctor: I am ChatDoctor, what medical questions do you have?"]
-
- # =============================
- # Response Function
- # =============================
- def get_response(user_input):
-     global history
-     human_invitation = "Patient: "
-     doctor_invitation = "ChatDoctor: "
-
-     # Append user input
-     history.append(human_invitation + user_input)
-
-     # Build prompt
-     prompt = "\n".join(history) + "\n" + doctor_invitation
-     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-
-     # Generate response
-     with torch.no_grad():
-         output_ids = generator(
-             input_ids,
-             max_new_tokens=MAX_NEW_TOKENS,
-             do_sample=True,
-             temperature=TEMPERATURE,
-             top_k=TOP_K,
-             repetition_penalty=REPETITION_PENALTY
-         )
-
-     # Decode response
-     full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-     response = full_output[len(prompt):].strip()
-
-     # Clean if the model repeats the patient prompt
-     if response.startswith("Patient:"):
-         response = response[len("Patient:"):].strip()
-
-     # Append model response to history
-     history.append(doctor_invitation + response)
-
-     # Free memory
-     del input_ids, output_ids
-     gc.collect()
-     torch.cuda.empty_cache()
-
-     return response
-
- # =============================
- # CLI Chat
- # =============================
- if __name__ == "__main__":
-     print("\n=== ChatDoctor is ready! Type your questions. ===\n")
-     while True:
-         try:
-             user_input = input("Patient: ").strip()
-             if user_input.lower() in ["exit", "quit"]:
-                 print("Exiting ChatDoctor. Goodbye!")
-                 break
-             response = get_response(user_input)
-             print("ChatDoctor: " + response + "\n")
-         except KeyboardInterrupt:
-             print("\nExiting ChatDoctor. Goodbye!")
-             break
+ # chat.py
+ import os
+ import gc
+ import torch
+ from transformers import LlamaTokenizer, LlamaForCausalLM
+
+ # =============================
+ # Configuration
+ # =============================
+ MODEL_PATH = r"C:\Users\JAY\Downloads\Chatdoc\ChatDoctor\pretrained"
+ MAX_NEW_TOKENS = 200
+ TEMPERATURE = 0.5
+ TOP_K = 50
+ REPETITION_PENALTY = 1.1
+
+ # Detect device
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Loading model from {MODEL_PATH} on {device}...")
+
+ # =============================
+ # Load Tokenizer and Model
+ # =============================
+ tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
+
+ model = LlamaForCausalLM.from_pretrained(
+     MODEL_PATH,
+     device_map="auto",          # automatically dispatch weights to GPU
+     torch_dtype=torch.float16,  # half precision for faster inference
+     low_cpu_mem_usage=True      # optimize CPU memory
+ )
+
+ # DO NOT call model.to(device) when using device_map="auto"
+ generator = model.generate
+ print("✅ Model loaded successfully!\n")
+
+ # =============================
+ # Chat History
+ # =============================
+ systemprompt = ("""You are ChatDoctor an intelligent, empathetic medical AI assistant.
+ Your role is to carefully gather medical information, reason clinically,
+ and provide safe, evidence-based guidance.
+
+ Follow these instructions strictly:
+ 1. When a patient describes their illness, DO NOT diagnose immediately.
+ 2. Ask relevant, targeted questions to collect all necessary details
+ such as symptoms, duration, severity, lifestyle habits, medical history,
+ medications, and any recent tests or changes.
+ 3. Once you have enough information for a preliminary diagnosis, clearly
+ explain your reasoning and possible causes in simple medical language.
+ 4. Then, provide a clear and structured response that includes:
+ - **Diagnosis:** probable or confirmed condition(s)
+ - **Dietary Advice:** foods to include and avoid
+ - **Lifestyle Advice:** exercise, sleep, stress, and other habits
+ 5. Be concise, empathetic, and professional at all times.
+ 6. Never switch roles or generate “Patient:” responses. Always remain as ChatDoctor.
+ 7. If symptoms suggest a serious or emergency condition, advise the patient
+ to seek immediate medical attention.""")
+
+ history = [systemprompt, "ChatDoctor: I am ChatDoctor, what medical questions do you have?"]
+
+ # =============================
+ # Response Function
+ # =============================
+ def get_response(user_input):
+     global history
+     human_invitation = "Patient: "
+     doctor_invitation = "ChatDoctor: "
+
+     # Append user input
+     history.append(human_invitation + user_input)
+
+     # Build prompt
+     prompt = "\n".join(history) + "\n" + doctor_invitation
+     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
+
+     # Generate response
+     with torch.no_grad():
+         output_ids = generator(
+             input_ids,
+             max_new_tokens=MAX_NEW_TOKENS,
+             do_sample=True,
+             temperature=TEMPERATURE,
+             top_k=TOP_K,
+             repetition_penalty=REPETITION_PENALTY
+         )
+
+     # Decode response
+     full_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+     response = full_output[len(prompt):].strip()
+
+     # Clean if the model repeats the patient prompt
+     if response.startswith("Patient:"):
+         response = response[len("Patient:"):].strip()
+
+     # Append model response to history
+     history.append(doctor_invitation + response)
+
+     # Free memory
+     del input_ids, output_ids
+     gc.collect()
+     torch.cuda.empty_cache()
+
+     return response
+
+ # =============================
+ # CLI Chat
+ # =============================
+ if __name__ == "__main__":
+     print("\n=== ChatDoctor is ready! Type your questions. ===\n")
+     while True:
+         try:
+             user_input = input("Patient: ").strip()
+             if user_input.lower() in ["exit", "quit"]:
+                 print("Exiting ChatDoctor. Goodbye!")
+                 break
+             response = get_response(user_input)
+             print("ChatDoctor: " + response + "\n")
+         except KeyboardInterrupt:
+             print("\nExiting ChatDoctor. Goodbye!")
+             break
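
The effect of this commit is that `systemprompt` becomes the first entry of `history`, so every prompt assembled by `get_response()` now starts with the instruction block before the dialogue turns. Below is a minimal sketch of the assembled prompt; it is illustrative only and not part of the commit, the join logic and invitation strings are copied from the code above, and the system prompt is abbreviated to a stand-in string.

    # Sketch: roughly what get_response("I have a headache") feeds the model after this change.
    systemprompt = "You are ChatDoctor ..."  # abbreviated stand-in for the full instruction block
    history = [systemprompt, "ChatDoctor: I am ChatDoctor, what medical questions do you have?"]
    history.append("Patient: " + "I have a headache")
    prompt = "\n".join(history) + "\n" + "ChatDoctor: "
    print(prompt)
    # You are ChatDoctor ...
    # ChatDoctor: I am ChatDoctor, what medical questions do you have?
    # Patient: I have a headache
    # ChatDoctor:

Because the system prompt is a plain string at the head of the history list, it is re-sent on every turn and consumes part of the model's context window along with the growing dialogue.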