# import gradio as gr
# from huggingface_hub import InferenceClient
# import os

# """
# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
# """

# token = os.getenv("HF_TOKEN")
# # NOTE: Bio_ClinicalBERT is an encoder-only model, so chat_completion
# # needs a chat-capable model id here instead
# client = InferenceClient("emilyalsentzer/Bio_ClinicalBERT", token=token)


# def respond(
#     message,
#     history: list[tuple[str, str]],
#     system_message,
#     max_tokens,
#     temperature,
#     top_p,
# ):
#     messages = [{"role": "system", "content": system_message}]

#     for val in history:
#         if val[0]:
#             messages.append({"role": "user", "content": val[0]})
#         if val[1]:
#             messages.append({"role": "assistant", "content": val[1]})

#     messages.append({"role": "user", "content": message})

#     response = ""

#     # Stream the completion chunk by chunk
#     for chunk in client.chat_completion(
#         messages,
#         max_tokens=max_tokens,
#         stream=True,
#         temperature=temperature,
#         top_p=top_p,
#     ):
#         token = chunk.choices[0].delta.content

#         # The final chunk of a stream may carry no content
#         if token:
#             response += token
#         yield response


# """
# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
# """
# demo = gr.ChatInterface(
#     respond,
#     additional_inputs=[
#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#         gr.Slider(
#             minimum=0.1,
#             maximum=1.0,
#             value=0.95,
#             step=0.05,
#             label="Top-p (nucleus sampling)",
#         ),
#     ],
# )


# if __name__ == "__main__":
#     demo.launch()


# import gradio as gr
# from langdetect import detect
# from transformers import pipeline
# from qdrant_client import QdrantClient
# from qdrant_client.models import VectorParams, Distance
# from langchain.llms import HuggingFacePipeline
# from langchain.chains import RetrievalQA
# from langchain.vectorstores import Qdrant
# from transformers import GenerationConfig, AutoTokenizer, AutoModelForCausalLM
# from langchain.embeddings import HuggingFaceEmbeddings
# import os

# QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
# QDRANT_URL = os.getenv("QDRANT_URL")


# # Define model path
# model_name = "FreedomIntelligence/Apollo-7B"

# # Load model directly
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)

# # Enable padding token if missing
# tokenizer.pad_token = tokenizer.eos_token

# # Set up Qdrant vector store
# qdrant_client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
# vector_size = 768
# embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")

# qdrant_vectorstore = Qdrant(
#     client=qdrant_client,
#     collection_name="arabic_rag_collection",
#     embeddings=embedding
# )

# # Generation config
# generation_config = GenerationConfig(
#     max_new_tokens=150,
#     temperature=0.2,
#     top_k=20,
#     do_sample=True,
#     top_p=0.7,
#     repetition_penalty=1.3,
# )

# # Set up HuggingFace Pipeline
# llm_pipeline = pipeline(
#     model=model,
#     tokenizer=tokenizer,
#     task="text-generation",
#     generation_config=generation_config,
# )

# llm = HuggingFacePipeline(pipeline=llm_pipeline)

# # Set up QA Chain
# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=qdrant_vectorstore.as_retriever(search_kwargs={"k": 3}),
#     chain_type="stuff"
# )

# # Generate prompt based on language
# def generate_prompt(question):
#     lang = detect(question)
#     if lang == "ar":
#         return f"""أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة. 
# وتأكد من أن:
# - عدم تكرار أي نقطة أو عبارة أو كلمة
# - وضوح وسلاسة كل نقطة
# - تجنب الحشو والعبارات الزائدة

# السؤال: {question}
# الإجابة:
# """
#     else:
#         return f"""Answer the following medical question in clear English with a detailed, non-redundant response. Do not repeat ideas, phrases, or restate the question in the answer. If the context lacks relevant information, rely on your prior medical knowledge. If the answer involves multiple points, list them in concise and distinct bullet points:
# Question: {question}
# Answer:"""

# # Define Gradio interface function
# def medical_chatbot(question):
#     formatted_question = generate_prompt(question)
#     answer = qa_chain.run(formatted_question)
#     return answer

# # Set up Gradio interface
# iface = gr.Interface(
#     fn=medical_chatbot,
#     inputs=gr.Textbox(label="Ask a Medical Question", placeholder="Type your question here..."),
#     outputs=gr.Textbox(label="Answer", interactive=False),
#     title="Medical Chatbot",
#     description="Ask medical questions and get detailed answers in Arabic or English.",
#     theme="compact"
# )

# # Launch Gradio interface
# if __name__ == "__main__":
#     iface.launch()


import gradio as gr
from langdetect import detect
from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline, GenerationConfig
import torch

# Load model and tokenizer (earlier candidates kept for reference)
# model_name = "FreedomIntelligence/Apollo-7B"
# model_name = "emilyalsentzer/Bio_ClinicalBERT"
model_name = "FreedomIntelligence/Apollo-2B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
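
# Full fp32 weights for a 2B-parameter causal LM are memory-hungry. A hedged
# variant, assuming a CUDA GPU and the `accelerate` package (required for
# device_map="auto"); both kwargs are standard from_pretrained options:
#
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     torch_dtype=torch.float16,  # halves memory versus fp32
#     device_map="auto",          # places weights on available devices
# )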

# Some causal LM tokenizers ship without a pad token; reuse EOS for padding
tokenizer.pad_token = tokenizer.eos_token

# Default decoding settings, attached to the model so generate() uses them
# unless a call overrides them (chat_fn below passes its own values)
generation_config = GenerationConfig(
    max_new_tokens=150,
    temperature=0.2,
    top_k=20,
    do_sample=True,
    top_p=0.7,
    repetition_penalty=1.3,
)
model.generation_config = generation_config

# Create generation pipeline; device 0 targets the first GPU when CUDA is
# available, -1 keeps everything on CPU
pipe = TextGenerationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)
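
# Optional one-off smoke test (hypothetical prompt) to confirm generation
# works end to end before wiring up the UI:
#
# print(pipe("Question: What is hypertension?\nAnswer:",
#            max_new_tokens=32)[0]["generated_text"])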

# Prompt formatter based on language: langdetect returns an ISO 639-1 code,
# so "ar" selects the Arabic template, which asks for a precise, detailed
# answer in Modern Standard Arabic with no repetition or filler. `history`
# is accepted to mirror the chat signature but is not used here.
def generate_prompt(message, history):
    lang = detect(message)
    if lang == "ar":
        return f"""أجب على السؤال الطبي التالي بلغة عربية فصحى، بإجابة دقيقة ومفصلة. إذا لم تجد معلومات كافية في السياق، استخدم معرفتك الطبية السابقة.
وتأكد من أن:
- عدم تكرار أي نقطة أو عبارة أو كلمة
- وضوح وسلاسة كل نقطة
- تجنب الحشو والعبارات الزائدة

السؤال: {message}
الإجابة:"""
    else:
        return f"""Answer the following medical question in clear English with a detailed, non-redundant response. Do not repeat ideas or restate the question. If information is missing, rely on your prior medical knowledge:
Question: {message}
Answer:"""
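
# Illustrative routing check (hypothetical inputs):
#
# generate_prompt("What are the symptoms of anemia?", [])  # -> English template
# generate_prompt("ما هي أعراض فقر الدم؟", [])              # -> Arabic template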

# Chat function wired to gr.ChatInterface, which passes (message, history)
def chat_fn(message, history):
    prompt = generate_prompt(message, history)
    response = pipe(
        prompt,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
    )[0]["generated_text"]
    # The pipeline echoes the prompt, so keep only the text after the
    # answer marker (English or Arabic)
    if "Answer:" in response:
        answer = response.split("Answer:")[-1].strip()
    else:
        answer = response.split("الإجابة:")[-1].strip()
    return answer
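
# A hedged alternative to the string split above: the text-generation pipeline
# accepts return_full_text=False, which strips the prompt from the output so
# only the completion comes back:
#
# completion = pipe(prompt, max_new_tokens=512, do_sample=True, top_p=0.9,
#                   return_full_text=False)[0]["generated_text"]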

# Gradio ChatInterface
demo = gr.ChatInterface(
    fn=chat_fn,
    title="🩺 Apollo Medical Chatbot",
    description="Multilingual (Arabic & English) medical Q&A chatbot powered by Apollo-7B model inference.",
    theme=gr.themes.Soft()
)

if __name__ == "__main__":
    demo.launch(share=True)