KaiShin1885 committed on
Commit
221d902
·
verified ·
1 Parent(s): 0272e5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -18
app.py CHANGED
@@ -4,6 +4,8 @@ import os
4
  from huggingface_hub import InferenceClient
5
  import asyncio
6
  import subprocess
 
 
7
 
8
  # ๋กœ๊น… ์„ค์ •
9
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
@@ -16,7 +18,7 @@ intents.guilds = True
16
  intents.guild_messages = True
17
 
18
  # ์ถ”๋ก  API ํด๋ผ์ด์–ธํŠธ ์„ค์ •
19
- hf_client = InferenceClient("CohereForAI/aya-23-8B", token=os.getenv("HF_TOKEN"))
20
 
21
  # ํŠน์ • ์ฑ„๋„ ID
22
  SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
@@ -24,6 +26,21 @@ SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
24
  # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ๋ฅผ ์ €์žฅํ•  ์ „์—ญ ๋ณ€์ˆ˜
25
  conversation_history = []
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  class MyClient(discord.Client):
28
  def __init__(self, *args, **kwargs):
29
  super().__init__(*args, **kwargs)
@@ -42,15 +59,9 @@ class MyClient(discord.Client):
42
  if self.is_processing:
43
  return
44
  self.is_processing = True
45
-
46
  try:
47
- if not isinstance(message.channel, discord.Thread):
48
- thread = await message.create_thread(name=f"๋…ผ๋ฌธ ์ž‘์„ฑ - {message.author.display_name}", auto_archive_duration=60)
49
- else:
50
- thread = message.channel
51
-
52
  response = await generate_response(message)
53
- await thread.send(response)
54
  finally:
55
  self.is_processing = False
56
 
@@ -59,14 +70,17 @@ class MyClient(discord.Client):
59
  isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
60
  )
61
 
62
-
63
  async def generate_response(message):
64
  global conversation_history
65
  user_input = message.content
66
  user_mention = message.author.mention
67
- system_message = f"{user_mention}, Discord์—์„œ ์‚ฌ์šฉ์ž๋“ค์˜ ์งˆ๋ฌธ์— ๋‹ตํ•˜๋Š” ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค."
 
 
 
 
68
  system_prefix = """
69
-
70
  1. ์ฃผ์ œ์— ๋”ฐ๋ฅธ ๋ฌธ๋งฅ ์ดํ•ด์— ๋งž๋Š” ๊ธ€์„ ์จ์ฃผ์„ธ์š”.
71
  2. ์ฃผ์ œ์™€ ์ƒํ™ฉ์— ๋งž๋Š” ์ ์ ˆํ•œ ์–ดํœ˜๋ฅผ ์„ ํƒํ•ด์ฃผ์„ธ์š”
72
  3. ํ•œ๊ตญ ๋ฌธํ™”์™€ ์ ํ•ฉ์„ฑ๋ฅผ ๊ณ ๋ คํ•ด์ฃผ์„ธ์š”
@@ -188,28 +202,48 @@ async def generate_response(message):
188
  """
189
 
190
  conversation_history.append({"role": "user", "content": user_input})
191
- logging.debug(f'Conversation history updated: {conversation_history}')
192
-
193
  messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}] + conversation_history
 
 
 
 
194
  logging.debug(f'Messages to be sent to the model: {messages}')
195
-
196
  loop = asyncio.get_event_loop()
197
  response = await loop.run_in_executor(None, lambda: hf_client.chat_completion(
198
  messages, max_tokens=1000, stream=True, temperature=0.7, top_p=0.85))
199
-
200
  full_response = []
201
  for part in response:
202
  logging.debug(f'Part received from stream: {part}')
203
  if part.choices and part.choices[0].delta and part.choices[0].delta.content:
204
  full_response.append(part.choices[0].delta.content)
205
-
206
  full_response_text = ''.join(full_response)
207
  logging.debug(f'Full model response: {full_response_text}')
208
-
209
  conversation_history.append({"role": "assistant", "content": full_response_text})
210
  return f"{user_mention}, {full_response_text}"
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  if __name__ == "__main__":
213
  discord_client = MyClient(intents=intents)
214
  discord_client.run(os.getenv('DISCORD_TOKEN'))
215
-
 
4
  from huggingface_hub import InferenceClient
5
  import asyncio
6
  import subprocess
7
+ from datasets import load_dataset
8
+ from sentence_transformers import SentenceTransformer, util
9
 
10
  # ๋กœ๊น… ์„ค์ •
11
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
 
18
  intents.guild_messages = True
19
 
20
  # ์ถ”๋ก  API ํด๋ผ์ด์–ธํŠธ ์„ค์ •
21
+ hf_client = InferenceClient("CohereForAI/c4ai-command-r-08-2024", token=os.getenv("HF_TOKEN"))
22
 
23
  # ํŠน์ • ์ฑ„๋„ ID
24
  SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
 
26
  # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ๋ฅผ ์ €์žฅํ•  ์ „์—ญ ๋ณ€์ˆ˜
27
  conversation_history = []
28
 
29
+ # ๋ฐ์ดํ„ฐ์…‹ ๋กœ๋“œ
30
+ datasets = [
31
+ ("all-processed", "all-processed"),
32
+ ("chatdoctor-icliniq", "chatdoctor-icliniq"),
33
+ ("chatdoctor_healthcaremagic", "chatdoctor_healthcaremagic"),
34
+ # ... (๋‚˜๋จธ์ง€ ๋ฐ์ดํ„ฐ์…‹)
35
+ ]
36
+
37
+ all_datasets = {}
38
+ for dataset_name, config in datasets:
39
+ all_datasets[dataset_name] = load_dataset("lavita/medical-qa-datasets", config)
40
+
41
+ # ๋ฌธ์žฅ ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋“œ
42
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
43
+
44
  class MyClient(discord.Client):
45
  def __init__(self, *args, **kwargs):
46
  super().__init__(*args, **kwargs)
 
59
  if self.is_processing:
60
  return
61
  self.is_processing = True
 
62
  try:
 
 
 
 
 
63
  response = await generate_response(message)
64
+ await message.channel.send(response)
65
  finally:
66
  self.is_processing = False
67
 
 
70
  isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
71
  )
72
 
 
73
  async def generate_response(message):
74
  global conversation_history
75
  user_input = message.content
76
  user_mention = message.author.mention
77
+
78
+ # ์œ ์‚ฌํ•œ ๋ฐ์ดํ„ฐ ์ฐพ๊ธฐ
79
+ most_similar_data = find_most_similar_data(user_input)
80
+
81
+ system_message = f"{user_mention}, DISCORD์—์„œ ์‚ฌ์šฉ์ž๋“ค์˜ ์งˆ๋ฌธ์— ๋‹ตํ•˜๋Š” ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค."
82
  system_prefix = """
83
+
84
  1. ์ฃผ์ œ์— ๋”ฐ๋ฅธ ๋ฌธ๋งฅ ์ดํ•ด์— ๋งž๋Š” ๊ธ€์„ ์จ์ฃผ์„ธ์š”.
85
  2. ์ฃผ์ œ์™€ ์ƒํ™ฉ์— ๋งž๋Š” ์ ์ ˆํ•œ ์–ดํœ˜๋ฅผ ์„ ํƒํ•ด์ฃผ์„ธ์š”
86
  3. ํ•œ๊ตญ ๋ฌธํ™”์™€ ์ ํ•ฉ์„ฑ๋ฅผ ๊ณ ๋ คํ•ด์ฃผ์„ธ์š”
 
202
  """
203
 
204
  conversation_history.append({"role": "user", "content": user_input})
 
 
205
  messages = [{"role": "system", "content": f"{system_prefix} {system_message}"}] + conversation_history
206
+
207
+ if most_similar_data:
208
+ messages.append({"role": "system", "content": f"๊ด€๋ จ ์ •๋ณด: {most_similar_data}"})
209
+
210
  logging.debug(f'Messages to be sent to the model: {messages}')
211
+
212
  loop = asyncio.get_event_loop()
213
  response = await loop.run_in_executor(None, lambda: hf_client.chat_completion(
214
  messages, max_tokens=1000, stream=True, temperature=0.7, top_p=0.85))
215
+
216
  full_response = []
217
  for part in response:
218
  logging.debug(f'Part received from stream: {part}')
219
  if part.choices and part.choices[0].delta and part.choices[0].delta.content:
220
  full_response.append(part.choices[0].delta.content)
221
+
222
  full_response_text = ''.join(full_response)
223
  logging.debug(f'Full model response: {full_response_text}')
224
+
225
  conversation_history.append({"role": "assistant", "content": full_response_text})
226
  return f"{user_mention}, {full_response_text}"
227
 
228
def find_most_similar_data(query):
    """Return the corpus QA text most similar to *query*, or None.

    Embeds every ``question``/``answer`` item of every loaded dataset split
    and returns the "question: ... answer: ..." text with the highest cosine
    similarity to the query embedding (ties resolve to the first maximum,
    same as the original strictly-greater comparison).

    Performance fix: the original re-encoded the ENTIRE corpus — one
    ``model.encode`` call and one cosine-similarity call per item — on every
    query.  The corpus embeddings are now built once on first use, cached on
    the function object (safe here: ``all_datasets`` is populated once at
    module import and never mutated in the visible code), and compared with
    a single batched cosine-similarity call.
    """
    cache = getattr(find_most_similar_data, "_corpus_cache", None)
    if cache is None:
        texts = []
        for dataset in all_datasets.values():
            for split in dataset.keys():
                for item in dataset[split]:
                    if 'question' in item and 'answer' in item:
                        # Runtime string kept byte-identical to the original.
                        texts.append(f"์งˆ๋ฌธ: {item['question']} ๋‹ต๋ณ€: {item['answer']}")
        if texts:
            # Batch-encode the whole corpus in one call.
            cache = (texts, model.encode(texts, convert_to_tensor=True))
        else:
            cache = ([], None)
        find_most_similar_data._corpus_cache = cache

    texts, corpus_embeddings = cache
    if corpus_embeddings is None:
        # Empty corpus: original returned None (similarity stayed at -1).
        return None

    query_embedding = model.encode(query, convert_to_tensor=True)
    # One batched cosine-similarity instead of one call per item.
    similarities = util.pytorch_cos_sim(query_embedding, corpus_embeddings)[0]
    return texts[int(similarities.argmax())]
246
+
247
if __name__ == "__main__":
    # Fail fast with a clear message when the bot token is missing, instead
    # of the confusing error discord.py raises for a None token.
    token = os.getenv('DISCORD_TOKEN')
    if not token:
        raise RuntimeError("DISCORD_TOKEN environment variable is not set")
    discord_client = MyClient(intents=intents)
    discord_client.run(token)