seawolf2357 committed on
Commit
104f578
•
1 Parent(s): aece062

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -36
app.py CHANGED
@@ -5,6 +5,7 @@ from huggingface_hub import InferenceClient
5
  import asyncio
6
  import subprocess
7
  from datasets import load_dataset
 
8
 
9
  # 현재 작업 디렉토리 출력
10
  print("Current Working Directory:", os.getcwd())
@@ -22,8 +23,16 @@ if missing_files:
22
  else:
23
  print("All files are present in the current directory.")
24
 
25
- # ๋ฐ์ดํ„ฐ์…‹ ๋กœ๋“œ
26
- law_dataset = load_dataset('csv', data_files=data_files)
 
 
 
 
 
 
 
 
27
  print("Dataset loaded successfully.")
28
 
29
  # 로깅 설정
@@ -36,7 +45,7 @@ intents.messages = True
36
  intents.guilds = True
37
  intents.guild_messages = True
38
 
39
- # 추론 API 클라이언트 설정
40
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus", token=os.getenv("HF_TOKEN"))
41
 
42
  # 특정 채널 ID
@@ -81,43 +90,25 @@ class MyClient(discord.Client):
81
 
82
  async def generate_response(message):
83
  global conversation_history
84
- case_number = message.content.strip()
85
  user_mention = message.author.mention
86
- system_message = f"{user_mention}, ๋‹ค์Œ์€ ์š”์ฒญํ•˜์‹  ์‚ฌ๊ฑด์˜ ์ „๋ฌธ์ž…๋‹ˆ๋‹ค:"
87
-
88
- # ๋ฐ์ดํ„ฐ ๊ฒ€์ƒ‰ ๋ฐ ์‘๋‹ต ์ค€๋น„
89
- full_text = search_in_dataset_by_case_number(case_number, law_dataset)
90
- full_response_text = f"{system_message}\n\n{full_text}"
91
- max_length = 2000
92
- if len(full_response_text) > max_length:
93
- response_parts = []
94
- for i in range(0, len(full_response_text), max_length):
95
- part_response = full_response_text[i:i+max_length]
96
- await message.channel.send(part_response)
97
- response_parts.append(part_response)
98
- logging.debug(f'Full model response sent: {full_response_text}')
99
- conversation_history.append({"role": "assistant", "content": full_response_text})
100
- return "".join(response_parts) # ์กฐ๊ฐ๋‚œ ๋ฉ”์‹œ์ง€๋“ค์„ ์—ฐ๊ฒฐํ•˜์—ฌ ๋ฐ˜ํ™˜
101
  else:
102
- await message.channel.send(full_response_text)
103
- logging.debug(f'Full model response sent: {full_response_text}')
104
- conversation_history.append({"role": "assistant", "content": full_response_text})
105
- return full_response_text # ์ „์ฒด ๋ฉ”์‹œ์ง€๋ฅผ ๋ฐ˜ํ™˜
106
-
107
- return "" # ๋ฐ˜ํ™˜ ๊ฐ’์ด ์—†์„ ๊ฒฝ์šฐ ๋นˆ ๋ฌธ์ž์—ด ๋ฐ˜ํ™˜
108
 
109
- def search_in_dataset(query, dataset):
110
- for record in dataset['train']:
111
- if record['์‚ฌ๊ฑด๋ช…'] and query in record['์‚ฌ๊ฑด๋ช…']:
112
- return record['์‚ฌ๊ฑด๋ฒˆํ˜ธ']
113
- return "๊ด€๋ จ ๋ฒ•๋ฅ  ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
114
-
115
- def search_in_dataset_by_case_number(case_number, dataset):
116
- for record in dataset['train']:
117
- if record['์‚ฌ๊ฑด๋ฒˆํ˜ธ'] == case_number:
118
- return record.get('์ „๋ฌธ', "ํ•ด๋‹น ์‚ฌ๊ฑด์— ๋Œ€ํ•œ ์ „๋ฌธ ์ •๋ณด๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
119
- return "๊ด€๋ จ ๋ฒ•๋ฅ  ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
120
 
121
  if __name__ == "__main__":
122
  discord_client = MyClient(intents=intents)
123
  discord_client.run(os.getenv('DISCORD_TOKEN'))
 
 
5
  import asyncio
6
  import subprocess
7
  from datasets import load_dataset
8
+ import pandas as pd
9
 
10
  # 현재 작업 디렉토리 출력
11
  print("Current Working Directory:", os.getcwd())
 
23
  else:
24
  print("All files are present in the current directory.")
25
 
26
+ # ๋ฐ์ดํ„ฐ์…‹ ๋กœ๋“œ ๋ฐ ์ตœ์ ํ™”
27
def load_optimized_dataset(data_files):
    """Read the case CSV files and build two in-memory lookup dictionaries.

    Every entry of ``data_files`` is read with ``pandas.read_csv`` and the
    resulting frames are concatenated into one table. Two plain dicts are
    then derived from it so later lookups do not have to scan the rows.

    Args:
        data_files: A single CSV path, or an iterable of CSV paths or
            file-like objects accepted by ``pandas.read_csv``.

    Returns:
        A ``(name_to_number, number_to_fulltext)`` tuple: the first maps the
        case-name column to the case-number column, the second maps the
        case-number column to the full-text column. Both are empty when
        ``data_files`` is empty. When a key occurs more than once, the last
        row wins.
    """
    # NOTE(review): these column names look mis-encoded (probably Korean
    # headers decoded with the wrong code page) -- confirm against the CSVs.
    name_col = '์‚ฌ๊ฑด๋ช…'
    number_col = '์‚ฌ๊ฑด๋ฒˆํ˜ธ'
    text_col = '์ „๋ฌธ'

    # A bare string would otherwise be iterated character by character.
    if isinstance(data_files, str):
        data_files = [data_files]

    data_frames = [pd.read_csv(file) for file in data_files]
    if not data_frames:
        # pd.concat raises ValueError on an empty list; report "no data".
        return {}, {}

    full_data = pd.concat(data_frames, ignore_index=True)

    # Rows without a key cannot be looked up; dropping them keeps NaN from
    # becoming a dictionary key.
    named_rows = full_data.dropna(subset=[name_col])
    numbered_rows = full_data.dropna(subset=[number_col])

    name_to_number = named_rows.set_index(name_col)[number_col].to_dict()
    number_to_fulltext = numbered_rows.set_index(number_col)[text_col].to_dict()
    return name_to_number, number_to_fulltext
34
+
35
+ name_to_number, number_to_fulltext = load_optimized_dataset(data_files)
36
  print("Dataset loaded successfully.")
37
 
38
  # 로깅 설정
 
45
  intents.guilds = True
46
  intents.guild_messages = True
47
 
48
+ # 추론 API 클라이언트 설정 (예시로 포함, 실제로 사용되지 않음)
49
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus", token=os.getenv("HF_TOKEN"))
50
 
51
  # 특정 채널 ID
 
90
 
91
async def generate_response(message):
    """Answer a message by looking its text up in the case tables.

    The stripped message text is first tried as a key of ``name_to_number``
    (reply: the matching case number), then as a key of
    ``number_to_fulltext`` (reply: the stored full text). If neither table
    contains it, a "not found" reply is produced. The reply is sent to the
    channel the message came from, logged at debug level, and appended to
    the module-level ``conversation_history`` as an assistant turn.

    Args:
        message: The incoming message; ``content``, ``author.mention`` and
            ``channel.send`` are used.
    """
    global conversation_history

    # The previous revision of this function split replies at 2000
    # characters before sending; a full-text reply can be longer than that,
    # so it is sent in pieces.
    max_length = 2000

    user_input = message.content.strip()
    user_mention = message.author.mention

    # Decide whether the input is a case name or a case number.
    if user_input in name_to_number:
        case_number = name_to_number[user_input]
        system_message = f"{user_mention}, '{user_input}' ์‚ฌ๊ฑด์˜ ์‚ฌ๊ฑด๋ฒˆํ˜ธ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n์‚ฌ๊ฑด๋ฒˆํ˜ธ: {case_number}"
    elif user_input in number_to_fulltext:
        full_text = number_to_fulltext[user_input]
        system_message = f"{user_mention}, ์‚ฌ๊ฑด๋ฒˆํ˜ธ '{user_input}'์˜ ์ „๋ฌธ์€ ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n\n{full_text}"
    else:
        system_message = f"{user_mention}, ๊ด€๋ จ ๋ฒ•๋ฅ  ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

    # Send the reply, one chunk of at most max_length characters at a time.
    for start in range(0, len(system_message), max_length):
        await message.channel.send(system_message[start:start + max_length])

    logging.debug(f'Full model response sent: {system_message}')
    conversation_history.append({"role": "assistant", "content": system_message})
 
 
 
 
 
 
 
110
 
111
  if __name__ == "__main__":
112
  discord_client = MyClient(intents=intents)
113
  discord_client.run(os.getenv('DISCORD_TOKEN'))
114
+