gbrabbit committed
Commit 159a5fc Β· 1 parent: 060116e

Auto commit at 09-2025-08 6:57:03

lily_llm_api/app_v2.py CHANGED
@@ -65,6 +65,7 @@ app.add_middleware(
         "http://127.0.0.1:8001",
         "http://localhost:3000",
         "http://127.0.0.1:3000",
+        "https://hearthchat-production.up.railway.app",
         "*"  # allow every origin during development
     ],
     allow_credentials=True,
@@ -485,7 +486,7 @@ async def generate_multimodal(
             return_tensors="pt",
             padding=True,
             truncation=True,
-            max_length=100,
+            max_length=max_length,
         )
 
         if 'token_type_ids' in inputs:
@@ -557,10 +558,10 @@ async def generate_multimodal(
                 top_k=40,
                 top_p=top_p,
                 repetition_penalty=1.1,
-                no_repeat_ngram_size=2,
+                # no_repeat_ngram_size=2,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id,
-                use_cache=True
+                # use_cache=True
             )
             logger.info("βœ… Real multimodal generation succeeded!")
 
@@ -578,9 +579,9 @@ async def generate_multimodal(
             enhanced_inputs = tokenizer(
                 enhanced_formatted_prompt,
                 return_tensors="pt",
-                padding=True,
-                truncation=True,
-                max_length=256
+                # padding=True,
+                # truncation=True,
+                max_length=max_length
             )
 
             if 'token_type_ids' in enhanced_inputs:
@@ -597,10 +598,10 @@ async def generate_multimodal(
                 top_k=40,
                 top_p=top_p,
                 repetition_penalty=1.1,
-                no_repeat_ngram_size=2,
+                # no_repeat_ngram_size=2,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id,
-                use_cache=True
+                # use_cache=True
             )
         else:
             # text-only generation
@@ -613,11 +614,11 @@ async def generate_multimodal(
                 temperature=temperature,
                 top_k=40,
                 top_p=top_p,
-                repetition_penalty=1.1,
-                no_repeat_ngram_size=2,
+                # repetition_penalty=1.1,
+                # no_repeat_ngram_size=2,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id,
-                use_cache=True
+                # use_cache=True
             )
 
         # extract the response
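Taken together, these hunks do two things: the CORS list gains the deployed HearthChat frontend's origin, and the generation paths drop the hard-coded max_length=100 in favor of the request's max_length while commenting out no_repeat_ngram_size, use_cache, and (in the text-only path) repetition_penalty. The explicit Railway origin matters because browsers refuse a wildcard Access-Control-Allow-Origin on credentialed requests, so with allow_credentials=True the "*" entry alone would not cover a logged-in frontend. A minimal sketch of the resulting middleware block (only the origins list comes from the diff; the other keyword arguments are assumptions):

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://127.0.0.1:8001",
        "http://localhost:3000",
        "http://127.0.0.1:3000",
        "https://hearthchat-production.up.railway.app",
        "*",  # allow every origin during development
    ],
    allow_credentials=True,
    allow_methods=["*"],   # assumption: not shown in the hunk
    allow_headers=["*"],   # assumption: not shown in the hunk
)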
lily_llm_api/models/kanana_1_5_v_3b_instruct.py CHANGED
@@ -16,6 +16,8 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 logger = logging.getLogger(__name__)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
+max_new_tokens = 64
+
 class Kanana15V3bInstructProfile:
     """Kanana-1.5-v-3b-instruct model profile"""
 
@@ -181,7 +183,7 @@ class Kanana15V3bInstructProfile:
             token=HF_TOKEN,
             torch_dtype=torch.float16,
             trust_remote_code=True,
-            cache_dir="/app/cache/transformers",
+            # cache_dir="/app/cache/transformers",
             # device_map="auto",
             # low_cpu_mem_usage=True,
         ).to(DEVICE)
@@ -197,7 +199,7 @@ class Kanana15V3bInstructProfile:
 
     def get_generation_config(self) -> Dict[str, Any]:
         # Model parameter tuning; max_new_tokens: maximum length of the generated text (increased for image descriptions)
-        return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
+        return {"max_new_tokens": max_new_tokens, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
 
     def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
         """
lily_llm_api/models/kanana_1_5_v_3b_instruct_250809_0055.py CHANGED
@@ -16,6 +16,8 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 logger = logging.getLogger(__name__)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
+max_new_tokens = 64
+
 class Kanana15V3bInstructProfile:
     """Kanana-1.5-v-3b-instruct model profile"""
 
@@ -175,7 +177,7 @@ class Kanana15V3bInstructProfile:
 
     def get_generation_config(self) -> Dict[str, Any]:
         # Model parameter tuning; max_new_tokens: maximum length of the generated text (increased for image descriptions)
-        return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
+        return {"max_new_tokens": max_new_tokens, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
 
     def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
         """
lily_llm_core/config.py CHANGED
@@ -27,16 +27,16 @@ class DatabaseSettings(BaseSettings):
 class ModelSettings(BaseSettings):
     """Model settings"""
 
-    default_model: str = Field(default="polyglot-ko-1.3b-chat", description="Default model")
+    default_model: str = Field(default="kanana-1.5-v-3b-instruct", description="Default model")
     model_cache_dir: str = Field(default="./models", description="Model cache directory")
     model_download_timeout: int = Field(default=3600, description="Model download timeout (seconds)")
     model_max_memory: str = Field(default="4GB", description="Maximum model memory usage")
     model_device: str = Field(default="auto", description="Model device (auto/cpu/cuda)")
     model_precision: str = Field(default="float16", description="Model precision (float16/float32/bfloat16)")
 
-    # Per-model settings
+    # Per-model settings
+    kanana_1_5_v_3b_instruct_model_path: str = Field(default="./models/kanana_1_5_v_3b_instruct", description="Kanana 1.5 v 3b model path")
     polyglot_ko_1_3b_chat_model_path: str = Field(default="./models/polyglot-ko-1.3b-chat", description="Polyglot 1.3b model path")
-    kanana_1_5_v_3b_instruct_model_path: str = Field(default="./models/kanana_1_5_v_3b_instruct", description="Kanana 1.5 v 3b model path")
     polyglot_ko_5_8b_chat_model_path: str = Field(default="./models/polyglot-ko-5.8b-chat", description="Polyglot 5.8b model path")
 
     class Config:
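Since ModelSettings extends pydantic's BaseSettings, these Field values are only defaults: a matching environment variable (subject to whatever env_prefix the Config class declares, which is not shown here) still overrides them at runtime. A sketch, assuming no such override is set:

from lily_llm_core.config import ModelSettings  # module path taken from the diff header

settings = ModelSettings()
print(settings.default_model)                        # "kanana-1.5-v-3b-instruct"
print(settings.kanana_1_5_v_3b_instruct_model_path)  # "./models/kanana_1_5_v_3b_instruct"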
test.py CHANGED
@@ -1,11 +1,13 @@
 import requests
 import json
 import os  # add the os module
+from dotenv import load_dotenv
+load_dotenv()
 
 # 1. Read the Hugging Face token from an environment variable.
 #    Set it beforehand in the terminal with `set HUGGING_FACE_TOKEN=hf_...` (Windows)
 #    or `export HUGGING_FACE_TOKEN=hf_...` (Mac/Linux).
-HF_TOKEN = os.getenv("HUGGING_FACE_TOKEN")
+HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Hugging Face FastAPI server URL
 HF_API_BASE = "https://gbrabbit-lily-fast-api.hf.space"
@@ -26,7 +28,7 @@ def test_generate_text():
 
     data = {
         'prompt': 'Hello! Are you doing well in the private Space?',
-        'max_length': 128
+        'max_length': 20
     }
 
     print(f"πŸ“€ Request data (Form): {json.dumps(data, ensure_ascii=False)}")
@@ -36,7 +38,7 @@ def test_generate_text():
         f"{HF_API_BASE}/generate",
         headers=headers,  # <<-- add the auth header!
         data=data,
-        timeout=300
+        timeout=2000
     )
 
     print(f"βœ… Status code: {response.status_code}")  # This should now show 200.
test_hf_private.py ADDED
@@ -0,0 +1,60 @@
+import requests
+import json
+import os  # add the os module
+from dotenv import load_dotenv
+load_dotenv()
+
+# 1. Read the Hugging Face token from an environment variable.
+#    Set it beforehand in the terminal with `set HUGGING_FACE_TOKEN=hf_...` (Windows)
+#    or `export HUGGING_FACE_TOKEN=hf_...` (Mac/Linux).
+HF_TOKEN = os.getenv("HF_TOKEN")
+
+# Hugging Face FastAPI server URL
+HF_API_BASE = "https://gbrabbit-lily-fast-api.hf.space"
+
+def test_generate_text():
+    """Text generation test (with auth)"""
+    print("\nπŸ” Text generation test...")
+
+    if not HF_TOKEN:
+        print("❌ The HUGGING_FACE_TOKEN environment variable is not set.")
+        return False
+
+    try:
+        # 2. Build the headers that carry the auth token.
+        headers = {
+            "Authorization": f"Bearer {HF_TOKEN}"
+        }
+
+        data = {
+            'prompt': 'Hello! Are you doing well in the private Space?',
+            'max_length': 128
+        }
+
+        print(f"πŸ“€ Request data (Form): {json.dumps(data, ensure_ascii=False)}")
+
+        # 3. Pass the headers parameter to requests.post.
+        response = requests.post(
+            f"{HF_API_BASE}/generate",
+            headers=headers,  # <<-- add the auth header!
+            data=data,
+            timeout=500
+        )
+
+        print(f"βœ… Status code: {response.status_code}")  # This should now show 200.
+
+        if response.status_code == 200:
+            result = response.json()
+            print(f"βœ… Response: {json.dumps(result, indent=2, ensure_ascii=False)}")
+        else:
+            print(f"❌ Response: {response.text}")
+
+        return response.status_code == 200
+
+    except Exception as e:
+        print(f"❌ Text generation test failed: {e}")
+        return False
+
+# run the script
+if __name__ == "__main__":
+    test_generate_text()
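The test sends its payload via requests' data= keyword, i.e. as application/x-www-form-urlencoded form fields, so the server side presumably declares Form parameters. A hypothetical sketch of the /generate signature this test assumes (the real endpoint lives in lily_llm_api/app_v2.py and may differ):

from fastapi import FastAPI, Form

app = FastAPI()

@app.post("/generate")
async def generate(prompt: str = Form(...), max_length: int = Form(128)):
    # tokenize, run model.generate(), and return JSON (shape assumed)
    return {"generated_text": "..."}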