Spaces: Running
Commit 526927a · Parent(s): 0
Fresh start for HF Spaces deployment

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- .dockerignore +119 -0
- .gitattributes +2 -0
- .gitignore +230 -0
- .vscode/settings.json +4 -0
- DEPLOYMENT_CHECKLIST.md +152 -0
- DEPLOYMENT_GUIDE.md +204 -0
- Dockerfile +66 -0
- Dockerfile.blank +43 -0
- Dockerfile.gpu +57 -0
- Dockerfile.huggingface +66 -0
- Dockerfile.huggingface.predownload +65 -0
- Dockerfile.latex-ocr +70 -0
- Dockerfile.local +54 -0
- ENVIRONMENT_VARIABLES.md +162 -0
- GPU_DEPLOYMENT_GUIDE.md +240 -0
- HEARTH_CHAT_INTEGRATION.md +382 -0
- HISTORY.md +191 -0
- HUGGINGFACE_CLOUD_GUIDE.md +241 -0
- PROMPT.md +105 -0
- README.md +137 -0
- README_DEPLOYMENT.md +304 -0
- README_LILY.md +137 -0
- README_gradio.md +40 -0
- README_huggingface.md +137 -0
- WINDOWS_GPU_DEPLOYMENT_GUIDE.md +292 -0
- __init__.py +1 -0
- app_huggingface.py +102 -0
- app_local.py +19 -0
- config.yaml +1 -0
- deploy_gpu.sh +76 -0
- deploy_gpu_huggingface.sh +90 -0
- deploy_gpu_windows.bat +91 -0
- docker-compose.gpu.yml +66 -0
- docker-compose.yml +101 -0
- docs/API_REFERENCE.md +507 -0
- docs/USER_GUIDE.md +719 -0
- download_model.py +57 -0
- fix_huggingface_hub.bat +31 -0
- huggingface_cloud_setup.py +186 -0
- huggingface_gpu_setup.py +210 -0
- lily-math-rag +1 -0
- lily_llm.db +0 -0
- lily_llm_api/app.py +246 -0
- lily_llm_api/app_v2.py +2049 -0
- lily_llm_api/models/__init__.py +49 -0
- lily_llm_api/models/dialogpt_medium.py +82 -0
- lily_llm_api/models/kanana_1_5_2_1b_instruct.py +93 -0
- lily_llm_api/models/kanana_1_5_v_3b_instruct.py +246 -0
- lily_llm_api/models/kanana_nano_2_1b_instruct.py +95 -0
- lily_llm_api/models/mistral_7b_instruct.py +103 -0
.dockerignore ADDED
@@ -0,0 +1,119 @@
+# Git
+.git
+.gitignore
+.env
+lily_llm_media/
+lily_llm_env/
+lily_llm_core/models/
+lily_llm_ignore/
+vector_stores/
+latex_ocr_env/
+lily_llm_utils/LaTeX-OCR/
+hearth_llm_model/
+ocr_models/
+latex_ocr_faiss_simple/
+latex_ocr_faiss_stores/
+uploads/
+simple_stores/
+notebooks/
+lily_llm_etc/
+*.safetensors
+*.pth
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+venv/
+env/
+ENV/
+.venv/
+.env/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+logs/
+*.out
+
+# Cache
+cache/
+.cache/
+__pycache__/
+
+# Temporary files
+temp/
+tmp/
+*.tmp
+*.temp
+
+# Large model files (will be downloaded at runtime from Hugging Face Hub)
+*.bin
+*.safetensors
+*.pt
+*.pth
+models/*/
+lily_llm_core/models/*/
+
+# Data files
+data/
+uploads/
+vector_stores/
+
+# Backup files
+backup/
+*.backup
+*.bak
+
+# Documentation (except README)
+docs/
+*.md
+!README_huggingface.md
+
+# Test files
+test_*
+*_test.py
+tests/
+
+# Docker
+Dockerfile
+Dockerfile.*
+!Dockerfile.huggingface
+docker-compose.yml
+docker-compose.yaml
+
+# Other configs
+.env
+.env.*
+config.yaml
+config.json
.gitattributes ADDED
@@ -0,0 +1,2 @@
+*.pdf filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,230 @@
+.github/
+.env
+lily_llm_env/
+lily_llm_core/models/
+__pycache__/
+*.pyc
+.ipynb_checkpoints/
+lily_llm_media/
+vector_stores/
+latex_ocr_env/
+lily_llm_ignore/
+lily_llm_utils/LaTeX-OCR/
+hearth_llm_model/
+ocr_models/
+latex_ocr_faiss_simple/
+latex_ocr_faiss_stores/
+uploads/
+simple_stores/
+notebooks/
+lily_llm_etc/
+*.safetensors
+*.pth
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+# in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+# and can be added to the global gitignore or merged into this file. However, if you prefer,
+# you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Cursor
+# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+# refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
.vscode/settings.json ADDED
@@ -0,0 +1,4 @@
+{
+    "terminal.integrated.cwd": "c:/Project/lily_generate_project/lily_generate_package"
+}
+
DEPLOYMENT_CHECKLIST.md ADDED
@@ -0,0 +1,152 @@
+# Lily LLM API - Hugging Face Spaces Deployment Checklist
+
+## Pre-Deployment Checklist
+
+### Required Files
+- [ ] `Dockerfile.huggingface` - Docker configuration file
+- [ ] `app_huggingface.py` - Hugging Face Spaces entry point
+- [ ] `requirements_full.txt` - complete dependency package list
+- [ ] `README_huggingface.md` - project description
+- [ ] `.dockerignore` - files excluded from the Docker build
+
+### Project Structure
+- [ ] `lily_llm_api/` - FastAPI server code
+- [ ] `lily_llm_core/` - core RAG and AI logic
+- [ ] `lily_llm_utils/` - utility functions
+- [ ] Model files are in the correct paths
+
+### Hugging Face Account Preparation
+- [ ] Hugging Face account created
+- [ ] Access token with Write permission generated
+- [ ] Token stored in a safe place
+
+## Deployment Steps
+
+### Step 1: Create the Hugging Face Space
+- [ ] Go to [Hugging Face Spaces](https://huggingface.co/spaces)
+- [ ] Click "Create new Space"
+- [ ] Create with the following settings:
+  - [ ] **Space name**: `lily-llm-api` (or any name you prefer)
+  - [ ] **SDK**: select `Docker`
+  - [ ] **Hardware**: `CPU basic` (free) or `CPU upgrade` (paid)
+  - [ ] **Visibility**: `Public` or `Private`
+
+### Step 2: Upload Files
+- [ ] Clone the Space repository or use the web interface
+- [ ] Copy the required files under their correct names:
+  - [ ] `Dockerfile.huggingface` → `Dockerfile`
+  - [ ] `requirements_full.txt` → `requirements.txt`
+  - [ ] `README_huggingface.md` → `README.md`
+- [ ] Copy the source code directories
+- [ ] Git commit and push (when using the Git method)
+
+### Step 3: Set Environment Variables
+In Space Settings > Variables, set:
+- [ ] `HOST=0.0.0.0`
+- [ ] `PORT=7860`
+- [ ] `PYTHONPATH=/app`
+- [ ] `PYTHONUNBUFFERED=1`
+- [ ] `TOKENIZERS_PARALLELISM=false`
+- [ ] `OMP_NUM_THREADS=1`
+- [ ] `MKL_NUM_THREADS=1`
+
+### Step 4: Verify the Build and Deployment
+- [ ] Check the build logs on the Space page
+- [ ] If errors occur, analyze the logs and fix them
+- [ ] Confirm the app runs after the build completes
+
+## Test Checklist
+
+### Basic API Tests
+- [ ] Health check: `GET /health`
+  ```bash
+  curl https://YOUR_USERNAME-lily-llm-api.hf.space/health
+  ```
+
+- [ ] Model list: `GET /models`
+  ```bash
+  curl https://YOUR_USERNAME-lily-llm-api.hf.space/models
+  ```
+
+- [ ] Text generation: `POST /generate`
+  ```bash
+  curl -X POST https://YOUR_USERNAME-lily-llm-api.hf.space/generate \
+    -F "prompt=Hello! This is a test message."
+  ```
+
+### Advanced Feature Tests
+- [ ] Image processing test
+- [ ] RAG system test (document upload and query)
+- [ ] Multimodal feature test
+
+### Performance Tests
+- [ ] Measure response time
+- [ ] Verify concurrent request handling
+- [ ] Monitor memory usage
+
+## Hearth Chat Integration Preparation
+
+### Record URLs
+After deployment, record the following URLs:
+- [ ] Hugging Face Space URL: `https://YOUR_USERNAME-lily-llm-api.hf.space`
+- [ ] API docs URL: `https://YOUR_USERNAME-lily-llm-api.hf.space/docs`
+
+### Update Hearth Chat Settings
+- [ ] Add a Lily LLM option to the AI settings modal
+- [ ] Add Hugging Face API call logic to the backend consumers.py
+- [ ] Set the Lily LLM API URL in the environment variables
+
+## Troubleshooting
+
+### Common Errors and Fixes
+
+**Build failure**
+- [ ] Check `requirements.txt` dependencies
+- [ ] Check `Dockerfile` for syntax errors
+- [ ] Check the `.dockerignore` file
+
+**Out of memory**
+- [ ] Remove unnecessary packages
+- [ ] Optimize model size
+- [ ] Consider a hardware upgrade
+
+**Module import errors**
+- [ ] Check the `PYTHONPATH` environment variable
+- [ ] Check file paths and structure
+- [ ] Check dependency package versions
+
+**No API response**
+- [ ] Check the port setting (7860)
+- [ ] Check firewall settings
+- [ ] Check the logs for error messages
+
+## Monitoring Setup
+
+### Post-Deployment Monitoring
+- [ ] Check usage on the Space dashboard
+- [ ] Check the logs regularly
+- [ ] Monitor performance metrics
+- [ ] Collect user feedback
+
+### Maintenance Plan
+- [ ] Establish a regular update plan
+- [ ] Establish a backup strategy
+- [ ] Establish an incident response plan
+
+---
+
+## Deployment Complete!
+
+Once every checklist item is done, the Lily LLM API is running successfully on Hugging Face Spaces.
+
+**Next step**: see [HEARTH_CHAT_INTEGRATION.md](./HEARTH_CHAT_INTEGRATION.md) to integrate with the Railway Hearth Chat.
+
+---
+
+## Support
+
+If problems occur during deployment:
+1. Check and analyze the logs
+2. See the detailed guide in [DEPLOYMENT_GUIDE.md](./DEPLOYMENT_GUIDE.md)
+3. Use the Hugging Face Community forum
+4. Get technical support via GitHub Issues
DEPLOYMENT_GUIDE.md ADDED
@@ -0,0 +1,204 @@
+# Lily LLM API - Hugging Face Spaces Deployment Guide
+
+## Step-by-Step Deployment Guide
+
+### 1. Preparation
+
+#### 1.1 Hugging Face Account and Token
+1. Create a [Hugging Face](https://huggingface.co) account
+2. Generate a token with Write permission at [Settings > Access Tokens](https://huggingface.co/settings/tokens)
+3. Store the token in a safe place
+
+#### 1.2 Check the Required Files
+- `Dockerfile.huggingface` - Docker configuration
+- `app_huggingface.py` - entry point
+- `requirements_full.txt` - dependency packages
+- `README_huggingface.md` - project description
+- `.dockerignore` - files excluded from the Docker build
+
+### 2. Create the Hugging Face Space
+
+#### 2.1 Create the Space
+1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
+2. Click "Create new Space"
+3. Create the Space with the following settings:
+   - **Owner**: your account
+   - **Space name**: `lily-llm-api`
+   - **License**: `MIT`
+   - **Select the Space SDK**: `Docker`
+   - **Space hardware**: `CPU basic` (free) or `CPU upgrade` (paid, faster)
+   - **Visibility**: `Public` or `Private`
+
+#### 2.2 Configure the Space
+After creating the Space, in Settings:
+- **Variables**: set the required environment variables
+- **Secrets**: set sensitive values such as API keys
+
+### 3. Deploy the Code
+
+#### 3.1 Git Method (Recommended)
+
+```bash
+# 1. Clone the Space repository
+git clone https://huggingface.co/spaces/YOUR_USERNAME/lily-llm-api
+cd lily-llm-api
+
+# 2. Copy the required files
+cp /path/to/lily_generate_package/Dockerfile.huggingface ./Dockerfile
+cp /path/to/lily_generate_package/app_huggingface.py ./
+cp /path/to/lily_generate_package/requirements_full.txt ./requirements.txt
+cp /path/to/lily_generate_package/README_huggingface.md ./README.md
+cp /path/to/lily_generate_package/.dockerignore ./
+
+# 3. Copy the project source code
+cp -r /path/to/lily_generate_package/lily_llm_api ./
+cp -r /path/to/lily_generate_package/lily_llm_core ./
+cp -r /path/to/lily_generate_package/lily_llm_utils ./
+
+# 4. Git commit and push
+git add .
+git commit -m "Initial deployment of Lily LLM API"
+git push
+```
+
+#### 3.2 Web Interface Method
+
+1. Click the "Files" tab on the Hugging Face Space page
+2. Click "Add file" > "Upload files"
+3. Upload the required files via drag and drop
+4. Write a commit message and click "Commit changes"
+
+### 4. Set Environment Variables
+
+Set the following variables in Space Settings > Variables:
+
+```bash
+# Server settings
+HOST=0.0.0.0
+PORT=7860
+PYTHONPATH=/app
+PYTHONUNBUFFERED=1
+
+# Model settings
+DEFAULT_MODEL=kanana-1.5-v-3b-instruct
+MAX_NEW_TOKENS=256
+TEMPERATURE=0.7
+
+# Cache settings
+TRANSFORMERS_CACHE=/app/cache/transformers
+HF_HOME=/app/cache/huggingface
+TORCH_HOME=/app/cache/torch
+TOKENIZERS_PARALLELISM=false
+
+# Performance optimization
+OMP_NUM_THREADS=1
+MKL_NUM_THREADS=1
+```
+
+### 5. Verify the Deployment
+
+#### 5.1 Check the Build Logs
+1. Click the "Logs" tab on the Space page
+2. Check the Docker build and runtime logs
+3. If errors occur, use the logs to resolve them
+
+#### 5.2 Test the API
+
+After deployment, test as follows:
+
+```python
+import requests
+
+# Health check
+response = requests.get("https://YOUR_USERNAME-lily-llm-api.hf.space/health")
+print(response.json())
+
+# Text generation test
+response = requests.post(
+    "https://YOUR_USERNAME-lily-llm-api.hf.space/generate",
+    data={"prompt": "Hello! This is a test message."}
+)
+print(response.json())
+```
+
+### 6. Performance Optimization
+
+#### 6.1 Hardware Upgrade
+- Free CPU basic: limited performance
+- Paid CPU upgrade: faster processing
+- GPU options: needed for large models
+
+#### 6.2 Model Optimization
+```python
+# Optimize model loading in app_huggingface.py
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,   # reduce memory usage
+    device_map="auto",           # automatic device placement
+    low_cpu_mem_usage=True       # optimize CPU memory
+)
+```
+
+### 7. Troubleshooting
+
+#### 7.1 Common Errors
+
+**Out-of-memory errors**
+```bash
+# Remove unnecessary packages from requirements.txt
+# Shrink the model or apply quantization
+```
+
+**Build timeout**
+```bash
+# Exclude unnecessary files via .dockerignore
+# Optimize the build with a multi-stage build
+```
+
+**Module import errors**
+```bash
+# Check the PYTHONPATH environment variable
+# Check requirements.txt dependencies
+```
+
+#### 7.2 Log Analysis
+```bash
+# Find errors in the build log
+grep -i error build.log
+
+# Check for problems in the runtime log
+tail -f app.log
+```
+
+### 8. Prepare the Railway Hearth Chat Integration
+
+Record the deployed Hugging Face Space URL:
+```
+https://YOUR_USERNAME-lily-llm-api.hf.space
+```
+
+This URL is used as the Lily LLM API URL in Hearth Chat's AI settings.
+
+### 9. Maintenance
+
+#### 9.1 Deploying Updates
+```bash
+# After modifying the code
+git add .
+git commit -m "Update: description of changes"
+git push
+```
+
+#### 9.2 Monitoring
+- Monitor usage on the Space dashboard
+- Track errors via the logs
+- Check performance metrics
+
+---
+
+## Support
+
+If problems occur during deployment:
+1. See the official Hugging Face documentation
+2. Ask for help on the Community forum
+3. Get technical support via GitHub Issues
Dockerfile ADDED
@@ -0,0 +1,66 @@
+# Lily LLM API Server Dockerfile for Hugging Face Spaces
+FROM python:3.11-slim
+
+# Hugging Face Spaces environment variables
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+ENV GRADIO_SERVER_PORT=7860
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+ENV TOKENIZERS_PARALLELISM=false
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    wget \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    libfontconfig1 \
+    libxrender1 \
+    libgl1-mesa-glx \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies (optimized for layer caching)
+COPY requirements_full.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download NLTK data
+RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
+
+# Copy application code
+COPY . .
+
+# Create required directories
+RUN mkdir -p /app/data /app/logs /app/models /app/uploads /app/vector_stores /app/temp /app/cache/transformers /app/cache/huggingface
+
+# Set permissions
+RUN chmod +x /app/*.py
+
+# Hugging Face cache directory environment variables
+ENV TRANSFORMERS_CACHE=/app/cache/transformers
+ENV HF_HOME=/app/cache/huggingface
+ENV HF_HUB_CACHE=/app/cache/huggingface
+
+# Environment detection settings
+ENV IS_LOCAL=false
+ENV ENVIRONMENT=production
+ENV DOCKER_ENV=server
+
+# App entry point for Hugging Face Spaces
+COPY app_huggingface.py /app/app_huggingface.py
+
+# Expose port (Hugging Face Spaces uses port 7860)
+EXPOSE 7860
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+
+# Run the application
+CMD ["python", "app_huggingface.py"]
Dockerfile.blank ADDED
@@ -0,0 +1,43 @@
+# Lily LLM API Server Dockerfile
+FROM python:3.11-slim
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements first for better caching
+COPY requirements.txt .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download NLTK data
+RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
+
+# Copy application code
+COPY . .
+
+# Create necessary directories
+RUN mkdir -p /app/data /app/logs /app/models /app/uploads
+
+# Set environment variables
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+ENV HOST=0.0.0.0
+ENV PORT=8001
+
+# Expose port
+EXPOSE 8001
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8001/health || exit 1
+
+# Run the application
+CMD ["python", "run_server_v2.py"]
Dockerfile.gpu ADDED
@@ -0,0 +1,57 @@
+# Dockerfile for the GPU environment
+FROM nvidia/cuda:11.8-devel-ubuntu20.04
+
+# Environment variables
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1
+ENV CUDA_VISIBLE_DEVICES=0
+
+# Update and install system packages
+RUN apt-get update && apt-get install -y \
+    python3.9 \
+    python3.9-dev \
+    python3-pip \
+    git \
+    wget \
+    curl \
+    build-essential \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Make Python 3.9 the default
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1
+
+# Set working directory
+WORKDIR /app
+
+# Copy the Python dependency file
+COPY requirements.txt .
+
+# Install Python packages
+RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Install GPU-related packages
+RUN pip3 install --no-cache-dir \
+    torch==2.0.1+cu118 \
+    torchvision==0.15.2+cu118 \
+    torchaudio==2.0.2+cu118 \
+    --index-url https://download.pytorch.org/whl/cu118
+
+# Copy project files
+COPY . .
+
+# Port setting
+EXPOSE 8001
+
+# Add a health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8001/health || exit 1
+
+# Run command
+CMD ["python3", "run_server_v2.py"]
Dockerfile.huggingface ADDED
@@ -0,0 +1,66 @@
+# Lily LLM API Server Dockerfile for Hugging Face Spaces
+FROM python:3.11-slim
+
+# Hugging Face Spaces environment variables
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+ENV GRADIO_SERVER_PORT=7860
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+ENV TOKENIZERS_PARALLELISM=false
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    wget \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    libfontconfig1 \
+    libxrender1 \
+    libgl1-mesa-glx \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies (optimized for layer caching)
+COPY requirements_full.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download NLTK data
+RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
+
+# Copy application code
+COPY . .
+
+# Create required directories
+RUN mkdir -p /app/data /app/logs /app/models /app/uploads /app/vector_stores /app/temp /app/cache/transformers /app/cache/huggingface
+
+# Set permissions
+RUN chmod +x /app/*.py
+
+# Hugging Face cache directory environment variables
+ENV TRANSFORMERS_CACHE=/app/cache/transformers
+ENV HF_HOME=/app/cache/huggingface
+ENV HF_HUB_CACHE=/app/cache/huggingface
+
+# Environment detection settings
+ENV IS_LOCAL=false
+ENV ENVIRONMENT=production
+ENV DOCKER_ENV=server
+
+# App entry point for Hugging Face Spaces
+COPY app_huggingface.py /app/app_huggingface.py
+
+# Expose port (Hugging Face Spaces uses port 7860)
+EXPOSE 7860
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+
+# Run the application
+CMD ["python", "app_huggingface.py"]
Dockerfile.huggingface.predownload ADDED
@@ -0,0 +1,65 @@
+# Lily LLM API Server Dockerfile for Hugging Face Spaces (model pre-download variant)
+FROM python:3.11-slim
+
+# Hugging Face Spaces environment variables
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+ENV GRADIO_SERVER_PORT=7860
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+ENV TOKENIZERS_PARALLELISM=false
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    wget \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    libfontconfig1 \
+    libxrender1 \
+    libgl1-mesa-glx \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies (optimized for layer caching)
+COPY requirements_full.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download NLTK data
+RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
+
+# Create required directories
+RUN mkdir -p /app/data /app/logs /app/models /app/uploads /app/vector_stores /app/temp /app/cache/transformers /app/cache/huggingface
+
+# Hugging Face cache directory environment variables
+ENV TRANSFORMERS_CACHE=/app/cache/transformers
+ENV HF_HOME=/app/cache/huggingface
+ENV HF_HUB_CACHE=/app/cache/huggingface
+
+# Copy and run the model download script
+COPY download_model.py /app/download_model.py
+RUN python /app/download_model.py
+
+# Copy application code
+COPY . .
+
+# Set permissions
+RUN chmod +x /app/*.py
+
+# App entry point for Hugging Face Spaces
+COPY app_huggingface.py /app/app_huggingface.py
+
+# Expose port (Hugging Face Spaces uses port 7860)
+EXPOSE 7860
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:7860/health || exit 1
+
+# Run the application
+CMD ["python", "app_huggingface.py"]
Dockerfile.latex-ocr ADDED
@@ -0,0 +1,70 @@
+# Dedicated LaTeX-OCR container (CPU-based)
+FROM python:3.9-slim
+
+# Environment variables
+ENV PYTHONUNBUFFERED=1
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install system packages
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    git \
+    wget \
+    curl \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Install LaTeX-OCR dependencies
+RUN pip install --no-cache-dir \
+    torch==2.0.1 \
+    transformers==4.30.0 \
+    timm==0.6.13 \
+    numpy==1.24.3 \
+    Pillow \
+    requests \
+    faiss-cpu \
+    sentence-transformers \
+    pymupdf \
+    easyocr
+
+# Install LaTeX-OCR
+RUN pip install --no-cache-dir pix2tex
+
+# Copy project files
+COPY . .
+
+# Create the LaTeX-OCR service script
+RUN echo '#!/usr/bin/env python3\n\
+import sys\n\
+import os\n\
+sys.path.insert(0, "/app")\n\
+\n\
+from lily_llm_core.latex_ocr_subprocess_v2 import latex_ocr_processor_v2\n\
+\n\
+def main():\n\
+    print("Starting LaTeX-OCR service...")\n\
+    processor = latex_ocr_processor_v2\n\
+    print("LaTeX-OCR service ready")\n\
+\n\
+    # Keep the service alive\n\
+    import time\n\
+    while True:\n\
+        time.sleep(1)\n\
+\n\
+if __name__ == "__main__":\n\
+    main()\n\
+' > /app/latex_ocr_service.py
+
+# Make the script executable
+RUN chmod +x /app/latex_ocr_service.py
+
+# Run command
+CMD ["python3", "latex_ocr_service.py"]
Dockerfile.local ADDED
@@ -0,0 +1,54 @@
+# Lily LLM API Server Dockerfile for local development
+FROM python:3.11-slim
+
+# Local environment variables
+ENV IS_LOCAL=true
+ENV ENVIRONMENT=local
+ENV DOCKER_ENV=local
+ENV PYTHONPATH=/app
+ENV PYTHONUNBUFFERED=1
+ENV TOKENIZERS_PARALLELISM=false
+
+# Set working directory
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    wget \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    libfontconfig1 \
+    libxrender1 \
+    libgl1-mesa-glx \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies
+COPY requirements_full.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download NLTK data
+RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
+
+# Create required directories
+RUN mkdir -p /app/data /app/logs /app/models /app/uploads /app/vector_stores /app/temp
+
+# Copy application code
+COPY . .
+
+# Set permissions
+RUN chmod +x /app/*.py
+
+# Expose port (for local development)
+EXPOSE 8001
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:8001/health || exit 1
+
+# Local development app entry point
+CMD ["python", "app_local.py"]
ENVIRONMENT_VARIABLES.md ADDED
@@ -0,0 +1,162 @@
+# Environment Variable Configuration Guide
+
+## Local Development Environment
+
+### .env File Setup
+Create a `.env` file in the project root and set the following variables:
+
+```bash
+# Basic server settings
+HOST=0.0.0.0
+PORT=8001
+PYTHONPATH=/app
+PYTHONUNBUFFERED=1
+
+# Environment detection
+IS_LOCAL=true
+ENVIRONMENT=local
+DOCKER_ENV=local
+
+# Model settings
+DEFAULT_MODEL=kanana-1.5-v-3b-instruct
+MAX_NEW_TOKENS=256
+TEMPERATURE=0.7
+
+# Local model path (optional)
+LOCAL_MODEL_PATH=./lily_llm_core/models/kanana_1_5_v_3b_instruct
+```
+
+### Running Docker Locally
+```bash
+# Build the local development Docker image
+docker build -f Dockerfile.local -t lily-llm-local .
+
+# Run locally (port 8001)
+docker run -p 8001:8001 --env-file .env lily-llm-local
+```
+
+## Hugging Face Spaces Environment
+
+### Required Environment Variables
+
+Set the following variables in Hugging Face Spaces Settings > Variables:
+
+### Basic server settings
+```bash
+HOST=0.0.0.0
+PORT=7860
+PYTHONPATH=/app
+PYTHONUNBUFFERED=1
+```
+
+### Hugging Face settings
+```bash
+# Cache directories
+TRANSFORMERS_CACHE=/app/cache/transformers
+HF_HOME=/app/cache/huggingface
+HF_HUB_CACHE=/app/cache/huggingface
+
+# Model settings
+HF_MODEL_NAME=gbrabbit/lily-math-model
+DEFAULT_MODEL=kanana-1.5-v-3b-instruct
+
+# Disable tokenizer parallelism (saves memory)
+TOKENIZERS_PARALLELISM=false
+```
+
+### Performance optimization
+```bash
+# Limit CPU threads (saves memory)
+OMP_NUM_THREADS=1
+MKL_NUM_THREADS=1
+
+# PyTorch settings
+TORCH_HOME=/app/cache/torch
+PYTORCH_TRANSFORMERS_CACHE=/app/cache/transformers
+```
+
+### AI model settings
+```bash
+# Generation parameters
+MAX_NEW_TOKENS=256
+TEMPERATURE=0.7
+TOP_P=0.9
+TOP_K=40
+```
+
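These generation parameters map directly onto `generate()` keyword arguments. A sketch of reading them from the environment (an assumption: the app's actual plumbing is not shown in this diff; the variable names follow this guide):

```python
# Sketch: turn the generation-parameter env vars above into generate() kwargs.
import os

gen_kwargs = {
    "max_new_tokens": int(os.getenv("MAX_NEW_TOKENS", "256")),
    "temperature": float(os.getenv("TEMPERATURE", "0.7")),
    "top_p": float(os.getenv("TOP_P", "0.9")),
    "top_k": int(os.getenv("TOP_K", "40")),
    "do_sample": True,  # sampling is required for temperature/top_p/top_k to apply
}
# Usage (hypothetical): output_ids = model.generate(**inputs, **gen_kwargs)
```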
+## Optional Environment Variables
+
+### Debugging
+```bash
+# Log level
+LOG_LEVEL=INFO
+DEBUG=false
+
+# Verbose logging
+TRANSFORMERS_VERBOSITY=warning
+HF_HUB_VERBOSITY=warning
+```
+
+### Security (when needed)
+```bash
+# API keys (if required)
+HF_TOKEN=your_huggingface_token
+API_SECRET_KEY=your_secret_key
+```
+
+## How Automatic Model Download Works
+
+### Step 1: Check for a local model
+- Check the `/app/lily_llm_core/models/kanana_1_5_v_3b_instruct/` path
+- If the files exist, use the local model
+
+### Step 2: Download from the Hugging Face Hub
+- If no local model exists, download automatically from `gbrabbit/lily-math-model`
+- Cache it under `/app/cache/transformers/`
+
+### Step 3: Load the model
+- Load the cached model into memory
+- Server startup completes
+
|
121 |
+
## ๐ ์์ ๋์
|
122 |
+
|
123 |
+
### ์ฒซ ๋ฒ์งธ ๋ฐฐํฌ
|
124 |
+
```
|
125 |
+
๐ Hugging Face Hub์์ ๋ค์ด๋ก๋: gbrabbit/lily-math-model
|
126 |
+
๐ฅ ๋ชจ๋ธ ๋ค์ด๋ก๋ ์ค... (์ฝ 2-5๋ถ)
|
127 |
+
โ
๋ชจ๋ธ ๋ก๋ ์๋ฃ
|
128 |
+
๐ ์๋ฒ ์์: 0.0.0.0:7860
|
129 |
+
```
|
130 |
+
|
131 |
+
### ์ดํ ์ฌ์์
|
132 |
+
```
|
133 |
+
๐๏ธ ์บ์๋ ๋ชจ๋ธ ์ฌ์ฉ: /app/cache/transformers/
|
134 |
+
โ
๋ชจ๋ธ ๋ก๋ ์๋ฃ (์ฝ 30์ด)
|
135 |
+
๐ ์๋ฒ ์์: 0.0.0.0:7860
|
136 |
+
```
|
137 |
+
|
138 |
+
## ๐ ๋ฌธ์ ํด๊ฒฐ
|
139 |
+
|
140 |
+
### ๋ชจ๋ธ ๋ค์ด๋ก๋ ์คํจ
|
141 |
+
```bash
|
142 |
+
# ๋คํธ์ํฌ ์ฐ๊ฒฐ ํ์ธ
|
143 |
+
curl -I https://huggingface.co/gbrabbit/lily-math-model
|
144 |
+
|
145 |
+
# Hugging Face Hub ์ํ ํ์ธ
|
146 |
+
curl -I https://huggingface.co/api/models/gbrabbit/lily-math-model
|
147 |
+
```
|
148 |
+
|
149 |
+
### ๋ฉ๋ชจ๋ฆฌ ๋ถ์กฑ
|
150 |
+
```bash
|
151 |
+
# ๋ ์์ ๋ชจ๋ธ ์ฌ์ฉ ๋๋ ์์ํ ์ ์ฉ
|
152 |
+
# Hardware ์
๊ทธ๋ ์ด๋ ๊ณ ๋ ค (CPU upgrade ๋๋ GPU)
|
153 |
+
```
|
154 |
+
|
155 |
+
### ์บ์ ๋ฌธ์
|
156 |
+
```bash
|
157 |
+
# ์บ์ ๋๋ ํ ๋ฆฌ ๊ถํ ํ์ธ
|
158 |
+
ls -la /app/cache/
|
159 |
+
|
160 |
+
# ์บ์ ์ญ์ ํ ์ฌ์์
|
161 |
+
rm -rf /app/cache/transformers/*
|
162 |
+
```
|
GPU_DEPLOYMENT_GUIDE.md ADDED
@@ -0,0 +1,240 @@
+# GPU Deployment Guide
+
+## Prerequisites
+
+### 1. Hardware Requirements
+- **GPU**: NVIDIA GPU (RTX 3060 or better recommended)
+- **Memory**: at least 16GB RAM, 32GB recommended
+- **Storage**: at least 50GB free space
+
+### 2. Software Requirements
+
+#### Install the NVIDIA driver
+```bash
+# Ubuntu/Debian
+sudo apt update
+sudo apt install nvidia-driver-470
+
+# Windows
+# Download the latest driver from the NVIDIA website
+```
+
+#### Install CUDA
+```bash
+# Install CUDA 11.8 (recommended)
+wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
+sudo sh cuda_11.8.0_520.61.05_linux.run
+```
+
+#### Install Docker
+```bash
+# Ubuntu/Debian
+curl -fsSL https://get.docker.com -o get-docker.sh
+sudo sh get-docker.sh
+sudo usermod -aG docker $USER
+
+# Windows
+# Install Docker Desktop
+```
+
+#### Install NVIDIA Docker
+```bash
+# Install the NVIDIA Container Toolkit
+distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
+curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
+curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
+
+sudo apt-get update
+sudo apt-get install -y nvidia-docker2
+sudo systemctl restart docker
+```
+
+## Environment Setup
+
+### 1. Verify the GPU environment
+```bash
+cd C:\Project\lily_generate_project\lily_generate_package
+python check_gpu_environment.py
+```
+
+
### 2. Hugging Face ์ค์
|
61 |
+
```bash
|
62 |
+
# Hugging Face ํ ํฐ ์ค์
|
63 |
+
huggingface-cli login
|
64 |
+
|
65 |
+
# ๋๋ Python ์คํฌ๋ฆฝํธ๋ก ์ค์
|
66 |
+
python huggingface_gpu_setup.py
|
67 |
+
```
|
68 |
+
|
69 |
+
## ๐ ๋ฐฐํฌ ์คํ
|
70 |
+
|
71 |
+
### 1. ์๋ ๋ฐฐํฌ (๊ถ์ฅ)
|
72 |
+
```bash
|
73 |
+
# ๋ฐฐํฌ ์คํฌ๋ฆฝํธ ์คํ
|
74 |
+
chmod +x deploy_gpu_huggingface.sh
|
75 |
+
./deploy_gpu_huggingface.sh
|
76 |
+
```
|
77 |
+
|
78 |
+
### 2. ์๋ ๋ฐฐํฌ
|
79 |
+
```bash
|
80 |
+
# 1. ๊ธฐ์กด ์ปจํ
์ด๋ ์ ๋ฆฌ
|
81 |
+
docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans
|
82 |
+
|
83 |
+
# 2. GPU ๋ฉ๋ชจ๋ฆฌ ์ ๋ฆฌ
|
84 |
+
nvidia-smi --gpu-reset
|
85 |
+
|
86 |
+
# 3. ์ด๋ฏธ์ง ๋น๋
|
87 |
+
docker-compose -f docker-compose.gpu.yml build --no-cache
|
88 |
+
|
89 |
+
# 4. ์ปจํ
์ด๋ ์์
|
90 |
+
docker-compose -f docker-compose.gpu.yml up -d
|
91 |
+
|
92 |
+
# 5. ์๋น์ค ์ํ ํ์ธ
|
93 |
+
docker-compose -f docker-compose.gpu.yml logs -f
|
94 |
+
```
|
95 |
+
|
96 |
+
## ๐งช ํ
์คํธ
|
97 |
+
|
98 |
+
### 1. GPU ๋ฐฐํฌ ํ
์คํธ
|
99 |
+
```bash
|
100 |
+
python test_gpu_deployment.py
|
101 |
+
```
|
102 |
+
|
103 |
+
### 2. Hugging Face ๋ชจ๋ธ ํ
์คํธ
|
104 |
+
```bash
|
105 |
+
python huggingface_gpu_setup.py
|
106 |
+
```
|
107 |
+
|
108 |
+
### 3. API ํ
์คํธ
|
109 |
+
```bash
|
110 |
+
curl http://localhost:8001/health
|
111 |
+
```
|
112 |
+
|
113 |
+
## ๐ ๋ชจ๋ํฐ๋ง
|
114 |
+
|
115 |
+
### 1. GPU ์ฌ์ฉ๋ ํ์ธ
|
116 |
+
```bash
|
117 |
+
nvidia-smi
|
118 |
+
nvidia-smi -l 1 # 1์ด๋ง๋ค ์
๋ฐ์ดํธ
|
119 |
+
```
|
120 |
+
|
121 |
+
### 2. ์ปจํ
์ด๋ ์ํ ํ์ธ
|
122 |
+
```bash
|
123 |
+
docker ps
|
124 |
+
docker stats
|
125 |
+
```
|
126 |
+
|
127 |
+
### 3. ๋ก๊ทธ ํ์ธ
|
128 |
+
```bash
|
129 |
+
# ์ ์ฒด ๋ก๊ทธ
|
130 |
+
docker-compose -f docker-compose.gpu.yml logs -f
|
131 |
+
|
132 |
+
# ํน์ ์๋น์ค ๋ก๊ทธ
|
133 |
+
docker-compose -f docker-compose.gpu.yml logs -f lily-llm-api-gpu
|
134 |
+
```
|
135 |
+
|
136 |
+
## ๐ง ๋ฌธ์ ํด๊ฒฐ
|
137 |
+
|
138 |
+
### 1. GPU ๋ฉ๋ชจ๋ฆฌ ๋ถ์กฑ
|
139 |
+
```bash
|
140 |
+
# GPU ๋ฉ๋ชจ๋ฆฌ ์ ๋ฆฌ
|
141 |
+
nvidia-smi --gpu-reset
|
142 |
+
|
143 |
+
# ์ปจํ
์ด๋ ์ฌ์์
|
144 |
+
docker-compose -f docker-compose.gpu.yml restart
|
145 |
+
```
|
146 |
+
|
147 |
+
### 2. CUDA ๋ฒ์ ์ถฉ๋
|
148 |
+
```bash
|
149 |
+
# CUDA ๋ฒ์ ํ์ธ
|
150 |
+
nvcc --version
|
151 |
+
|
152 |
+
# PyTorch CUDA ๋ฒ์ ํ์ธ
|
153 |
+
python -c "import torch; print(torch.version.cuda)"
|
154 |
+
```
|
155 |
+
|
156 |
+
### 3. Docker ๊ถํ ๋ฌธ์
|
157 |
+
```bash
|
158 |
+
# Docker ๊ทธ๋ฃน์ ์ฌ์ฉ์ ์ถ๊ฐ
|
159 |
+
sudo usermod -aG docker $USER
|
160 |
+
|
161 |
+
# ์ฌ๋ก๊ทธ์ธ ํ ํ์ธ
|
162 |
+
docker ps
|
163 |
+
```
|
164 |
+
|
165 |
+
### 4. Hugging Face ํ ํฐ ๋ฌธ์
|
166 |
+
```bash
|
167 |
+
# ํ ํฐ ์ฌ์ค์
|
168 |
+
huggingface-cli logout
|
169 |
+
huggingface-cli login
|
170 |
+
```
|
171 |
+
|
172 |
+
## ๐ ์ฑ๋ฅ ์ต์ ํ
|
173 |
+
|
174 |
+
### 1. ๋ฉ๋ชจ๋ฆฌ ์ต์ ํ
|
175 |
+
```bash
|
176 |
+
# 4-bit ์์ํ ์ ์ฉ
|
177 |
+
python huggingface_gpu_setup.py
|
178 |
+
|
179 |
+
# ์ฑ๋ฅ ์ต์ ํ ์ ์ฉ
|
180 |
+
python performance_optimization.py
|
181 |
+
```
|
182 |
+
|
183 |
+
### 2. ๋ฐฐ์น ํฌ๊ธฐ ์กฐ์
|
184 |
+
```python
|
185 |
+
# config.yaml์์ ๋ฐฐ์น ํฌ๊ธฐ ์กฐ์
|
186 |
+
batch_size: 4 # GPU ๋ฉ๋ชจ๋ฆฌ์ ๋ฐ๋ผ ์กฐ์
|
187 |
+
```
|
188 |
+
|
189 |
+
### 3. ๋ชจ๋ธ ์บ์ฑ
|
190 |
+
```bash
|
191 |
+
# Hugging Face ์บ์ ์ค์
|
192 |
+
export HF_HOME="/path/to/cache"
|
193 |
+
export TRANSFORMERS_CACHE="/path/to/cache"
|
194 |
+
```
|
195 |
+
|
196 |
+
## ๐ ์
๋ฐ์ดํธ
|
197 |
+
|
198 |
+
### 1. ๋ชจ๋ธ ์
๋ฐ์ดํธ
|
199 |
+
```bash
|
200 |
+
# ์ต์ ๋ชจ๋ธ ๋ค์ด๋ก๋
|
201 |
+
python huggingface_gpu_setup.py
|
202 |
+
|
203 |
+
# ์ปจํ
์ด๋ ์ฌ์์
|
204 |
+
docker-compose -f docker-compose.gpu.yml restart
|
205 |
+
```
|
206 |
+
|
207 |
+
### 2. ์ฝ๋ ์
๋ฐ์ดํธ
|
208 |
+
```bash
|
209 |
+
# ์ฝ๋ ๋ณ๊ฒฝ ํ ์ฌ๋น๋
|
210 |
+
docker-compose -f docker-compose.gpu.yml build --no-cache
|
211 |
+
docker-compose -f docker-compose.gpu.yml up -d
|
212 |
+
```
|
213 |
+
|
214 |
+
## ๐ ์ง์
|
215 |
+
|
216 |
+
### ๋ฌธ์ ๋ฐ์ ์ ํ์ธ์ฌํญ
|
217 |
+
1. GPU ๋๋ผ์ด๋ฒ ๋ฒ์
|
218 |
+
2. CUDA ๋ฒ์
|
219 |
+
3. Docker ๋ฒ์
|
220 |
+
4. ์์คํ
๋ฉ๋ชจ๋ฆฌ ์ฌ์ฉ๋
|
221 |
+
5. GPU ๋ฉ๋ชจ๋ฆฌ ์ฌ์ฉ๋
|
222 |
+
|
223 |
+
### ๋ก๊ทธ ํ์ผ ์์น
|
224 |
+
- Docker ๋ก๊ทธ: `docker-compose -f docker-compose.gpu.yml logs`
|
225 |
+
- ์ ํ๋ฆฌ์ผ์ด์
๋ก๊ทธ: `logs/` ๋๋ ํ ๋ฆฌ
|
226 |
+
- GPU ๋ก๊ทธ: `nvidia-smi`
|
227 |
+
|
228 |
+
## ๐ฏ ์ฑ๋ฅ ๋ฒค์น๋งํฌ
|
229 |
+
|
230 |
+
### ๊ถ์ฅ ์ฌ์๋ณ ์ฑ๋ฅ
|
231 |
+
- **RTX 3060 (12GB)**: ๊ธฐ๋ณธ ๋ชจ๋ธ ์คํ ๊ฐ๋ฅ
|
232 |
+
- **RTX 3080 (10GB)**: ์ค๊ฐ ํฌ๊ธฐ ๋ชจ๋ธ ์คํ ๊ฐ๋ฅ
|
233 |
+
- **RTX 3090 (24GB)**: ๋์ฉ๋ ๋ชจ๋ธ ์คํ ๊ฐ๋ฅ
|
234 |
+
- **RTX 4090 (24GB)**: ์ต๊ณ ์ฑ๋ฅ, ๋ชจ๋ ๋ชจ๋ธ ์คํ ๊ฐ๋ฅ
|
235 |
+
|
236 |
+
### ๋ฉ๋ชจ๏ฟฝ๏ฟฝ๏ฟฝ ์ฌ์ฉ๋ ๊ฐ์ด๋
|
237 |
+
- **4-bit ์์ํ**: ๋ชจ๋ธ ํฌ๊ธฐ์ ์ฝ 25%
|
238 |
+
- **8-bit ์์ํ**: ๋ชจ๋ธ ํฌ๊ธฐ์ ์ฝ 50%
|
239 |
+
- **16-bit (FP16)**: ๋ชจ๋ธ ํฌ๊ธฐ์ ์ฝ 100%
|
240 |
+
- **32-bit (FP32)**: ๋ชจ๋ธ ํฌ๊ธฐ์ ์ฝ 200%
|
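As a worked example of those percentages (back-of-the-envelope weight-only arithmetic relative to FP16 as the 100% baseline; actual usage adds activation and framework overhead):

```python
# Rough VRAM estimate for the quantization levels above: bytes per
# parameter only, ignoring activations and overhead (a sketch, not a measurement).
def weights_gb(n_params_billion: float, bits: int) -> float:
    return n_params_billion * 1e9 * (bits / 8) / 1024**3

for bits in (4, 8, 16, 32):
    print(f"{bits:>2}-bit: {weights_gb(3.0, bits):.1f} GB of weights for a 3B-parameter model")
```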
HEARTH_CHAT_INTEGRATION.md ADDED
@@ -0,0 +1,382 @@
+# Hearth Chat and Lily LLM API Integration Guide
+
+## Integration Overview
+
+This guide explains how to integrate the Lily LLM API deployed on Hugging Face Spaces with the Hearth Chat service hosted on Railway.
+
+## 1. Confirm the Hugging Face Spaces Deployment
+
+### 1.1 Check the API endpoint
+The deployed Lily LLM API URL:
+```
+https://YOUR_USERNAME-lily-llm-api.hf.space
+```
+
+### 1.2 Test the main endpoints
+
+```bash
+# Health check
+curl https://YOUR_USERNAME-lily-llm-api.hf.space/health
+
+# Check the model list
+curl https://YOUR_USERNAME-lily-llm-api.hf.space/models
+
+# Text generation test
+curl -X POST https://YOUR_USERNAME-lily-llm-api.hf.space/generate \
+  -F "prompt=Hello! This is a test."
+```
+
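The same checks can be run from Python with aiohttp, which the Django backend below also imports; this is a sketch mirroring how an async client would call the API (the endpoint paths come from this guide):

```python
# Async equivalent of the curl tests above, using aiohttp (a sketch).
import asyncio

import aiohttp

BASE = "https://YOUR_USERNAME-lily-llm-api.hf.space"

async def main():
    async with aiohttp.ClientSession() as session:
        # Health check
        async with session.get(f"{BASE}/health") as resp:
            print(await resp.json())
        # Text generation test via multipart form data
        form = aiohttp.FormData()
        form.add_field("prompt", "Hello! This is a test.")
        async with session.post(f"{BASE}/generate", data=form) as resp:
            print(await resp.json())

asyncio.run(main())
```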
+## 2. Update Hearth Chat Settings
+
+### 2.1 Update the AI settings modal
+
+Add the Lily LLM settings in `hearth_chat_react/src/components/AISettingsModal.js`:
+
+```javascript
+// Lily LLM API URL setting
+{settings.aiProvider === 'lily' && (
+    <>
+        <div className="setting-group">
+            <label className="setting-label">Lily API URL:</label>
+            <input
+                type="url"
+                value={settings.lilyApiUrl}
+                onChange={(e) => handleInputChange('lilyApiUrl', e.target.value)}
+                placeholder="https://your-username-lily-llm-api.hf.space"
+            />
+        </div>
+
+        <div className="setting-group">
+            <label className="setting-label">Lily model:</label>
+            <select
+                value={settings.lilyModel}
+                onChange={(e) => handleInputChange('lilyModel', e.target.value)}
+            >
+                <option value="kanana-1.5-v-3b-instruct">Kanana 1.5 v3B Instruct</option>
+            </select>
+        </div>
+
+        {/* API connection status display */}
+        <div className="model-info">
+            <small style={{ color: '#4CAF50', fontWeight: 'bold' }}>
+                Hosted on Hugging Face Spaces
+            </small>
+            <small style={{ color: '#666', display: 'block', marginTop: '4px' }}>
+                Multimodal AI model (text + image processing)
+            </small>
+        </div>
+    </>
+)}
+```
+
+### 2.2 Update the connection test function
+
+```javascript
+case 'lily':
+    testUrl = `${settings.lilyApiUrl}/health`;
+    testData = {
+        method: 'GET',
+        headers: {
+            'Accept': 'application/json'
+        }
+    };
+
+    // Additional generation test
+    if (response.ok) {
+        const generateTestUrl = `${settings.lilyApiUrl}/generate`;
+        const generateResponse = await fetch(generateTestUrl, {
+            method: 'POST',
+            body: new FormData([
+                ['prompt', 'Connection test.']
+            ])
+        });
+
+        if (generateResponse.ok) {
+            const result = await generateResponse.json();
+            console.log('Lily LLM generation test succeeded:', result);
+        }
+    }
+    break;
+```
+
+## 3. Update the Backend Integration
+
+### 3.1 Modify consumers.py
+
+The Lily LLM API call in `hearth_chat_django/chat/consumers.py`:
+
+```python
+async def call_lily_api(user_message, user_emotion, image_urls=None, documents=None):
+    """Call the Lily LLM API (Hugging Face Spaces)"""
+    import requests
+    import aiohttp
+
+    try:
+        # Get the API URL from the user settings
+        user = getattr(self, 'scope', {}).get('user', None)
+        ai_settings = None
+        if user and hasattr(user, 'is_authenticated') and user.is_authenticated:
+            ai_settings = await self.get_user_ai_settings(user)
+
+        # API URL (default: Hugging Face Spaces)
+        lily_api_url = ai_settings.get('lilyApiUrl', 'https://gbrabbit-lily-math-rag.hf.space') if ai_settings else 'https://gbrabbit-lily-math-rag.hf.space'
+        lily_model = ai_settings.get('lilyModel', 'kanana-1.5-v-3b-instruct') if ai_settings else 'kanana-1.5-v-3b-instruct'
+
+        # API endpoint
+        generate_url = f"{lily_api_url}/generate"
+
+        # Prepare the request data
+        data = {
+            'prompt': f"{emotion_prompt}\n\nUser message: {user_message}",
+            'max_length': 200,
+            'temperature': 0.7
+
}
|
134 |
+
|
135 |
+
files = {}
|
136 |
+
|
137 |
+
# ์ด๋ฏธ์ง ์ฒ๋ฆฌ
|
138 |
+
if image_urls and len(image_urls) > 0:
|
139 |
+
print(f"๐ผ๏ธ ์ด๋ฏธ์ง ์ฒ๋ฆฌ: {len(image_urls)}๊ฐ")
|
140 |
+
|
141 |
+
async with aiohttp.ClientSession() as session:
|
142 |
+
for i, image_url in enumerate(image_urls[:4]): # ์ต๋ 4๊ฐ ์ด๋ฏธ์ง
|
143 |
+
try:
|
144 |
+
async with session.get(image_url) as img_response:
|
145 |
+
if img_response.status == 200:
|
146 |
+
image_data = await img_response.read()
|
147 |
+
files[f'image{i+1}'] = ('image.jpg', image_data, 'image/jpeg')
|
148 |
+
except Exception as e:
|
149 |
+
print(f"โ ์ด๋ฏธ์ง {i+1} ๋ก๋ ์คํจ: {e}")
|
150 |
+
|
151 |
+
# API ํธ์ถ
|
152 |
+
timeout = aiohttp.ClientTimeout(total=120) # 2๋ถ ํ์์์
|
153 |
+
|
154 |
+
async with aiohttp.ClientSession(timeout=timeout) as session:
|
155 |
+
if files:
|
156 |
+
# ๋ฉํฐํํธ ์์ฒญ (์ด๋ฏธ์ง ํฌํจ)
|
157 |
+
form_data = aiohttp.FormData()
|
158 |
+
for key, value in data.items():
|
159 |
+
form_data.add_field(key, str(value))
|
160 |
+
for key, (filename, file_data, content_type) in files.items():
|
161 |
+
form_data.add_field(key, file_data, filename=filename, content_type=content_type)
|
162 |
+
|
163 |
+
async with session.post(generate_url, data=form_data) as response:
|
164 |
+
if response.status == 200:
|
165 |
+
result = await response.json()
|
166 |
+
lily_response = result.get('generated_text', '์ฃ์กํฉ๋๋ค. ์๋ต์ ์์ฑํ ์ ์์ต๋๋ค.')
|
167 |
+
|
168 |
+
return {
|
169 |
+
"response": lily_response,
|
170 |
+
"provider": "lily",
|
171 |
+
"ai_name": "Lily LLM",
|
172 |
+
"ai_type": "huggingface"
|
173 |
+
}
|
174 |
+
else:
|
175 |
+
error_text = await response.text()
|
176 |
+
raise Exception(f"Lily API ์ค๋ฅ: {response.status} - {error_text}")
|
177 |
+
else:
|
178 |
+
# ์ผ๋ฐ POST ์์ฒญ (ํ
์คํธ๋ง)
|
179 |
+
async with session.post(generate_url, data=data) as response:
|
180 |
+
if response.status == 200:
|
181 |
+
result = await response.json()
|
182 |
+
lily_response = result.get('generated_text', '์ฃ์กํฉ๋๋ค. ์๋ต์ ์์ฑํ ์ ์์ต๋๋ค.')
|
183 |
+
|
184 |
+
return {
|
185 |
+
"response": lily_response,
|
186 |
+
"provider": "lily",
|
187 |
+
"ai_name": "Lily LLM",
|
188 |
+
"ai_type": "huggingface"
|
189 |
+
}
|
190 |
+
else:
|
191 |
+
error_text = await response.text()
|
192 |
+
raise Exception(f"Lily API ์ค๋ฅ: {response.status} - {error_text}")
|
193 |
+
|
194 |
+
except Exception as e:
|
195 |
+
print(f"โ Lily LLM API ํธ์ถ ์คํจ: {e}")
|
196 |
+
raise e
|
197 |
+
```
|
198 |
+
|
199 |
+
### 3.2 ํ๊ฒฝ ๋ณ์ ์ค์
|
200 |
+
|
201 |
+
Railway ํ๊ฒฝ์์ ๋ค์ ํ๊ฒฝ ๋ณ์ ์ถ๊ฐ:
|
202 |
+
|
203 |
+
```bash
|
204 |
+
# Lily LLM API ์ค์
|
205 |
+
LILY_LLM_API_URL=https://YOUR_USERNAME-lily-llm-api.hf.space
|
206 |
+
LILY_LLM_MODEL=kanana-1.5-v-3b-instruct
|
207 |
+
LILY_LLM_TIMEOUT=120
|
208 |
+
```
|
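A minimal sketch of how the backend might read these variables as fallbacks (the defaults mirror the values hard-coded in the consumer above; the exact wiring is up to you):

```python
import os

LILY_LLM_API_URL = os.getenv("LILY_LLM_API_URL", "https://gbrabbit-lily-math-rag.hf.space")
LILY_LLM_MODEL = os.getenv("LILY_LLM_MODEL", "kanana-1.5-v-3b-instruct")
LILY_LLM_TIMEOUT = int(os.getenv("LILY_LLM_TIMEOUT", "120"))  # seconds
```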
## 4. Testing and Verification

### 4.1 Integration test script

```python
# test_hearth_lily_integration.py

import requests
import json

def test_hearth_chat_lily_integration():
    """Test the Hearth Chat / Lily LLM integration"""

    # Hearth Chat API endpoint (Railway)
    hearth_chat_url = "https://your-hearth-chat.railway.app"

    # 1. Log in and create a session
    session = requests.Session()

    # 2. Update the AI settings
    ai_settings = {
        "aiProvider": "lily",
        "lilyApiUrl": "https://YOUR_USERNAME-lily-llm-api.hf.space",
        "lilyModel": "kanana-1.5-v-3b-instruct",
        "aiEnabled": True
    }

    settings_response = session.patch(
        f"{hearth_chat_url}/api/chat/user/settings/",
        json=ai_settings
    )

    print(f"Settings update: {settings_response.status_code}")

    # 3. Chat test
    test_messages = [
        "안녕하세요! Lily LLM 테스트입니다.",
        "오늘 날씨가 어떤가요?",
        "간단한 수학 문제를 내주세요."
    ]

    for message in test_messages:
        print(f"\n📤 Test message: {message}")

        # Send the message over WebSocket or the HTTP API
        # (adjust to the actual implementation)

        # Check the response
        print("✅ Response received")

if __name__ == "__main__":
    test_hearth_chat_lily_integration()
```

### 4.2 Image processing test

```python
def test_image_processing():
    """Test the image-processing integration"""

    # Upload a test image; keep the file open while requests reads it
    with open("test_image.jpg", "rb") as f:
        files = {"image": f}
        data = {"message": "이미지에서 무엇을 볼 수 있나요?"}

        response = requests.post(
            "https://your-hearth-chat.railway.app/api/chat/send-message/",
            files=files,
            data=data
        )

    print(f"Image processing test: {response.status_code}")
    print(f"Response: {response.json()}")
```

## 5. Monitoring and Logs

### 5.1 Hugging Face Spaces logs

```bash
# Check real-time logs in the Spaces dashboard
# Monitor API call frequency and response times
```

### 5.2 Railway logs

```bash
# Check the Hearth Chat logs in the Railway dashboard
# Monitor Lily LLM API call successes/failures
```

## 6. Performance Optimization

### 6.1 Caching strategy

```python
# Response caching with Redis
import json
import redis

redis_client = redis.Redis(host='localhost', port=6379, db=0)

def cached_lily_response(prompt_hash, response):
    """Cache a response for one hour"""
    redis_client.setex(f"lily_cache:{prompt_hash}", 3600, json.dumps(response))

def get_cached_response(prompt_hash):
    """Look up a cached response"""
    cached = redis_client.get(f"lily_cache:{prompt_hash}")
    return json.loads(cached) if cached else None
```
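The helpers above take a `prompt_hash` but do not show how to derive one; a minimal sketch (the helper name is illustrative) is a stable hash over the prompt plus the generation parameters:

```python
import hashlib
import json

def make_prompt_hash(prompt: str, params: dict) -> str:
    """Stable cache key: SHA-256 over the prompt and sorted generation params."""
    payload = json.dumps({"prompt": prompt, "params": params},
                         sort_keys=True, ensure_ascii=False)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()

# Usage with the helpers above:
# key = make_prompt_hash("안녕하세요", {"max_length": 200, "temperature": 0.7})
# cached = get_cached_response(key)
```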
### 6.2 Load balancing

```python
# Spread load across multiple Hugging Face Spaces instances
import requests

LILY_API_ENDPOINTS = [
    "https://username1-lily-llm-api.hf.space",
    "https://username2-lily-llm-api.hf.space"
]

def get_available_endpoint():
    """Return the first endpoint that passes a health check"""
    for endpoint in LILY_API_ENDPOINTS:
        try:
            response = requests.get(f"{endpoint}/health", timeout=5)
            if response.status_code == 200:
                return endpoint
        except requests.RequestException:
            continue
    return LILY_API_ENDPOINTS[0]  # fall back to the first endpoint
```

## 7. Security Considerations

### 7.1 API key management

```python
# Keep sensitive values in environment variables
import os

LILY_API_KEY = os.getenv('LILY_API_KEY')        # optional
LILY_API_SECRET = os.getenv('LILY_API_SECRET')  # optional
```

### 7.2 Rate limiting

```python
# Per-user rate limiting
from django.core.cache import cache

def check_rate_limit(user_id):
    """Check the per-user request limit"""
    key = f"lily_api_rate_limit:{user_id}"
    current = cache.get(key, 0)

    if current >= 100:  # limit: 100 requests per hour
        return False

    cache.set(key, current + 1, 3600)  # 1-hour window
    return True
```
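For context, a hedged sketch of calling `check_rate_limit` from the consumer before each Lily call (method and field names are illustrative, not from the repo):

```python
# Inside the chat consumer, before calling the Lily API:
# (in a fully async consumer, wrap the cache-backed check with asgiref's sync_to_async)
if not check_rate_limit(user.id):
    await self.send_json({"error": "Rate limit exceeded, please try again later."})
    return
result = await self.call_lily_api(user_message, user_emotion)
```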
---

## 🎉 Integration Complete

Once everything is configured:

1. **Hugging Face Spaces** hosts the Lily LLM API server
2. **Railway** hosts the Hearth Chat service
3. **Integration**: the two services communicate seamlessly

Users can now talk to the powerful Lily LLM AI hosted on Hugging Face straight from the Hearth Chat interface! 🚀
HISTORY.md
ADDED
@@ -0,0 +1,191 @@
# Lily LLM RAG System Development History

## 📋 Project Overview
- **Goal**: build a RAG system for interpreting and solving math problems in PDF documents
- **Environment**: CPU-based development environment (testing before GPU server deployment)
- **Approach**: text-based RAG → rapid validation with small text data

## 🔄 Main Workflow

### Step 1: Analyzing the original problem
- **Problem**: the Kanana model returned raw token IDs during multimodal processing
- **Cause**: limits of the Kanana model's image processing in a CPU environment
- **Decision**: switch to text-based RAG for stability

### Step 2: Building the text-based RAG system

#### 2.1 Improved PDF text extraction
**File**: `lily_llm_core/document_processor.py`
```python
# Before: image-based OCR processing
def process_document(self, file_path: str) -> List[Document]:
    if self.get_file_type(file_path) == 'pdf':
        return self._process_pdf_as_images(file_path)  # convert pages to images

# After: direct text extraction
def process_document(self, file_path: str) -> List[Document]:
    # Text-based processing (all file types)
    documents = self.load_document(file_path)
    split_docs = self.split_documents(documents)
    return split_docs
```

#### 2.2 Simplified RAG processor
**File**: `lily_llm_core/rag_processor.py`
```python
# Multimodal handling removed; simplified to text-based processing
def generate_rag_response(self, user_id: str, document_id: str, query: str,
                          llm_model=None, image_files: List[str] = None) -> Dict[str, Any]:
    # 1. Search for similar documents
    similar_docs = vector_store_manager.search_similar(
        user_id, document_id, query, k=self.max_search_results
    )

    # 2. Generate a text-based response
    return self._generate_text_response(query, similar_docs, llm_model, image_files)
```

#### 2.3 Structured responses that work even without an LLM
```python
def _generate_text_response(self, query: str, text_docs: List[Document],
                            llm_model, image_files: List[str] = None) -> Dict[str, Any]:
    # Build the context (length-limited for stability)
    context = self._build_context(text_docs)
    if len(context) > 2000:
        context = context[:2000] + "..."

    # Generate with the LLM if one is loaded; otherwise return a structured, context-only answer
    if llm_model:
        # `prompt` is assembled from the context and query earlier (omitted in this excerpt)
        response = self._generate_with_llm_simple(prompt, llm_model)
    else:
        # Structured text response (Korean template shown to end users)
        response = f"""문서에서 검색된 관련 내용을 바탕으로 답변드립니다:

📄 검색된 내용:
{context}

❓ 질문: {query}

💡 답변: 위 검색된 내용을 참고하여 질문에 대한 답변을 찾아보시기 바랍니다."""
```

### Step 3: Building the test system

#### 3.1 Server connection and document-upload tests
**File**: `test_simple_rag.py`
```python
def test_server_connection():
    response = requests.get("http://localhost:8001/health", timeout=10)
    return response.status_code == 200

def test_document_upload():
    response = requests.post(
        "http://localhost:8001/document/upload",
        files=files,
        data=data,
        timeout=120
    )
```

#### 3.2 Text-based RAG test
**File**: `test_text_only_rag.py`
```python
def test_text_only_rag():
    test_queries = [
        "1번 문제",
        "2번 문제",
        "3번 문제",
        "수학 문제"
    ]

    for query in test_queries:
        rag_data = {
            'user_id': 'test_user',
            'document_id': document_id,
            'query': query
        }
        response = requests.post(
            f"{base_url}/rag/generate",
            data=rag_data,
            timeout=60
        )
```

#### 3.3 Tests with specific math questions
**File**: `test_specific_questions.py`
```python
test_queries = [
    "23번 문제의 답은 무엇인가요?",
    "24번 문제를 풀어주세요",
    "15번 문제의 답을 구해주세요",
    "23번 문제에서 5개의 문자를 일렬로 나열하는 경우의 수는?",
    "24번 문제에서 P(B)의 값은?"
]
```

### Step 4: Results

#### 4.1 Working features
- ✅ **PDF text extraction**: real exam-problem content extracted accurately
- ✅ **Search**: the relevant problems are found precisely
- ✅ **Fast processing**: immediate responses (works even without an LLM)
- ✅ **Structured responses**: problem analysis plus a suggested solution approach
- ✅ **Exact problem matching**: problems 23, 24, and 15 were located correctly

#### 4.2 Test results
- **Document upload**: 12 chunks succeeded
- **Search**: exactly 5 hits returned per query
- **Response time**: immediate (token-ID issue resolved)
- **Problem recognition**: real exam content extracted accurately

## 🎯 Final Outcomes

### Problems solved
1. ✅ **Token-ID issue**: resolved by switching to text-based processing
2. ✅ **PDF text extraction**: real problem content extracted successfully
3. ✅ **Rapid validation**: succeeded with small text data
4. ✅ **Structured responses**: include problem analysis and a solution approach

### Current status
The goal of **"rapid validation with small text data"** has been achieved!

- Runs fast even on CPU
- Accurately extracts real exam problems
- Search and context retrieval work reliably
- Generates structured responses

**Next step**: ready to move to a GPU server for production use

## 📁 Key Modified Files

### Core files
- `lily_llm_core/document_processor.py`: improved PDF text extraction
- `lily_llm_core/rag_processor.py`: simplified RAG processor
- `lily_llm_api/app_v2.py`: endpoint changes

### Test files
- `test_simple_rag.py`: basic connection test
- `test_text_only_rag.py`: text-based RAG test
- `test_specific_questions.py`: specific-question test
- `test_llm_rag.py`: test that includes the LLM

## 🔧 Technical Improvements

### 1. Changed PDF processing approach
- **Before**: image-based OCR → token-ID problem
- **Now**: direct text extraction → stable processing

### 2. Simplified RAG processor
- **Before**: complex multimodal processing
- **Now**: simple text-based processing

### 3. Improved response structure
- **Before**: raw token-ID responses
- **Now**: structured text responses

## 🚀 Suggested Next Steps

1. **GPU server deployment**: CPU testing is complete
2. **Better LLM integration**: fix the token-decoding issue
3. **Real problem solving**: improve math problem-solving ability
4. **Performance optimization**: handle larger datasets
HUGGINGFACE_CLOUD_GUIDE.md
ADDED
@@ -0,0 +1,241 @@
# ☁️ Hugging Face Cloud GPU Deployment Guide

## 📋 Overview

This guide explains how to set up a Hugging Face cloud GPU environment from a local PC, deploy the AI model, and connect it to the Hearth Chat instance running on Railway.

## 🎯 Goals

1. **Set up the Hugging Face cloud GPU environment**
2. **Upload the AI model to the Hugging Face Hub**
3. **Create an Inference Endpoint**
4. **Integrate with Hearth Chat on Railway**

## 🚀 Step 1: Hugging Face Account Setup

### 1.1 Create a Hugging Face account
1. **Visit the Hugging Face website**: https://huggingface.co
2. **Sign up** with your email
3. **Set up your profile**: choose a username (e.g. `your-username`)

### 1.2 Create an access token
1. **Settings > Access Tokens**: https://huggingface.co/settings/tokens
2. **Create a new token**:
   - Name: `lily-math-rag-token`
   - Role: `Write`
3. **Copy the token** and store it somewhere safe

### 1.3 Local environment setup
```bash
# Install the Hugging Face CLI
pip install huggingface_hub

# Log in
huggingface-cli login
# Enter the token created above at the prompt
```

## 🔧 Step 2: Prepare and Upload the Model

### 2.1 Check the local model
```bash
cd C:\Project\lily_generate_project\lily_generate_package
ls hearth_llm_model/
```

### 2.2 Upload the model to the Hugging Face Hub
```bash
# Upload the model
huggingface-cli upload your-username/lily-math-model hearth_llm_model/

# Or use the Python script
python huggingface_cloud_setup.py
```

### 2.3 Create a model card

````markdown
# Example model card (README.md)
---
language: ko
tags:
- math
- rag
- korean
license: mit
---

# Lily Math RAG Model

A Korean-language RAG model for solving math problems.

## Usage

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("your-username/lily-math-model")
model = AutoModelForCausalLM.from_pretrained("your-username/lily-math-model")
```
````

## ☁️ Step 3: Set Up Hugging Face Inference Endpoints

### 3.1 Create an Inference Endpoint
1. **Visit**: https://huggingface.co/inference-endpoints
2. **Click "New Endpoint"**
3. **Enter the settings**:
   - **Repository**: `your-username/lily-math-model`
   - **Framework**: `PyTorch`
   - **Region**: `us-east-1` (fastest)
   - **Instance Type**: `gpu.t4.medium` (to start)
   - **Accelerator**: `GPU`

### 3.2 Endpoint configuration
```json
{
  "repository": "your-username/lily-math-model",
  "framework": "pytorch",
  "accelerator": "gpu",
  "instance_type": "gpu.t4.medium",
  "region": "us-east-1",
  "vendor": "aws"
}
```

### 3.3 Confirm the endpoint URL
- Copy the URL of the created endpoint
- e.g. `https://your-endpoint-id.us-east-1.aws.endpoints.huggingface.cloud`

## 🚀 Step 4: Integrate with Hearth Chat on Railway

### 4.1 Set the environment variables
```bash
# Environment variables
export RAILWAY_HEARTH_CHAT_URL="https://hearth-chat-production.up.railway.app"
export HF_ENDPOINT_URL="https://your-endpoint-id.us-east-1.aws.endpoints.huggingface.cloud"
export HF_TOKEN="your-huggingface-token"
```

### 4.2 Run the integration test
```bash
python railway_hearth_chat_integration.py
```

### 4.3 Modify the Hearth Chat API (if needed)
Update Railway Hearth Chat to call the Hugging Face endpoint:

```javascript
// Hearth Chat API change
async function callHuggingFaceAPI(message) {
    const response = await fetch(process.env.HF_ENDPOINT_URL, {
        method: 'POST',
        headers: {
            'Authorization': `Bearer ${process.env.HF_TOKEN}`,
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            inputs: message,
            parameters: {
                max_length: 200,
                temperature: 0.7
            }
        })
    });

    const result = await response.json();
    return result.generated_text;
}
```

## 🧪 Step 5: Testing and Verification

### 5.1 Test the Hugging Face endpoint
```bash
# Endpoint test
curl -X POST https://your-endpoint-id.us-east-1.aws.endpoints.huggingface.cloud \
  -H "Authorization: Bearer your-token" \
  -H "Content-Type: application/json" \
  -d '{
    "inputs": "안녕하세요! 수학 문제를 도와주세요.",
    "parameters": {
      "max_length": 100,
      "temperature": 0.7
    }
  }'
```
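The same request can be issued from Python; here is a minimal sketch using `requests` (the response shape is an assumption: text-generation endpoints typically return a list of `{"generated_text": ...}` objects, which is why both shapes are handled):

```python
import os
import requests

def query_endpoint(prompt: str) -> str:
    # HF_ENDPOINT_URL and HF_TOKEN are the variables set in step 4.1
    response = requests.post(
        os.environ["HF_ENDPOINT_URL"],
        headers={
            "Authorization": f"Bearer {os.environ['HF_TOKEN']}",
            "Content-Type": "application/json",
        },
        json={"inputs": prompt, "parameters": {"max_length": 100, "temperature": 0.7}},
        timeout=120,
    )
    response.raise_for_status()
    data = response.json()
    # Text-generation endpoints usually return [{"generated_text": ...}]
    return data[0]["generated_text"] if isinstance(data, list) else data.get("generated_text", "")

print(query_endpoint("안녕하세요! 수학 문제를 도와주세요."))
```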
### 5.2 Railway integration test
```bash
# Run the full integration test
python test_railway_huggingface_integration.py
```

## 📊 Step 6: Monitoring and Optimization

### 6.1 Hugging Face monitoring
- **Inference Endpoints dashboard**: https://huggingface.co/inference-endpoints
- **Usage**: GPU utilization, request count, response time
- **Cost monitoring**: monthly usage and spend

### 6.2 Railway monitoring
- **Railway dashboard**: https://railway.app/dashboard
- **Logs**: application logs and errors
- **Performance**: response times, memory usage

## 🔧 Troubleshooting

### Hugging Face issues
1. **Token authentication errors**: regenerate and re-check the token
2. **Model upload failures**: check the file size and format
3. **Endpoint creation failures**: check your GPU quota

### Railway integration issues
1. **Connection failures**: check the URL and network
2. **API errors**: check the endpoint and headers
3. **Slow responses**: adjust the timeout settings

## 💰 Cost Optimization

### Hugging Face pricing
- **gpu.t4.medium**: $0.60/hour (starter)
- **gpu.t4.large**: $1.20/hour (more performance)
- **gpu.a10g**: $2.40/hour (high performance)
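For a sense of scale: a `gpu.t4.medium` endpoint left running around the clock costs roughly $0.60 × 24 × 30 ≈ $432 per month, which is why the auto-scaling tip below matters.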
### Cost-saving tips
1. **Auto-scaling**: pause the endpoint when it is not in use
2. **Caching**: cache responses for identical requests
3. **Batching**: process multiple requests at once

## 📋 Deployment Checklist

- [ ] Create a Hugging Face account and token
- [ ] Verify and upload the local model
- [ ] Create the Inference Endpoint
- [ ] Confirm the endpoint URL and token
- [ ] Confirm the Railway Hearth Chat URL
- [ ] Set the environment variables
- [ ] Run the integration test
- [ ] Set up monitoring
- [ ] Configure cost optimization

## 📞 Support

### Useful links
- **Hugging Face docs**: https://huggingface.co/docs
- **Inference Endpoints guide**: https://huggingface.co/docs/inference-endpoints
- **Railway docs**: https://docs.railway.app

### Troubleshooting
1. **Hugging Face support**: https://huggingface.co/support
2. **Railway support**: https://railway.app/support
3. **Community**: GitHub Issues and Discord

## 🎉 Success Criteria

Once everything is configured, you should see:

- ✅ **Hugging Face endpoint**: the AI model running on a GPU
- ✅ **Railway Hearth Chat**: chatting works in the web interface
- ✅ **Integration**: user message → Hugging Face → AI response → Hearth Chat
- ✅ **Monitoring**: live usage and performance visibility
PROMPT.md
ADDED
@@ -0,0 +1,105 @@
# Lily LLM RAG System Development - Prompt for a New Chat AI

## 🎯 Project Goal
We are building a RAG system for interpreting and solving math problems from PDF documents. Rapid validation with small text data is complete in a CPU environment; the next phase is GPU server deployment and improving real problem-solving ability.

## 📋 Current Status

### ✅ Completed work
1. **Text-based RAG system**
   - Direct PDF text extraction (instead of image OCR)
   - Simplified RAG processor (multimodal processing removed)
   - Structured responses that work even without an LLM

2. **Test system**
   - Server connection and document-upload tests
   - Text-based RAG query tests
   - Specific math-question tests

3. **Verified results**
   - PDF text extraction works reliably
   - Search works reliably (5 results per query)
   - Fast processing (immediate responses)
   - Real exam-problem content extracted accurately

### 🔧 Problems solved
1. **Token-ID issue**: resolved by switching to text-based processing
2. **PDF text extraction**: real problem content extracted successfully
3. **Rapid validation**: succeeded with small text data
4. **Structured responses**: include problem analysis and a solution approach

## 📁 Key File Structure

### Core files
- `lily_llm_core/document_processor.py`: improved PDF text extraction
- `lily_llm_core/rag_processor.py`: simplified RAG processor
- `lily_llm_api/app_v2.py`: endpoint changes

### Test files
- `test_simple_rag.py`: basic connection test
- `test_text_only_rag.py`: text-based RAG test
- `test_specific_questions.py`: specific-question test
- `test_llm_rag.py`: test that includes the LLM

## 🚀 Requested Next Steps

### 1. Prepare GPU server deployment
- CPU testing is complete
- Optimize performance for the GPU environment
- Write automated server-deployment scripts

### 2. Improve LLM integration
- Fix the Kanana model's token-decoding issue
- Improve real math problem-solving ability
- Generate more accurate answers

### 3. Improve real problem-solving ability
- Better math problem-solving logic
- Present step-by-step solutions
- Provide answers and explanations

### 4. Performance optimization
- Handle larger datasets
- Improve response speed
- Optimize memory usage

## 💡 Technical Requirements

### Current environment
- **OS**: Windows 10
- **Python**: virtual environment (lily_llm_env)
- **Server**: FastAPI (port 8001)
- **Model**: Kanana-1.5-v-3b-instruct
- **Vector store**: FAISS

### Target environment
- **GPU server**: faster processing
- **Scalability**: handle larger documents
- **Accuracy**: real problem-solving ability

## 📊 Current Test Results

### Working features
- ✅ **PDF text extraction**: real exam-problem content extracted accurately
- ✅ **Search**: the relevant problems are found precisely
- ✅ **Fast processing**: immediate responses (works even without an LLM)
- ✅ **Structured responses**: problem analysis plus a suggested solution approach
- ✅ **Exact problem matching**: problems 23, 24, and 15 were located correctly

### Test results
- **Document upload**: 12 chunks succeeded
- **Search**: exactly 5 hits returned per query
- **Response time**: immediate (token-ID issue resolved)
- **Problem recognition**: real exam content extracted accurately

## 📝 Request

We ask the new AI assistant to:

1. **Analyze the current codebase**: review the files above to understand the current state
2. **Plan the GPU server deployment**: move from the completed CPU tests to a GPU environment
3. **Improve LLM integration**: fix the token-decoding issue and raise problem-solving quality
4. **Optimize performance**: handle larger datasets and improve response speed
5. **Solve real problems**: improve the math-solving logic and answer accuracy

The "rapid validation with small text data" goal has been achieved; please focus on the next phases, "improving real problem-solving ability" and "GPU server deployment".
README.md
ADDED
@@ -0,0 +1,137 @@
# Lily LLM API - Hugging Face Spaces

## 🤖 Introduction

Lily LLM API is a high-performance AI API server with multi-model support and a RAG (Retrieval Augmented Generation) system.

### ✨ Key features

- **🧠 Multimodal AI**: text and image processing with the Kanana-1.5-v-3b-instruct model
- **📚 RAG system**: document-grounded Q&A and context retrieval
- **🔍 Vector search**: fast FAISS-based similarity search
- **📄 Document processing**: supports PDF, DOCX, TXT, and more
- **🖼️ Image OCR**: math-formula recognition via LaTeX-OCR
- **⚡ Asynchronous processing**: Celery-based background tasks
- **🌐 RESTful API**: high-performance web API built on FastAPI

### 🚀 Usage

#### 1. Text generation

```python
import requests

response = requests.post(
    "https://huggingface.co/spaces/gbrabbit/lily_fast_api/generate",
    data={"prompt": "안녕하세요! 오늘 날씨가 어떤가요?"}
)
print(response.json())
```

#### 2. Asking about an image

```python
import requests

with open("image.jpg", "rb") as f:
    response = requests.post(
        "https://huggingface.co/spaces/gbrabbit/lily_fast_api/generate",
        data={"prompt": "이미지에서 무엇을 볼 수 있나요?"},
        files={"image1": f}
    )
print(response.json())
```

#### 3. RAG-based Q&A

```python
import requests

# Upload a document
with open("document.pdf", "rb") as f:
    upload_response = requests.post(
        "https://huggingface.co/spaces/gbrabbit/lily_fast_api/upload-document",
        files={"file": f},
        data={"user_id": "your_user_id"}
    )

document_id = upload_response.json()["document_id"]

# RAG query
response = requests.post(
    "https://huggingface.co/spaces/gbrabbit/lily_fast_api/rag-query",
    json={
        "query": "문서의 주요 내용은 무엇인가요?",
        "user_id": "your_user_id",
        "document_id": document_id
    }
)
print(response.json())
```

### 📚 API Endpoints

#### Basic endpoints
- `GET /health` - server health check
- `GET /models` - list available models
- `POST /load-model` - load a model
- `POST /generate` - text/image generation

#### RAG system
- `POST /upload-document` - upload a document
- `POST /rag-query` - RAG-based query
- `GET /documents/{user_id}` - list a user's documents
- `DELETE /document/{document_id}` - delete a document

#### Advanced features
- `POST /batch-process` - batch document processing
- `GET /task-status/{task_id}` - check task status (see the polling sketch below)
- `POST /cancel-task/{task_id}` - cancel a task
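The batch endpoints run asynchronously, so a client submits a job and then polls `/task-status/{task_id}`. A minimal sketch follows; the request/response field names (`task_id`, `state`, `document_ids`) are assumptions for illustration, not confirmed from the server code:

```python
import time
import requests

BASE = "https://your-space-url"  # URL of the deployed Space

# Submit a batch job (payload fields assumed), then poll until it finishes.
task_id = requests.post(
    f"{BASE}/batch-process",
    json={"user_id": "your_user_id", "document_ids": ["doc1", "doc2"]},
).json()["task_id"]

while True:
    status = requests.get(f"{BASE}/task-status/{task_id}").json()
    if status.get("state") in ("SUCCESS", "FAILURE"):
        break
    time.sleep(2)
print(status)
```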
### 🛠️ Tech Stack

- **Backend**: FastAPI, Python 3.11
- **AI Models**: Transformers, PyTorch
- **Vector DB**: FAISS, ChromaDB
- **Task Queue**: Celery, Redis
- **OCR**: LaTeX-OCR, EasyOCR
- **Document Processing**: LangChain

### 📊 Model Info

#### Kanana-1.5-v-3b-instruct
- **Size**: 3.6B parameters
- **Language**: specialized for Korean
- **Capabilities**: text generation, image understanding
- **Context**: up to 4096 tokens

### 🔧 Configuration

The following settings can be adjusted via environment variables:

```bash
# Server settings
HOST=0.0.0.0
PORT=7860

# Model settings
DEFAULT_MODEL=kanana-1.5-v-3b-instruct
MAX_NEW_TOKENS=256
TEMPERATURE=0.7

# Cache settings
TRANSFORMERS_CACHE=/app/cache/transformers
HF_HOME=/app/cache/huggingface
```

### 📝 License

This project is distributed under the MIT License.

### 🤝 Contributing

Bug reports, feature suggestions, and pull requests are welcome!

### 📞 Support

For questions, please reach out via GitHub Issues.
README_DEPLOYMENT.md
ADDED
@@ -0,0 +1,304 @@
# Lily LLM API Deployment Guide

## 📋 Overview

This document describes the Docker-based deployment of the Lily LLM API server.

## 🏗️ Architecture

```
┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
│  Lily LLM API   │    │     Redis       │    │  Celery Worker  │
│   (FastAPI)     │───▶│   (Message      │───▶│  (Background    │
│   Port: 8001    │    │    Broker)      │    │     Tasks)      │
└─────────────────┘    └─────────────────┘    └─────────────────┘
         │                      │                      │
         │                      │                      │
         ▼                      ▼                      ▼
┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
│     Flower      │    │  Celery Beat    │    │    Database     │
│  (Monitoring)   │    │  (Scheduler)    │    │    (SQLite)     │
│   Port: 5555    │    │                 │    │                 │
└─────────────────┘    └─────────────────┘    └─────────────────┘
```

## 🚀 Quick Start

### 1. Prerequisites

- Docker
- Docker Compose
- At least 4GB RAM
- At least 10GB disk space

### 2. Run the deployment

```bash
# Clone the repository
git clone <repository-url>
cd lily_generate_package

# Run the deployment script
chmod +x scripts/deploy.sh
./scripts/deploy.sh deploy
```

### 3. Check the services

```bash
# Check service status
./scripts/deploy.sh status

# Check logs
./scripts/deploy.sh logs
```

## 📦 Docker Containers

### Main services

| Service | Port | Description |
|---------|------|-------------|
| lily-llm-api | 8001 | FastAPI main server |
| redis | 6379 | Message broker and cache |
| celery-worker | - | Background task processing |
| celery-beat | - | Scheduler |
| flower | 5555 | Celery monitoring |

### Environment variables

```yaml
# API server
REDIS_URL: redis://redis:6379
DATABASE_URL: sqlite:///app/data/lily_llm.db
LOG_LEVEL: INFO
CELERY_BROKER_URL: redis://redis:6379/0
CELERY_RESULT_BACKEND: redis://redis:6379/0
```

## 🔧 Deployment Script

### Available commands

```bash
# Full deployment
./scripts/deploy.sh deploy

# Start services
./scripts/deploy.sh start

# Stop services
./scripts/deploy.sh stop

# Restart services
./scripts/deploy.sh restart

# Update a deployment
./scripts/deploy.sh update

# Check logs
./scripts/deploy.sh logs

# Check status
./scripts/deploy.sh status

# Clean up
./scripts/deploy.sh cleanup

# Build the image
./scripts/deploy.sh build
```

## 🧪 Testing

### Docker deployment test

```bash
python scripts/test_docker_deployment.py
```

### Manual tests

```bash
# Check API health
curl http://localhost:8001/health

# List models
curl http://localhost:8001/models

# Text generation test
curl -X POST http://localhost:8001/generate \
  -H "Content-Type: application/x-www-form-urlencoded" \
  -d "prompt=안녕하세요&model_id=polyglot-ko-1.3b-chat&max_length=50"
```

## 📊 Monitoring

### Flower (Celery monitoring)

- URL: http://localhost:5555
- Shows task status, worker status, and performance metrics

### API monitoring

```bash
# Start performance monitoring
curl -X POST http://localhost:8001/monitoring/start

# Check performance status
curl http://localhost:8001/monitoring/status

# System health
curl http://localhost:8001/monitoring/health
```

## 🔒 Security

### Environment variable management

```bash
# Create a .env file
cat > .env << EOF
SECRET_KEY=your-secret-key-here
JWT_SECRET_KEY=your-jwt-secret-key
DATABASE_URL=sqlite:///app/data/lily_llm.db
REDIS_URL=redis://redis:6379
EOF
```

### Firewall configuration

```bash
# Open only the required ports
sudo ufw allow 8001  # API server
sudo ufw allow 5555  # Flower monitoring
```

## 📈 Performance Optimization

### Resource limits

```yaml
# Add to docker-compose.yml
services:
  lily-llm-api:
    deploy:
      resources:
        limits:
          memory: 4G
          cpus: '2.0'
        reservations:
          memory: 2G
          cpus: '1.0'
```

### Cache configuration

```python
# Response caching with Redis (the original stub is filled in here)
import json
import redis

redis_client = redis.Redis(host='redis', port=6379, db=0)

def cached_model_response(prompt, model_id):
    """Return the cached response, or None on a cache miss."""
    cached = redis_client.get(f"model_cache:{model_id}:{prompt}")
    return json.loads(cached) if cached else None
```

## 🚨 Troubleshooting

### Common problems

#### 1. Out of memory

```bash
# Check memory usage
docker stats

# Clean up unused containers
docker system prune -f
```

#### 2. API not responding

```bash
# Check container status
docker-compose ps

# Check logs
docker-compose logs lily-llm-api

# Restart the container
docker-compose restart lily-llm-api
```

#### 3. Redis connection failure

```bash
# Check the Redis container logs
docker-compose logs redis

# Restart Redis
docker-compose restart redis
```

### Log analysis

```bash
# Follow logs in real time
docker-compose logs -f

# Logs for a specific service
docker-compose logs -f lily-llm-api

# Errors only
docker-compose logs | grep ERROR
```

## 🔄 Updates

### Code updates

```bash
# Pull the latest code
git pull origin main

# Update the deployment
./scripts/deploy.sh update
```

### Model updates

```bash
# Replace the model files
cp new_model.safetensors ./models/

# Restart the service
docker-compose restart lily-llm-api
```

## 📚 Further Reading

- [API documentation](http://localhost:8001/docs)
- [Performance guide](./PERFORMANCE.md)
- [Security guide](./SECURITY.md)
- [Monitoring guide](./MONITORING.md)

## 🤝 Support

If a problem occurs, check:

1. The log files
2. System resources
3. Network connectivity
4. Environment variable settings

## 📝 Changelog

### v1.0.0 (2025-08-04)
- Initial Docker deployment setup
- CI/CD pipeline
- Monitoring system added
- Hardened security settings
README_LILY.md
ADDED
@@ -0,0 +1,137 @@
# Lily LLM API - Hugging Face Spaces

## 🤖 Introduction

Lily LLM API is a high-performance AI API server with multi-model support and a RAG (Retrieval Augmented Generation) system.

### ✨ Key features

- **🧠 Multimodal AI**: text and image processing with the Kanana-1.5-v-3b-instruct model
- **📚 RAG system**: document-grounded Q&A and context retrieval
- **🔍 Vector search**: fast FAISS-based similarity search
- **📄 Document processing**: supports PDF, DOCX, TXT, and more
- **🖼️ Image OCR**: math-formula recognition via LaTeX-OCR
- **⚡ Asynchronous processing**: Celery-based background tasks
- **🌐 RESTful API**: high-performance web API built on FastAPI

### 🚀 Usage

#### 1. Text generation

```python
import requests

response = requests.post(
    "https://huggingface.co/spaces/gbrabbit/lily_fast_api/generate",
    data={"prompt": "안녕하세요! 오늘 날씨가 어떤가요?"}
)
print(response.json())
```

#### 2. Asking about an image

```python
import requests

with open("image.jpg", "rb") as f:
    response = requests.post(
        "https://huggingface.co/spaces/gbrabbit/lily_fast_api/generate",
        data={"prompt": "이미지에서 무엇을 볼 수 있나요?"},
        files={"image1": f}
    )
print(response.json())
```

#### 3. RAG-based Q&A

```python
import requests

# Upload a document
with open("document.pdf", "rb") as f:
    upload_response = requests.post(
        "https://huggingface.co/spaces/gbrabbit/lily_fast_api/upload-document",
        files={"file": f},
        data={"user_id": "your_user_id"}
    )

document_id = upload_response.json()["document_id"]

# RAG query
response = requests.post(
    "https://huggingface.co/spaces/gbrabbit/lily_fast_api/rag-query",
    json={
        "query": "문서의 주요 내용은 무엇인가요?",
        "user_id": "your_user_id",
        "document_id": document_id
    }
)
print(response.json())
```

### 📚 API Endpoints

#### Basic endpoints
- `GET /health` - server health check
- `GET /models` - list available models
- `POST /load-model` - load a model
- `POST /generate` - text/image generation

#### RAG system
- `POST /upload-document` - upload a document
- `POST /rag-query` - RAG-based query
- `GET /documents/{user_id}` - list a user's documents
- `DELETE /document/{document_id}` - delete a document

#### Advanced features
- `POST /batch-process` - batch document processing
- `GET /task-status/{task_id}` - check task status
- `POST /cancel-task/{task_id}` - cancel a task

### 🛠️ Tech Stack

- **Backend**: FastAPI, Python 3.11
- **AI Models**: Transformers, PyTorch
- **Vector DB**: FAISS, ChromaDB
- **Task Queue**: Celery, Redis
- **OCR**: LaTeX-OCR, EasyOCR
- **Document Processing**: LangChain

### 📊 Model Info

#### Kanana-1.5-v-3b-instruct
- **Size**: 3.6B parameters
- **Language**: specialized for Korean
- **Capabilities**: text generation, image understanding
- **Context**: up to 4096 tokens

### 🔧 Configuration

The following settings can be adjusted via environment variables:

```bash
# Server settings
HOST=0.0.0.0
PORT=7860

# Model settings
DEFAULT_MODEL=kanana-1.5-v-3b-instruct
MAX_NEW_TOKENS=256
TEMPERATURE=0.7

# Cache settings
TRANSFORMERS_CACHE=/app/cache/transformers
HF_HOME=/app/cache/huggingface
```

### 📝 License

This project is distributed under the MIT License.

### 🤝 Contributing

Bug reports, feature suggestions, and pull requests are welcome!

### 📞 Support

For questions, please reach out via GitHub Issues.
README_gradio.md
ADDED
@@ -0,0 +1,40 @@
---
title: Lily Math RAG System
emoji: 🧮
colorFrom: blue
colorTo: purple
sdk: gradio
sdk_version: 4.0.0
app_file: app.py
pinned: false
license: mit
---

# 🧮 Lily Math RAG System

A Korean-language AI system for solving math problems.

## Features

- 💬 **Chat**: general conversation and math questions
- 🧮 **Math problem solving**: works through concrete math problems
- 📚 **RAG**: retrieval-augmented generation for accurate answers

## Usage

1. **Chat tab**: general conversation or math questions
2. **Math problem solving tab**: solve specific math problems
3. **Settings tab**: view system information

## Tech Stack

- **Model**: Polyglot-Ko 5.8B Chat
- **Framework**: Gradio
- **Language**: Korean
- **Purpose**: math education and problem solving

## License

MIT License
README_huggingface.md
ADDED
@@ -0,0 +1,137 @@
# Lily LLM API - Hugging Face Spaces

## 🤖 Introduction

Lily LLM API is a high-performance AI API server with multi-model support and a RAG (Retrieval Augmented Generation) system.

### ✨ Key features

- **🧠 Multimodal AI**: text and image processing with the Kanana-1.5-v-3b-instruct model
- **📚 RAG system**: document-grounded Q&A and context retrieval
- **🔍 Vector search**: fast FAISS-based similarity search
- **📄 Document processing**: supports PDF, DOCX, TXT, and more
- **🖼️ Image OCR**: math-formula recognition via LaTeX-OCR
- **⚡ Asynchronous processing**: Celery-based background tasks
- **🌐 RESTful API**: high-performance web API built on FastAPI

### 🚀 Usage

#### 1. Text generation

```python
import requests

response = requests.post(
    "https://your-space-url/generate",
    data={"prompt": "안녕하세요! 오늘 날씨가 어떤가요?"}
)
print(response.json())
```

#### 2. Asking about an image

```python
import requests

with open("image.jpg", "rb") as f:
    response = requests.post(
        "https://your-space-url/generate",
        data={"prompt": "이미지에서 무엇을 볼 수 있나요?"},
        files={"image1": f}
    )
print(response.json())
```

#### 3. RAG-based Q&A

```python
import requests

# Upload a document
with open("document.pdf", "rb") as f:
    upload_response = requests.post(
        "https://your-space-url/upload-document",
        files={"file": f},
        data={"user_id": "your_user_id"}
    )

document_id = upload_response.json()["document_id"]

# RAG query
response = requests.post(
    "https://your-space-url/rag-query",
    json={
        "query": "문서의 주요 내용은 무엇인가요?",
        "user_id": "your_user_id",
        "document_id": document_id
    }
)
print(response.json())
```

### 📚 API Endpoints

#### Basic endpoints
- `GET /health` - server health check
- `GET /models` - list available models
- `POST /load-model` - load a model
- `POST /generate` - text/image generation

#### RAG system
- `POST /upload-document` - upload a document
- `POST /rag-query` - RAG-based query
- `GET /documents/{user_id}` - list a user's documents
- `DELETE /document/{document_id}` - delete a document

#### Advanced features
- `POST /batch-process` - batch document processing
- `GET /task-status/{task_id}` - check task status
- `POST /cancel-task/{task_id}` - cancel a task

### 🛠️ Tech Stack

- **Backend**: FastAPI, Python 3.11
- **AI Models**: Transformers, PyTorch
- **Vector DB**: FAISS, ChromaDB
- **Task Queue**: Celery, Redis
- **OCR**: LaTeX-OCR, EasyOCR
- **Document Processing**: LangChain

### 📊 Model Info

#### Kanana-1.5-v-3b-instruct
- **Size**: 3.6B parameters
- **Language**: specialized for Korean
- **Capabilities**: text generation, image understanding
- **Context**: up to 4096 tokens

### 🔧 Configuration

The following settings can be adjusted via environment variables:

```bash
# Server settings
HOST=0.0.0.0
PORT=7860

# Model settings
DEFAULT_MODEL=kanana-1.5-v-3b-instruct
MAX_NEW_TOKENS=256
TEMPERATURE=0.7

# Cache settings
TRANSFORMERS_CACHE=/app/cache/transformers
HF_HOME=/app/cache/huggingface
```

### 📝 License

This project is distributed under the MIT License.

### 🤝 Contributing

Bug reports, feature suggestions, and pull requests are welcome!

### 📞 Support

For questions, please reach out via GitHub Issues.
WINDOWS_GPU_DEPLOYMENT_GUIDE.md
ADDED
@@ -0,0 +1,292 @@
# Windows GPU Deployment Guide

## Prerequisites

### 1. Hardware requirements
- **GPU**: NVIDIA GPU (RTX 3060 or better recommended)
- **Memory**: at least 16 GB RAM, 32 GB recommended
- **Storage**: at least 50 GB of free space

### 2. Software requirements

#### Installing the NVIDIA driver
1. **Visit the NVIDIA website**: https://www.nvidia.com/Download/index.aspx
2. **Select your GPU model**: choose the GPU you are using
3. **Download the driver**: download and install the latest driver
4. **Reboot**: restart the system after installation

#### Installing Docker Desktop
1. **Download Docker Desktop**: https://www.docker.com/products/docker-desktop
2. **Run the installer**: run the downloaded file
3. **Enable WSL 2**: enable Windows Subsystem for Linux 2
4. **Reboot**: restart the system after installation

#### Installing Python GPU libraries
```cmd
# Activate the virtual environment
lily_llm_env\Scripts\activate

# Install the GPU build of PyTorch
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Install the Hugging Face libraries
pip install transformers huggingface_hub

# Install additional GPU libraries
pip install accelerate bitsandbytes
```

## Environment setup

### 1. Check the GPU environment
```cmd
cd C:\Project\lily_generate_project\lily_generate_package
python check_gpu_environment.py
```

### 2. Windows GPU setup
```cmd
# Run the Windows GPU setup script
windows_gpu_setup.bat
```

### 3. Hugging Face setup
```cmd
# Set the Hugging Face token
huggingface-cli login

# Or configure via the Python script
python huggingface_gpu_setup.py
```

## Deployment

### 1. Automated deployment (recommended)
```cmd
# Run the Windows GPU deployment script
deploy_gpu_windows.bat
```

### 2. Manual deployment
```cmd
# 1. Clean up existing containers
docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans

# 2. Build the image
docker-compose -f docker-compose.gpu.yml build --no-cache

# 3. Start the containers
docker-compose -f docker-compose.gpu.yml up -d

# 4. Check service status
docker-compose -f docker-compose.gpu.yml logs -f
```

## Testing

### 1. GPU deployment test
```cmd
python test_gpu_deployment.py
```

### 2. Hugging Face model test
```cmd
python huggingface_gpu_setup.py
```

### 3. API test
```cmd
curl http://localhost:8001/health
```

## Monitoring

### 1. Checking GPU usage
```cmd
# GPU information
nvidia-smi

# Real-time monitoring
nvidia-smi -l 1
```

### 2. Checking container status
```cmd
# List running containers
docker ps

# Container resource usage
docker stats
```

### 3. Checking logs
```cmd
# All logs
docker-compose -f docker-compose.gpu.yml logs -f

# Logs for a specific service
docker-compose -f docker-compose.gpu.yml logs -f lily-llm-api-gpu
```

## Troubleshooting

### 1. NVIDIA driver problems
```cmd
# Check the driver version
nvidia-smi

# If problems occur, reinstall the driver:
# 1. Remove the existing driver
# 2. Download and install the latest driver
# 3. Reboot the system
```

### 2. Docker problems
```cmd
# Restart Docker Desktop
# Docker Desktop > Settings > General > Restart

# Check WSL 2
wsl --list --verbose

# Fix Docker permission problems
# Docker Desktop > Settings > Resources > WSL Integration
```

### 3. CUDA version conflicts
```cmd
# Check the PyTorch CUDA version
python -c "import torch; print(torch.version.cuda)"

# Reinstall PyTorch to match your CUDA version
pip uninstall torch torchvision torchaudio
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
```

### 4. Out of memory
```cmd
# Check GPU memory
nvidia-smi

# Restart the containers
docker-compose -f docker-compose.gpu.yml restart

# Apply memory optimizations
python performance_optimization.py
```

## Performance optimization

### 1. Windows-specific optimization
```cmd
# Increase virtual memory
# Control Panel > System > Advanced system settings > Performance > Settings > Advanced > Virtual memory

# Optimize the power plan
# Control Panel > Power Options > select High performance
```

### 2. Docker optimization
```cmd
# Optimize the Docker Desktop settings
# Docker Desktop > Settings > Resources
# - Memory: allocate 8 GB or more
# - CPUs: allocate 4 or more
# - Disk image size: 64 GB or more
```

### 3. GPU memory optimization
```cmd
# Apply 4-bit quantization
python huggingface_gpu_setup.py

# Adjust the batch size
# edit batch_size in config.yaml
```
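As a reference for what 4-bit loading involves, here is a minimal sketch using `transformers` with `bitsandbytes`. It is not the code inside `huggingface_gpu_setup.py`, and the model ID is a placeholder.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# NF4 4-bit quantization with fp16 compute; cuts weight memory roughly 4x vs fp16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model_id = "your-org/your-model"  # placeholder, not a real model ID
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",  # place layers on the available GPU automatically
)
```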
## Updating

### 1. Updating models
```cmd
# Download the latest models
python huggingface_gpu_setup.py

# Restart the containers
docker-compose -f docker-compose.gpu.yml restart
```

### 2. Updating code
```cmd
# Rebuild after code changes
docker-compose -f docker-compose.gpu.yml build --no-cache
docker-compose -f docker-compose.gpu.yml up -d
```

## Support

### Checklist when problems occur
1. **GPU driver**: can the `nvidia-smi` command run?
2. **Docker Desktop**: is WSL 2 integration enabled?
3. **CUDA version**: is PyTorch compatible with your CUDA version?
4. **System memory**: at least 16 GB of free memory
5. **GPU memory**: at least 8 GB of GPU memory

Most of this checklist can be verified with the short script below.
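A quick way to check items 1 and 3 through 5 in one pass, assuming the virtual environment is active:

```python
# Verifies driver visibility and PyTorch/CUDA compatibility in one go
import torch

print("CUDA available:", torch.cuda.is_available())
print("PyTorch built for CUDA:", torch.version.cuda)
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    free, total = torch.cuda.mem_get_info()
    print(f"GPU memory free/total: {free / 1e9:.1f} / {total / 1e9:.1f} GB")
```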
### Log locations
- **Docker logs**: `docker-compose -f docker-compose.gpu.yml logs`
- **Application logs**: the `logs/` directory
- **GPU logs**: `nvidia-smi`

## Windows-specific tips

### 1. WSL 2 optimization
```cmd
# Set WSL 2 resource limits
# create a %UserProfile%\.wslconfig file with:
[wsl2]
memory=8GB
processors=4
```

### 2. Windows Defender exclusions
```cmd
# Add the project folder to the Windows Defender exclusions
# Windows Security > Virus & threat protection > Settings > Add exclusions
```

### 3. Power management
```cmd
# Select the High performance power plan
powercfg /setactive 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c
```

## Quick start

### Step 1: Check the environment
```cmd
windows_gpu_setup.bat
```

### Step 2: Set up Hugging Face
```cmd
python huggingface_gpu_setup.py
```

### Step 3: Deploy with GPU
```cmd
deploy_gpu_windows.bat
```

### Step 4: Test
```cmd
python test_gpu_deployment.py
```

## Verifying success

Once the deployment completes successfully, the following services are running:

- **Lily LLM API**: http://localhost:8001
- **Hearth Chat**: http://localhost:8000
- **LaTeX-OCR Service**: runs as a separate container
- **GPU acceleration**: NVIDIA GPU in use
- **Hugging Face models**: optimized models loaded
__init__.py
ADDED
@@ -0,0 +1 @@
# Lily Generate Package
app_huggingface.py
ADDED
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
"""
Entry point for the Lily LLM API server on Hugging Face Spaces
"""

import os
import sys
import logging
import asyncio
import uvicorn
from pathlib import Path

# Add the project root to the Python path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

# Environment variables
os.environ.setdefault("PYTHONPATH", str(project_root))
os.environ.setdefault("HOST", "0.0.0.0")
os.environ.setdefault("PORT", "7860")

# Logging setup
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def setup_huggingface_environment():
    """Configure the Hugging Face Spaces environment"""

    # Create the required directories
    directories = [
        "data", "logs", "models", "uploads",
        "vector_stores", "temp", "cache"
    ]

    for dir_name in directories:
        dir_path = project_root / dir_name
        dir_path.mkdir(exist_ok=True)
        logger.info(f"Created directory: {dir_path}")

    # Environment variables
    env_vars = {
        "TRANSFORMERS_CACHE": str(project_root / "cache" / "transformers"),
        "HF_HOME": str(project_root / "cache" / "huggingface"),
        "TORCH_HOME": str(project_root / "cache" / "torch"),
        "TOKENIZERS_PARALLELISM": "false",
        "OMP_NUM_THREADS": "1",
        "MKL_NUM_THREADS": "1"
    }

    for key, value in env_vars.items():
        os.environ[key] = value
        logger.info(f"Set environment variable: {key}={value}")

async def main():
    """Main entry point"""

    logger.info("Starting the Lily LLM API server for Hugging Face Spaces")

    # Environment setup
    setup_huggingface_environment()

    try:
        # Import the FastAPI app
        from lily_llm_api.app_v2 import app

        # Server settings
        host = os.getenv("HOST", "0.0.0.0")
        port = int(os.getenv("PORT", "7860"))

        logger.info(f"Server starting on {host}:{port}")

        # Run the Uvicorn server
        config = uvicorn.Config(
            app=app,
            host=host,
            port=port,
            log_level="info",
            access_log=True,
            loop="asyncio"
        )

        server = uvicorn.Server(config)
        await server.serve()

    except Exception as e:
        logger.error(f"Server startup error: {e}")
        import traceback
        logger.error(f"Traceback:\n{traceback.format_exc()}")
        sys.exit(1)

if __name__ == "__main__":
    # Run via asyncio for Python 3.7+ compatibility
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("Server shutdown")
    except Exception as e:
        logger.error(f"Runtime error: {e}")
        sys.exit(1)
app_local.py
ADDED
@@ -0,0 +1,19 @@
# Local development entry point for the Lily LLM API server

import os
import logging

import uvicorn

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

if __name__ == "__main__":
    # Local development settings
    host = os.getenv("HOST", "0.0.0.0")
    port = int(os.getenv("PORT", 8001))

    logger.info(f"Starting local development server on {host}:{port}")
    logger.info("Loading environment variables from the .env file")

    # Pass the app as an import string so reload=True can re-import it on changes
    uvicorn.run("lily_llm_api.app_v2:app", host=host, port=port, reload=True)
config.yaml
ADDED
@@ -0,0 +1 @@
deploy_gpu.sh
ADDED
@@ -0,0 +1,76 @@
#!/bin/bash

# GPU deployment script
echo "Starting GPU deployment"

# Check for NVIDIA Docker support
if ! command -v nvidia-docker &> /dev/null; then
    echo "NVIDIA Docker is not installed."
    echo "NVIDIA Docker is required: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
    exit 1
fi

# Check whether a GPU is available
if ! nvidia-smi &> /dev/null; then
    echo "No usable GPU found."
    echo "Make sure the GPU driver is installed."
    exit 1
fi

echo "GPU environment check complete"

# Clean up existing containers
echo "Cleaning up existing containers..."
docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans

# Build the image
echo "Building the Docker image..."
docker-compose -f docker-compose.gpu.yml build --no-cache

# Start the containers
echo "Starting containers..."
docker-compose -f docker-compose.gpu.yml up -d

# Check service status
echo "Checking service status..."
sleep 10

# Health check
echo "Running health check..."
for i in {1..30}; do
    if curl -f http://localhost:8001/health &> /dev/null; then
        echo "Lily LLM API service is up"
        break
    fi

    if [ $i -eq 30 ]; then
        echo "Service failed to start"
        docker-compose -f docker-compose.gpu.yml logs
        exit 1
    fi

    echo "Waiting for the service to start... ($i/30)"
    sleep 2
done

# Check GPU usage
echo "GPU usage:"
nvidia-smi

# Print service info
echo ""
echo "GPU deployment complete!"
echo ""
echo "Services:"
echo "  - Lily LLM API: http://localhost:8001"
echo "  - Hearth Chat: http://localhost:8000"
echo "  - LaTeX-OCR Service: running as a separate container"
echo ""
echo "Useful commands:"
echo "  - View logs: docker-compose -f docker-compose.gpu.yml logs -f"
echo "  - Stop services: docker-compose -f docker-compose.gpu.yml down"
echo "  - Restart services: docker-compose -f docker-compose.gpu.yml restart"
echo ""
echo "Test commands:"
echo "  - API test: curl http://localhost:8001/health"
echo "  - GPU test: python test_gpu_deployment.py"
deploy_gpu_huggingface.sh
ADDED
@@ -0,0 +1,90 @@
#!/bin/bash

# Hugging Face GPU deployment script
echo "Starting Hugging Face GPU deployment"

# Check the GPU environment
echo "Checking the GPU environment..."
python check_gpu_environment.py

if [ $? -ne 0 ]; then
    echo "GPU environment check failed"
    exit 1
fi

# Hugging Face setup
echo "Configuring the Hugging Face environment..."
python huggingface_gpu_setup.py

# Clean up existing containers
echo "Cleaning up existing containers..."
docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans

# Reset GPU memory
echo "Clearing GPU memory..."
nvidia-smi --gpu-reset

# Build the image
echo "Building the Docker image..."
docker-compose -f docker-compose.gpu.yml build --no-cache

# Start the containers
echo "Starting containers..."
docker-compose -f docker-compose.gpu.yml up -d

# Check service status
echo "Checking service status..."
sleep 15

# Health check
echo "Running health check..."
for i in {1..30}; do
    if curl -f http://localhost:8001/health &> /dev/null; then
        echo "Lily LLM API service is up"
        break
    fi

    if [ $i -eq 30 ]; then
        echo "Service failed to start"
        docker-compose -f docker-compose.gpu.yml logs
        exit 1
    fi

    echo "Waiting for the service to start... ($i/30)"
    sleep 2
done

# Check GPU usage
echo "GPU usage:"
nvidia-smi

# Hugging Face model test
echo "Testing Hugging Face models..."
python test_gpu_deployment.py

# Apply performance optimization
echo "Applying performance optimizations..."
python performance_optimization.py

# Print service info
echo ""
echo "Hugging Face GPU deployment complete!"
echo ""
echo "Services:"
echo "  - Lily LLM API: http://localhost:8001"
echo "  - Hearth Chat: http://localhost:8000"
echo "  - LaTeX-OCR Service: running as a separate container"
echo ""
echo "Useful commands:"
echo "  - View logs: docker-compose -f docker-compose.gpu.yml logs -f"
echo "  - Stop services: docker-compose -f docker-compose.gpu.yml down"
echo "  - Restart services: docker-compose -f docker-compose.gpu.yml restart"
echo ""
echo "Test commands:"
echo "  - API test: curl http://localhost:8001/health"
echo "  - GPU test: python test_gpu_deployment.py"
echo "  - Hugging Face test: python huggingface_gpu_setup.py"
echo ""
echo "Using Hugging Face models:"
echo "  - Load models: python huggingface_gpu_setup.py"
echo "  - Set your token: huggingface-cli login"
deploy_gpu_windows.bat
ADDED
@@ -0,0 +1,91 @@
@echo off
REM Enable delayed expansion so !errorlevel! works inside the for-loop below
setlocal enabledelayedexpansion
echo Starting Windows GPU deployment
echo ========================================

REM Check the GPU environment
echo Checking the GPU environment...
python check_gpu_environment.py
if %errorlevel% neq 0 (
    echo GPU environment check failed
    echo Please set up the GPU environment first
    pause
    exit /b 1
)

REM Hugging Face setup
echo Configuring the Hugging Face environment...
python huggingface_gpu_setup.py

REM Clean up existing containers
echo Cleaning up existing containers...
docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans

REM GPU memory cleanup (limited on Windows)
echo Clearing GPU memory...
REM nvidia-smi --gpu-reset is of limited use on Windows, so this step is skipped

REM Build the image
echo Building the Docker image...
docker-compose -f docker-compose.gpu.yml build --no-cache

REM Start the containers
echo Starting containers...
docker-compose -f docker-compose.gpu.yml up -d

REM Check service status
echo Checking service status...
timeout /t 15 /nobreak >nul

REM Health check
echo Running health check...
for /l %%i in (1,1,30) do (
    curl -f http://localhost:8001/health >nul 2>&1
    if !errorlevel! equ 0 (
        echo Lily LLM API service is up
        goto :health_check_passed
    )
    echo Waiting for the service to start... (%%i/30)
    timeout /t 2 /nobreak >nul
)
echo Service failed to start
docker-compose -f docker-compose.gpu.yml logs
pause
exit /b 1

:health_check_passed
REM Check GPU usage
echo GPU usage:
nvidia-smi 2>nul || echo nvidia-smi is not available

REM Hugging Face model test
echo Testing Hugging Face models...
python test_gpu_deployment.py

REM Apply performance optimization
echo Applying performance optimizations...
python performance_optimization.py

REM Print service info
echo.
echo Windows GPU deployment complete!
echo.
echo Services:
echo   - Lily LLM API: http://localhost:8001
echo   - Hearth Chat: http://localhost:8000
echo   - LaTeX-OCR Service: running as a separate container
echo.
echo Useful commands:
echo   - View logs: docker-compose -f docker-compose.gpu.yml logs -f
echo   - Stop services: docker-compose -f docker-compose.gpu.yml down
echo   - Restart services: docker-compose -f docker-compose.gpu.yml restart
echo.
echo Test commands:
echo   - API test: curl http://localhost:8001/health
echo   - GPU test: python test_gpu_deployment.py
echo   - Hugging Face test: python huggingface_gpu_setup.py
echo.
echo Using Hugging Face models:
echo   - Load models: python huggingface_gpu_setup.py
echo   - Set your token: huggingface-cli login
echo.
pause
docker-compose.gpu.yml
ADDED
@@ -0,0 +1,66 @@
version: '3.8'

services:
  lily-llm-api-gpu:
    build:
      context: .
      dockerfile: Dockerfile.gpu
    container_name: lily-llm-api-gpu
    ports:
      - "8001:8001"
    volumes:
      - ./uploads:/app/uploads
      - ./vector_stores:/app/vector_stores
      - ./latex_ocr_faiss_stores:/app/latex_ocr_faiss_stores
      - ./lily_llm_media:/app/lily_llm_media
      - ./hearth_llm_model:/app/hearth_llm_model
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - PYTHONPATH=/app
      - LILY_LLM_ENV=production
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [ gpu ]
    restart: unless-stopped
    networks:
      - lily-network

  # Dedicated LaTeX-OCR container (CPU-based)
  latex-ocr-service:
    build:
      context: .
      dockerfile: Dockerfile.latex-ocr
    container_name: latex-ocr-service
    volumes:
      - ./uploads:/app/uploads
      - ./latex_ocr_faiss_stores:/app/latex_ocr_faiss_stores
    environment:
      - PYTHONPATH=/app
    restart: unless-stopped
    networks:
      - lily-network

  # Hearth Chat service (separate container)
  hearth-chat:
    image: node:18-alpine
    container_name: hearth-chat
    working_dir: /app
    volumes:
      - ../hearth_chat_package:/app
    ports:
      - "8000:8000"
    command: [ "npm", "start" ]
    restart: unless-stopped
    networks:
      - lily-network

networks:
  lily-network:
    driver: bridge

volumes:
  lily-data:
docker-compose.yml
ADDED
@@ -0,0 +1,101 @@
version: '3.8'

services:
  # Lily LLM API Server
  lily-llm-api:
    build: .
    container_name: lily-llm-api
    ports:
      - "8001:8001"
    environment:
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=sqlite:///app/data/lily_llm.db
      - LOG_LEVEL=INFO
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
      - ./models:/app/models
      - ./uploads:/app/uploads
    depends_on:
      - redis
    restart: unless-stopped
    networks:
      - lily-network

  # Redis for Celery and caching
  redis:
    image: redis:7-alpine
    container_name: lily-redis
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    restart: unless-stopped
    networks:
      - lily-network

  # Celery Worker
  celery-worker:
    build: .
    container_name: lily-celery-worker
    command: celery -A lily_llm_core.celery_app worker --loglevel=info
    environment:
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=sqlite:///app/data/lily_llm.db
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
      - ./models:/app/models
      - ./uploads:/app/uploads
    depends_on:
      - redis
    restart: unless-stopped
    networks:
      - lily-network

  # Celery Beat (Scheduler)
  celery-beat:
    build: .
    container_name: lily-celery-beat
    command: celery -A lily_llm_core.celery_app beat --loglevel=info
    environment:
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=sqlite:///app/data/lily_llm.db
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
    volumes:
      - ./data:/app/data
      - ./logs:/app/logs
    depends_on:
      - redis
    restart: unless-stopped
    networks:
      - lily-network

  # Flower (Celery Monitoring)
  flower:
    build: .
    container_name: lily-flower
    command: celery -A lily_llm_core.celery_app flower --port=5555
    ports:
      - "5555:5555"
    environment:
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
    depends_on:
      - redis
    restart: unless-stopped
    networks:
      - lily-network

volumes:
  redis_data:

networks:
  lily-network:
    driver: bridge
docs/API_REFERENCE.md
ADDED
@@ -0,0 +1,507 @@
# Lily LLM API Reference

## Overview

Lily LLM API is a RESTful API server that supports a range of language models. It provides text generation, multimodal processing, and RAG (Retrieval-Augmented Generation).

## Basic information

- **Base URL**: `http://localhost:8001`
- **API docs**: `http://localhost:8001/docs`
- **ReDoc docs**: `http://localhost:8001/redoc`

## Authentication

### JWT token authentication

```bash
# Log in
curl -X POST "http://localhost:8001/auth/login" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  -d "username=your_username&password=your_password"

# Response
{
  "access_token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9...",
  "refresh_token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9...",
  "token_type": "bearer"
}
```

### Calling protected endpoints

```bash
curl -X GET "http://localhost:8001/auth/me" \
  -H "Authorization: Bearer YOUR_ACCESS_TOKEN"
```

## AI models

### 1. Listing models

```http
GET /models
```

**Example response:**
```json
{
  "available_models": [
    {
      "model_id": "polyglot-ko-1.3b-chat",
      "display_name": "Polyglot Korean 1.3B Chat",
      "model_type": "text",
      "description": "Korean-specialized text generation model"
    },
    {
      "model_id": "kanana-1.5-v-3b-instruct",
      "display_name": "Kanana 1.5 v3B Instruct",
      "model_type": "multimodal",
      "description": "Multimodal image + text model"
    }
  ],
  "current_model": "polyglot-ko-1.3b-chat"
}
```

### 2. Text generation

```http
POST /generate
```

**Request parameters:**
```json
{
  "prompt": "Hello, please explain AI.",
  "model_id": "polyglot-ko-1.3b-chat",
  "max_length": 200,
  "temperature": 0.7,
  "top_p": 0.9,
  "do_sample": true
}
```

**Example response:**
```json
{
  "generated_text": "Hello! AI (artificial intelligence) is a computer system that artificially implements human learning and reasoning abilities...",
  "model_name": "polyglot-ko-1.3b-chat",
  "processing_time": 2.34,
  "tokens_generated": 45
}
```

### 3. Multimodal generation (image + text)

```http
POST /generate-multimodal
```

**Request (multipart/form-data):**
```
prompt: "Please describe this image"
model_id: "kanana-1.5-v-3b-instruct"
max_length: 200
temperature: 0.7
image_files: [file1, file2, ...]
```

**Example response:**
```json
{
  "generated_text": "This image shows a beautiful natural landscape...",
  "model_name": "kanana-1.5-v-3b-instruct",
  "processing_time": 15.67,
  "images_processed": 2
}
```

## Document processing (RAG)

### 1. Document upload

```http
POST /document/upload
```

**Request (multipart/form-data):**
```
file: [PDF/DOC/DOCX/PPTX file]
user_id: "user123"
```

**Example response:**
```json
{
  "document_id": "doc_123456",
  "filename": "sample.pdf",
  "file_type": "pdf",
  "file_size": 1024000,
  "pages": 15,
  "chunks": 45,
  "upload_time": "2025-08-04T10:30:00Z"
}
```

### 2. RAG query

```http
POST /rag/generate
```

**Request parameters:**
```json
{
  "query": "Tell me about the future of AI",
  "user_id": "user123",
  "max_length": 300,
  "temperature": 0.7
}
```

**Example response:**
```json
{
  "response": "The future of AI is very bright. According to the current documents...",
  "sources": [
    {
      "document_id": "doc_123456",
      "page": 5,
      "chunk": "The direction of AI technology development..."
    }
  ],
  "confidence": 0.85,
  "processing_time": 3.45
}
```

### 3. Hybrid RAG (image + documents)

```http
POST /rag/generate-hybrid
```

**Request (multipart/form-data):**
```
query: "Find the document passages related to this image"
user_id: "user123"
image_files: [image files]
max_length: 300
temperature: 0.7
```
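For reference, the same request expressed with Python's `requests` library. This is a sketch whose field names mirror the form layout above:

```python
import requests

# Two images sent under the repeated image_files field, plus the form fields above
with open("fig1.jpg", "rb") as f1, open("fig2.jpg", "rb") as f2:
    response = requests.post(
        "http://localhost:8001/rag/generate-hybrid",
        files=[
            ("image_files", ("fig1.jpg", f1, "image/jpeg")),
            ("image_files", ("fig2.jpg", f2, "image/jpeg")),
        ],
        data={
            "query": "Find the document passages related to these images",
            "user_id": "user123",
            "max_length": 300,
            "temperature": 0.7,
        },
    )
print(response.json())
```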
## Chat and session management

### 1. Creating a user

```http
POST /user/create
```

**Request parameters:**
```json
{
  "user_id": "user123",
  "username": "Test User",
  "email": "test@example.com"
}
```

### 2. Creating a chat session

```http
POST /session/create
```

**Request parameters:**
```json
{
  "user_id": "user123",
  "session_name": "AI consultation session"
}
```

### 3. Adding a message

```http
POST /chat/message
```

**Request parameters:**
```json
{
  "session_id": "session_123",
  "user_id": "user123",
  "message_type": "text",
  "content": "Hello!"
}
```

### 4. Retrieving chat history

```http
GET /chat/history/{session_id}
```
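Putting the four endpoints together, a full round trip might look like the sketch below. It assumes JSON bodies as shown above and a `session_id` field in the create-session response; the user guide's examples send form fields instead, so adjust to whichever your server build accepts.

```python
import requests

BASE = "http://localhost:8001"

# 1. Create a user and a chat session
requests.post(f"{BASE}/user/create", json={
    "user_id": "user123", "username": "Test User", "email": "test@example.com"})
session = requests.post(f"{BASE}/session/create", json={
    "user_id": "user123", "session_name": "AI consultation session"}).json()
session_id = session["session_id"]

# 2. Add a message, then read the history back
requests.post(f"{BASE}/chat/message", json={
    "session_id": session_id, "user_id": "user123",
    "message_type": "text", "content": "Hello!"})
print(requests.get(f"{BASE}/chat/history/{session_id}").json())
```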
## Background tasks

### 1. Document processing task

```http
POST /tasks/document/process
```

**Request parameters:**
```json
{
  "file_path": "/uploads/document.pdf",
  "user_id": "user123"
}
```

**Example response:**
```json
{
  "task_id": "task_123456",
  "status": "PENDING",
  "message": "Document processing task started."
}
```

### 2. Checking task status

```http
GET /tasks/{task_id}
```

**Example response:**
```json
{
  "task_id": "task_123456",
  "status": "SUCCESS",
  "result": {
    "document_id": "doc_123456",
    "chunks": 45
  },
  "progress": 100
}
```

## Monitoring

### 1. Starting performance monitoring

```http
POST /monitoring/start
```

### 2. Querying performance status

```http
GET /monitoring/status
```

**Example response:**
```json
{
  "current_metrics": {
    "cpu_percent": 25.5,
    "memory_percent": 68.2,
    "memory_used_mb": 8192.0,
    "disk_usage_percent": 45.0
  },
  "performance_stats": {
    "avg_response_time": 1.23,
    "avg_inference_time": 2.45,
    "total_requests": 1250,
    "success_rate": 98.5
  },
  "system_health": {
    "status": "healthy",
    "recommendations": []
  }
}
```

### 3. System health

```http
GET /monitoring/health
```
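A simple watcher built on these endpoints, as a sketch; the field names follow the `/monitoring/status` response example above:

```python
import time
import requests

BASE = "http://localhost:8001"
requests.post(f"{BASE}/monitoring/start")  # make sure monitoring is running

# Sample the metrics a few times and flag resource pressure
for _ in range(10):
    metrics = requests.get(f"{BASE}/monitoring/status").json()["current_metrics"]
    if metrics["cpu_percent"] > 90 or metrics["memory_percent"] > 90:
        print("Resource pressure:", metrics)
    time.sleep(30)
```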
## WebSocket

### Connecting

```javascript
const ws = new WebSocket('ws://localhost:8001/ws/user123');

ws.onopen = function() {
    console.log('WebSocket connected');
};

ws.onmessage = function(event) {
    const data = JSON.parse(event.data);
    console.log('Message received:', data);
};
```

### Sending messages

```javascript
ws.send(JSON.stringify({
    type: 'chat',
    message: 'Hello!',
    session_id: 'session_123'
}));
```

## Error codes

| Code | Meaning | Resolution |
|------|---------|------------|
| 400 | Bad request | Check the request parameters |
| 401 | Authentication failed | Check your token |
| 403 | Forbidden | Check your permissions |
| 404 | Resource not found | Check the URL |
| 422 | Validation failed | Check the request data format |
| 500 | Server error | Check the server logs |
| 503 | Service unavailable | Check the service status |
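A minimal way to map these codes to actions from a client, sketched around the `/generate` endpoint:

```python
import requests

resp = requests.post("http://localhost:8001/generate",
                     data={"prompt": "Hello!", "model_id": "polyglot-ko-1.3b-chat"})

# Branch on the status codes from the table above
if resp.status_code == 200:
    print(resp.json()["generated_text"])
elif resp.status_code in (401, 403):
    print("Check your token and permissions")
elif resp.status_code == 422:
    print("Validation failed:", resp.json())
else:
    print(f"Server-side problem ({resp.status_code}); check the server logs")
```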
## Example code

### Python client

```python
import requests
import json

class LilyLLMClient:
    def __init__(self, base_url="http://localhost:8001"):
        self.base_url = base_url
        self.token = None

    def login(self, username, password):
        response = requests.post(f"{self.base_url}/auth/login",
                                 data={"username": username, "password": password})
        if response.status_code == 200:
            self.token = response.json()["access_token"]
            return True
        return False

    def generate_text(self, prompt, model_id="polyglot-ko-1.3b-chat"):
        headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
        data = {
            "prompt": prompt,
            "model_id": model_id,
            "max_length": 200,
            "temperature": 0.7
        }
        response = requests.post(f"{self.base_url}/generate",
                                 data=data, headers=headers)
        return response.json()

    def upload_document(self, file_path, user_id):
        headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
        with open(file_path, 'rb') as f:
            files = {'file': f}
            data = {'user_id': user_id}
            response = requests.post(f"{self.base_url}/document/upload",
                                     files=files, data=data, headers=headers)
        return response.json()

# Example usage
client = LilyLLMClient()
if client.login("username", "password"):
    result = client.generate_text("Hello!")
    print(result["generated_text"])
```

### JavaScript client

```javascript
class LilyLLMClient {
    constructor(baseUrl = 'http://localhost:8001') {
        this.baseUrl = baseUrl;
        this.token = null;
    }

    async login(username, password) {
        const response = await fetch(`${this.baseUrl}/auth/login`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
            },
            body: `username=${username}&password=${password}`
        });

        if (response.ok) {
            const data = await response.json();
            this.token = data.access_token;
            return true;
        }
        return false;
    }

    async generateText(prompt, modelId = 'polyglot-ko-1.3b-chat') {
        const headers = this.token ?
            {'Authorization': `Bearer ${this.token}`} : {};

        const formData = new FormData();
        formData.append('prompt', prompt);
        formData.append('model_id', modelId);
        formData.append('max_length', '200');
        formData.append('temperature', '0.7');

        const response = await fetch(`${this.baseUrl}/generate`, {
            method: 'POST',
            headers,
            body: formData
        });

        return await response.json();
    }
}

// Example usage
const client = new LilyLLMClient();
client.login('username', 'password').then(async (success) => {
    if (success) {
        const result = await client.generateText('Hello!');
        console.log(result.generated_text);
    }
});
```

## Configuration

### Environment variables

```bash
# Server settings
HOST=0.0.0.0
PORT=8001
LOG_LEVEL=INFO

# Database
DATABASE_URL=sqlite:///app/data/lily_llm.db

# Redis
REDIS_URL=redis://localhost:6379

# Celery
CELERY_BROKER_URL=redis://localhost:6379/0
CELERY_RESULT_BACKEND=redis://localhost:6379/0

# Security
SECRET_KEY=your-secret-key
JWT_SECRET_KEY=your-jwt-secret-key
```

## Additional resources

- [FastAPI docs](https://fastapi.tiangolo.com/)
- [Celery docs](https://docs.celeryproject.org/)
- [Redis docs](https://redis.io/documentation)
- [LangChain docs](https://python.langchain.com/)
docs/USER_GUIDE.md
ADDED
@@ -0,0 +1,719 @@
# Lily LLM API User Guide

## Table of contents

1. [Getting started](#getting-started)
2. [Basic features](#basic-features)
3. [Advanced features](#advanced-features)
4. [Troubleshooting](#troubleshooting)
5. [Best practices](#best-practices)

## Getting started

### System requirements

- **Minimum**:
  - CPU: 4 cores or more
  - RAM: 8 GB or more
  - Storage: 20 GB or more
  - GPU: optional (CUDA support improves performance)

- **Recommended**:
  - CPU: 8 cores or more
  - RAM: 16 GB or more
  - Storage: 50 GB or more
  - GPU: NVIDIA RTX 3060 or better (CUDA support)

### Installation and startup

#### 1. Docker deployment (recommended)

```bash
# Clone the repository
git clone <repository-url>
cd lily_generate_package

# Deploy
chmod +x scripts/deploy.sh
./scripts/deploy.sh deploy

# Check status
./scripts/deploy.sh status
```

#### 2. Local development environment

```bash
# Create a virtual environment
python -m venv venv
source venv/bin/activate  # Windows: venv\Scripts\activate

# Install dependencies
pip install -r requirements.txt

# Download NLTK data
python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab')"

# Start the server
python run_server_v2.py
```

### Your first request

```bash
# Check server health
curl http://localhost:8001/health

# List models
curl http://localhost:8001/models

# Simple text generation
curl -X POST http://localhost:8001/generate \
  -H "Content-Type: application/x-www-form-urlencoded" \
  -d "prompt=Hello!&model_id=polyglot-ko-1.3b-chat&max_length=100"
```

## Basic features

### 1. Text generation

#### Plain text generation

```python
import requests

def generate_text(prompt, model_id="polyglot-ko-1.3b-chat"):
    url = "http://localhost:8001/generate"
    data = {
        "prompt": prompt,
        "model_id": model_id,
        "max_length": 200,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True
    }

    response = requests.post(url, data=data)
    return response.json()

# Example usage
result = generate_text("Please explain the future of artificial intelligence.")
print(result["generated_text"])
```

#### Parameters

| Parameter | Description | Default | Range |
|-----------|-------------|---------|-------|
| `prompt` | Input text | required | - |
| `model_id` | Model to use | polyglot-ko-1.3b-chat | any model from the models list |
| `max_length` | Maximum number of tokens | 200 | 1-4000 |
| `temperature` | Creativity control | 0.7 | 0.0-2.0 |
| `top_p` | Nucleus sampling threshold | 0.9 | 0.0-1.0 |
| `do_sample` | Whether to sample | True | True/False |
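The effect of `temperature` is easy to see by generating from the same prompt at a low and a high setting. This is a sketch; outputs will vary from run to run:

```python
import requests

# Low temperature is near-deterministic; high temperature is more varied
for temp in (0.2, 1.2):
    resp = requests.post(
        "http://localhost:8001/generate",
        data={
            "prompt": "Write a one-line slogan for an AI tutor.",
            "model_id": "polyglot-ko-1.3b-chat",
            "max_length": 50,
            "temperature": temp,
        },
    )
    print(f"temperature={temp}:", resp.json()["generated_text"])
```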
### 2. ๋ฉํฐ๋ชจ๋ฌ ์ฒ๋ฆฌ
|
116 |
+
|
117 |
+
#### ์ด๋ฏธ์ง์ ํ
์คํธ ํจ๊ป ์ฒ๋ฆฌ
|
118 |
+
|
119 |
+
```python
|
120 |
+
def generate_multimodal(prompt, image_files, model_id="kanana-1.5-v-3b-instruct"):
|
121 |
+
url = "http://localhost:8001/generate-multimodal"
|
122 |
+
|
123 |
+
files = []
|
124 |
+
for i, image_file in enumerate(image_files):
|
125 |
+
files.append(('image_files', (f'image_{i}.jpg', open(image_file, 'rb'), 'image/jpeg')))
|
126 |
+
|
127 |
+
data = {
|
128 |
+
"prompt": prompt,
|
129 |
+
"model_id": model_id,
|
130 |
+
"max_length": 200,
|
131 |
+
"temperature": 0.7
|
132 |
+
}
|
133 |
+
|
134 |
+
response = requests.post(url, files=files, data=data)
|
135 |
+
return response.json()
|
136 |
+
|
137 |
+
# ์ฌ์ฉ ์์
|
138 |
+
result = generate_multimodal(
|
139 |
+
"์ด ์ด๋ฏธ์ง์ ๋ํด ์ค๋ช
ํด์ฃผ์ธ์.",
|
140 |
+
["image1.jpg", "image2.jpg"]
|
141 |
+
)
|
142 |
+
print(result["generated_text"])
|
143 |
+
```
|
144 |
+
|
145 |
+
### 3. ์ฌ์ฉ์ ๊ด๋ฆฌ
|
146 |
+
|
147 |
+
#### ์ฌ์ฉ์ ๋ฑ๋ก ๋ฐ ๋ก๊ทธ์ธ
|
148 |
+
|
149 |
+
```python
|
150 |
+
def register_user(username, email, password):
|
151 |
+
url = "http://localhost:8001/auth/register"
|
152 |
+
data = {
|
153 |
+
"username": username,
|
154 |
+
"email": email,
|
155 |
+
"password": password
|
156 |
+
}
|
157 |
+
|
158 |
+
response = requests.post(url, data=data)
|
159 |
+
return response.json()
|
160 |
+
|
161 |
+
def login_user(username, password):
|
162 |
+
url = "http://localhost:8001/auth/login"
|
163 |
+
data = {
|
164 |
+
"username": username,
|
165 |
+
"password": password
|
166 |
+
}
|
167 |
+
|
168 |
+
response = requests.post(url, data=data)
|
169 |
+
return response.json()
|
170 |
+
|
171 |
+
# ์ฌ์ฉ ์์
|
172 |
+
# 1. ์ฌ์ฉ์ ๋ฑ๋ก
|
173 |
+
register_result = register_user("testuser", "test@example.com", "password123")
|
174 |
+
access_token = register_result["access_token"]
|
175 |
+
|
176 |
+
# 2. ๋ก๊ทธ์ธ
|
177 |
+
login_result = login_user("testuser", "password123")
|
178 |
+
access_token = login_result["access_token"]
|
179 |
+
```
|
180 |
+
|
181 |
+
#### ์ธ์ฆ์ด ํ์ํ ์์ฒญ
|
182 |
+
|
183 |
+
```python
|
184 |
+
def authenticated_request(url, data, token):
|
185 |
+
headers = {"Authorization": f"Bearer {token}"}
|
186 |
+
response = requests.post(url, data=data, headers=headers)
|
187 |
+
return response.json()
|
188 |
+
|
189 |
+
# ์ฌ์ฉ ์์
|
190 |
+
result = authenticated_request(
|
191 |
+
"http://localhost:8001/generate",
|
192 |
+
{"prompt": "์๋
ํ์ธ์!", "model_id": "polyglot-ko-1.3b-chat"},
|
193 |
+
access_token
|
194 |
+
)
|
195 |
+
```
|
196 |
+
|
197 |
+
## ๐ ๊ณ ๊ธ ๊ธฐ๋ฅ
|
198 |
+
|
199 |
+
### 1. ๋ฌธ์ ์ฒ๋ฆฌ (RAG)
|
200 |
+
|
201 |
+
#### ๋ฌธ์ ์
๋ก๋
|
202 |
+
|
203 |
+
```python
|
204 |
+
def upload_document(file_path, user_id, token=None):
|
205 |
+
url = "http://localhost:8001/document/upload"
|
206 |
+
|
207 |
+
with open(file_path, 'rb') as f:
|
208 |
+
files = {'file': f}
|
209 |
+
data = {'user_id': user_id}
|
210 |
+
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
211 |
+
|
212 |
+
response = requests.post(url, files=files, data=data, headers=headers)
|
213 |
+
return response.json()
|
214 |
+
|
215 |
+
# ์ฌ์ฉ ์์
|
216 |
+
result = upload_document("document.pdf", "user123", access_token)
|
217 |
+
document_id = result["document_id"]
|
218 |
+
```
|
219 |
+
|
220 |
+
#### RAG ์ฟผ๋ฆฌ
|
221 |
+
|
222 |
+
```python
|
223 |
+
def rag_query(query, user_id, token=None):
|
224 |
+
url = "http://localhost:8001/rag/generate"
|
225 |
+
|
226 |
+
data = {
|
227 |
+
"query": query,
|
228 |
+
"user_id": user_id,
|
229 |
+
"max_length": 300,
|
230 |
+
"temperature": 0.7
|
231 |
+
}
|
232 |
+
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
233 |
+
|
234 |
+
response = requests.post(url, data=data, headers=headers)
|
235 |
+
return response.json()
|
236 |
+
|
237 |
+
# ์ฌ์ฉ ์์
|
238 |
+
result = rag_query("์ธ๊ณต์ง๋ฅ์ ๋ฏธ๋์ ๋ํด ์๋ ค์ฃผ์ธ์.", "user123", access_token)
|
239 |
+
print(result["response"])
|
240 |
+
print("์ถ์ฒ:", result["sources"])
|
241 |
+
```
|
242 |
+
|
243 |
+
#### ํ์ด๋ธ๋ฆฌ๋ RAG (์ด๋ฏธ์ง + ๋ฌธ์)
|
244 |
+
|
245 |
+
```python
|
246 |
+
def hybrid_rag_query(query, image_files, user_id, token=None):
|
247 |
+
url = "http://localhost:8001/rag/generate-hybrid"
|
248 |
+
|
249 |
+
files = []
|
250 |
+
for i, image_file in enumerate(image_files):
|
251 |
+
files.append(('image_files', (f'image_{i}.jpg', open(image_file, 'rb'), 'image/jpeg')))
|
252 |
+
|
253 |
+
data = {
|
254 |
+
"query": query,
|
255 |
+
"user_id": user_id,
|
256 |
+
"max_length": 300,
|
257 |
+
"temperature": 0.7
|
258 |
+
}
|
259 |
+
headers = {"Authorization": f"Bearer {token}"} if token else {}
|
260 |
+
|
261 |
+
response = requests.post(url, files=files, data=data, headers=headers)
|
262 |
+
return response.json()
|
263 |
+
```
|
264 |
+
|
### 2. Chat Session Management

#### Creating Sessions and Managing Messages

```python
def create_chat_session(user_id, session_name, token=None):
    url = "http://localhost:8001/session/create"

    data = {
        "user_id": user_id,
        "session_name": session_name
    }
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    response = requests.post(url, data=data, headers=headers)
    return response.json()

def add_chat_message(session_id, user_id, content, token=None):
    url = "http://localhost:8001/chat/message"

    data = {
        "session_id": session_id,
        "user_id": user_id,
        "message_type": "text",
        "content": content
    }
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    response = requests.post(url, data=data, headers=headers)
    return response.json()

def get_chat_history(session_id, token=None):
    url = f"http://localhost:8001/chat/history/{session_id}"
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    response = requests.get(url, headers=headers)
    return response.json()

# Usage example
# 1. Create a session
session_result = create_chat_session("user123", "AI consultation", access_token)
session_id = session_result["session_id"]

# 2. Add a message
add_chat_message(session_id, "user123", "Hello!", access_token)

# 3. Fetch the chat history
history = get_chat_history(session_id, access_token)
for message in history:
    print(f"{message['timestamp']}: {message['content']}")
```

### 3. Background Tasks

#### Document Processing Tasks

```python
def start_document_processing(file_path, user_id, token=None):
    url = "http://localhost:8001/tasks/document/process"

    data = {
        "file_path": file_path,
        "user_id": user_id
    }
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    response = requests.post(url, data=data, headers=headers)
    return response.json()

def check_task_status(task_id, token=None):
    url = f"http://localhost:8001/tasks/{task_id}"
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    response = requests.get(url, headers=headers)
    return response.json()

# Usage example
# 1. Start the task
task_result = start_document_processing("/path/to/document.pdf", "user123", access_token)
task_id = task_result["task_id"]

# 2. Poll the task status
import time
while True:
    status = check_task_status(task_id, access_token)
    print(f"Status: {status['status']}, progress: {status.get('progress', 0)}%")

    if status['status'] in ['SUCCESS', 'FAILURE']:
        break

    time.sleep(5)
```

### 4. Monitoring

#### Performance Monitoring

```python
def start_monitoring():
    url = "http://localhost:8001/monitoring/start"
    response = requests.post(url)
    return response.json()

def get_monitoring_status():
    url = "http://localhost:8001/monitoring/status"
    response = requests.get(url)
    return response.json()

def get_system_health():
    url = "http://localhost:8001/monitoring/health"
    response = requests.get(url)
    return response.json()

# Usage example
# 1. Start monitoring
start_monitoring()

# 2. Check current metrics
status = get_monitoring_status()
print(f"CPU usage: {status['current_metrics']['cpu_percent']}%")
print(f"Memory usage: {status['current_metrics']['memory_percent']}%")

# 3. System health
health = get_system_health()
print(f"System status: {health['status']}")
for recommendation in health['recommendations']:
    print(f"Recommendation: {recommendation}")
```
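Building on the endpoints above, a small watcher can poll `/monitoring/status` and warn when usage crosses a threshold. This is a minimal sketch using the `current_metrics` fields shown above; the 80% threshold and 30-second interval are arbitrary choices:

```python
import time

def watch_resources(threshold=80.0, interval=30):
    """Poll the monitoring status and print a warning when CPU or memory usage is high."""
    while True:
        metrics = get_monitoring_status()["current_metrics"]
        for name in ("cpu_percent", "memory_percent"):
            if metrics[name] >= threshold:
                print(f"WARNING: {name} at {metrics[name]}% (threshold {threshold}%)")
        time.sleep(interval)
```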
## 🔌 WebSocket Real-Time Chat

### WebSocket Client

```javascript
class LilyLLMWebSocket {
    constructor(userId) {
        this.userId = userId;
        this.ws = null;
        this.messageHandlers = [];
    }

    connect() {
        this.ws = new WebSocket(`ws://localhost:8001/ws/${this.userId}`);

        this.ws.onopen = () => {
            console.log('WebSocket connected');
        };

        this.ws.onmessage = (event) => {
            const data = JSON.parse(event.data);
            this.handleMessage(data);
        };

        this.ws.onclose = () => {
            console.log('WebSocket connection closed');
        };

        this.ws.onerror = (error) => {
            console.error('WebSocket error:', error);
        };
    }

    sendMessage(message, sessionId) {
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
            this.ws.send(JSON.stringify({
                type: 'chat',
                message: message,
                session_id: sessionId
            }));
        }
    }

    addMessageHandler(handler) {
        this.messageHandlers.push(handler);
    }

    handleMessage(data) {
        this.messageHandlers.forEach(handler => handler(data));
    }

    disconnect() {
        if (this.ws) {
            this.ws.close();
        }
    }
}

// Usage example
const wsClient = new LilyLLMWebSocket('user123');
wsClient.connect();

wsClient.addMessageHandler((data) => {
    console.log('Message received:', data);
});

wsClient.sendMessage('Hello!', 'session123');
```
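The same endpoint can be exercised from Python as well. A minimal sketch using the third-party `websockets` package (an assumption: it is not used elsewhere in this guide and would need `pip install websockets`):

```python
import asyncio
import json

import websockets  # assumed dependency: pip install websockets

async def chat_once(user_id: str, session_id: str, message: str):
    """Connect, send one chat message, and print the first reply."""
    async with websockets.connect(f"ws://localhost:8001/ws/{user_id}") as ws:
        await ws.send(json.dumps({
            "type": "chat",
            "message": message,
            "session_id": session_id,
        }))
        reply = await ws.recv()
        print("Message received:", json.loads(reply))

asyncio.run(chat_once("user123", "session123", "Hello!"))
```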
## 🚨 Troubleshooting

### Common Issues

#### 1. Server Connection Failure

**Symptom**: `Connection refused` or `Failed to establish a new connection`

**Resolution**:
```bash
# Check the server status
curl http://localhost:8001/health

# Restart the server
./scripts/deploy.sh restart

# Check the logs
./scripts/deploy.sh logs
```

#### 2. Out of Memory

**Symptom**: `Out of memory` errors or degraded response times

**Resolution**:
```bash
# Check memory usage
docker stats

# Clean up unused containers
docker system prune -f
```

Set a resource limit in `docker-compose.yml`:
```yaml
services:
  lily-llm-api:
    deploy:
      resources:
        limits:
          memory: 4G
```

#### 3. Model Loading Failure

**Symptom**: `Model not found` or other model-related errors

**Resolution**:
```bash
# List the available models
curl http://localhost:8001/models

# Check the model files
ls -la models/

# Restart the server
./scripts/deploy.sh restart
```

#### 4. Authentication Errors

**Symptom**: `401 Unauthorized` or `403 Forbidden`

**Resolution**:
```python
# Refresh the token
def refresh_token(refresh_token):
    url = "http://localhost:8001/auth/refresh"
    data = {"refresh_token": refresh_token}
    response = requests.post(url, data=data)
    return response.json()

# Retry the request with the new token
new_tokens = refresh_token(old_refresh_token)
access_token = new_tokens["access_token"]
```
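To avoid sprinkling this refresh logic around every call site, a wrapper can retry a request once after a 401. A minimal sketch building on `refresh_token()` above; the mutable token dict and single-retry policy are illustrative choices, not part of the API:

```python
import requests

def request_with_refresh(method, url, tokens, **kwargs):
    """Send a request; on 401, refresh the access token once and retry."""
    headers = kwargs.pop("headers", {})
    headers["Authorization"] = f"Bearer {tokens['access_token']}"
    response = requests.request(method, url, headers=headers, **kwargs)

    if response.status_code == 401:
        tokens["access_token"] = refresh_token(tokens["refresh_token"])["access_token"]
        headers["Authorization"] = f"Bearer {tokens['access_token']}"
        response = requests.request(method, url, headers=headers, **kwargs)

    return response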
### Performance Optimization

#### 1. Batch Processing

```python
def batch_generate_texts(prompts, model_id="polyglot-ko-1.3b-chat"):
    results = []
    for prompt in prompts:
        result = generate_text(prompt, model_id)
        results.append(result)
    return results

# Usage example
prompts = [
    "This is the first question.",
    "This is the second question.",
    "This is the third question."
]
results = batch_generate_texts(prompts)
```

Note that this loop issues the requests sequentially; see the asynchronous example under Best Practices below for a parallel version.

#### 2. Caching

```python
import hashlib
import json

import redis

class CachedLilyLLMClient:
    def __init__(self, base_url="http://localhost:8001"):
        self.base_url = base_url
        self.redis_client = redis.Redis(host='localhost', port=6379, db=0)

    def generate_text_with_cache(self, prompt, model_id="polyglot-ko-1.3b-chat"):
        # Build a cache key (sha256 is stable across processes, unlike Python's built-in hash())
        cache_key = f"text_gen:{hashlib.sha256((prompt + model_id).encode()).hexdigest()}"

        # Check the cache first
        cached_result = self.redis_client.get(cache_key)
        if cached_result:
            return json.loads(cached_result)

        # Call the API
        result = generate_text(prompt, model_id)

        # Store the result in the cache for one hour
        self.redis_client.setex(cache_key, 3600, json.dumps(result))

        return result
```

## 📚 Best Practices

### 1. Error Handling

```python
import requests
from requests.exceptions import RequestException

def safe_api_call(func, *args, **kwargs):
    try:
        return func(*args, **kwargs)
    except RequestException as e:
        print(f"Network error: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error: {e}")
        return None

# Usage example
result = safe_api_call(generate_text, "Hello!")
if result:
    print(result["generated_text"])
```

### 2. Retry Logic

```python
import time
from functools import wraps

def retry_on_failure(max_retries=3, delay=1):
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    if attempt == max_retries - 1:
                        raise e
                    print(f"Attempt {attempt + 1} failed, retrying in {delay}s...")
                    time.sleep(delay)
            return None
        return wrapper
    return decorator

# Usage example
@retry_on_failure(max_retries=3, delay=2)
def robust_generate_text(prompt):
    return generate_text(prompt)
```

### 3. Asynchronous Requests

```python
import asyncio
import aiohttp

async def async_generate_text(session, prompt, model_id="polyglot-ko-1.3b-chat"):
    url = "http://localhost:8001/generate"
    data = {
        "prompt": prompt,
        "model_id": model_id,
        "max_length": 200,
        "temperature": 0.7
    }

    async with session.post(url, data=data) as response:
        return await response.json()

async def batch_generate_async(prompts):
    async with aiohttp.ClientSession() as session:
        tasks = [async_generate_text(session, prompt) for prompt in prompts]
        results = await asyncio.gather(*tasks)
        return results

# Usage example
prompts = ["Question 1", "Question 2", "Question 3"]
results = asyncio.run(batch_generate_async(prompts))
```

### 4. Logging

```python
import logging

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('lily_llm_client.log'),
        logging.StreamHandler()
    ]
)

logger = logging.getLogger(__name__)

def generate_text_with_logging(prompt, model_id="polyglot-ko-1.3b-chat"):
    logger.info(f"Text generation started: {prompt[:50]}...")

    try:
        result = generate_text(prompt, model_id)
        logger.info(f"Text generation succeeded: {len(result['generated_text'])} characters")
        return result
    except Exception as e:
        logger.error(f"Text generation failed: {e}")
        raise
```

## 📞 Support

### Help Resources

- **API docs**: `http://localhost:8001/docs`
- **ReDoc docs**: `http://localhost:8001/redoc`
- **GitHub Issues**: the Issues section of the project repository
- **Log files**: the `./logs/` directory

### Debugging Tips

1. **Check the logs**: always look at the logs first
2. **Test in small steps**: break a complex request into small units and test each one
3. **Check the network**: verify firewall and proxy settings
4. **Monitor resources**: check CPU, memory, and disk usage periodically (a quick smoke test is sketched below)
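A quick smoke test that puts these tips into practice in one pass. A minimal sketch; it only assumes the `/health` and `/models` endpoints documented above:

```python
import requests

def smoke_test(base_url="http://localhost:8001"):
    """Hit the two cheapest endpoints and report what works."""
    for path in ("/health", "/models"):
        try:
            response = requests.get(base_url + path, timeout=5)
            print(f"{path}: {response.status_code} {response.json()}")
        except requests.exceptions.RequestException as e:
            print(f"{path}: FAILED ({e})")

smoke_test()
```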
### Performance Tips

1. **Choose an appropriate model**: pick the model that fits the task
2. **Batch processing**: handle multiple requests together
3. **Use caching**: serve repeated requests from a cache
4. **Asynchronous processing**: process large request volumes asynchronously
download_model.py
ADDED
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
"""
Downloads the Hugging Face model during the Docker build.
"""

import os
import logging
from transformers import AutoTokenizer, AutoModelForCausalLM
from pathlib import Path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def download_model():
    """Download the model from the Hugging Face Hub"""

    model_name = "gbrabbit/lily-math-model"
    cache_dir = "/app/cache/transformers"

    logger.info(f"📥 Starting model download: {model_name}")

    try:
        # Create the cache directory
        Path(cache_dir).mkdir(parents=True, exist_ok=True)

        # Download the tokenizer
        logger.info("🔤 Downloading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            cache_dir=cache_dir
        )
        logger.info("✅ Tokenizer download complete")

        # Download the model (weights only)
        logger.info("🧠 Downloading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            cache_dir=cache_dir,
            torch_dtype="auto",  # saves memory
            low_cpu_mem_usage=True
        )
        logger.info("✅ Model download complete")

        # Free memory
        del model
        del tokenizer

        logger.info("🎉 Model downloaded and cached")

    except Exception as e:
        logger.error(f"❌ Model download failed: {e}")
        raise

if __name__ == "__main__":
    download_model()
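Once the image is built, the application can load the model from the warm cache rather than the network. A minimal sketch; the `cache_dir` matches the constant above, and `local_files_only=True` is an assumption about wanting fully offline startup:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

# Reuse the cache populated at build time; fail fast instead of re-downloading.
tokenizer = AutoTokenizer.from_pretrained(
    "gbrabbit/lily-math-model",
    cache_dir="/app/cache/transformers",
    trust_remote_code=True,
    local_files_only=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "gbrabbit/lily-math-model",
    cache_dir="/app/cache/transformers",
    trust_remote_code=True,
    local_files_only=True,
)
```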
fix_huggingface_hub.bat
ADDED
@@ -0,0 +1,31 @@
@chcp 65001 >nul
@echo off
echo Fixing the HuggingFace Hub version issue...

cd /d C:\Project\lily_generate_project\lily_generate_package

echo.
echo 1. Activating latex_ocr_env...
call latex_ocr_env\Scripts\activate

echo.
echo 2. Checking the current huggingface_hub version...
python -c "import huggingface_hub; print('Current version:', huggingface_hub.__version__)"

echo.
echo 3. Downgrading huggingface_hub (to a compatible version)...
python -m pip install huggingface_hub==0.16.4

echo.
echo 4. Reinstalling sentence-transformers...
python -m pip uninstall sentence-transformers -y
python -m pip install sentence-transformers==2.2.2

echo.
echo 5. Verifying the installation...
python -c "import huggingface_hub; print('HuggingFace Hub:', huggingface_hub.__version__)"
python -c "import sentence_transformers; print('SentenceTransformers:', sentence_transformers.__version__)"

echo.
echo HuggingFace Hub version issue resolved!
pause
huggingface_cloud_setup.py
ADDED
@@ -0,0 +1,186 @@
#!/usr/bin/env python3
"""
Hugging Face cloud GPU environment setup
"""

import os
import requests
import json
import logging
from huggingface_hub import login, HfApi
from transformers import AutoTokenizer, AutoModelForCausalLM

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class HuggingFaceCloudSetup:
    """Hugging Face cloud GPU setup helper"""

    def __init__(self):
        """Initialize"""
        self.api = None
        self.model_name = "heegyu/polyglot-ko-5.8b-chat"
        self.space_name = "lily-math-rag"

    def setup_huggingface_login(self):
        """Log in to Hugging Face"""
        logger.info("🔐 Hugging Face login setup")

        try:
            # Ask for the access token
            token = input("Enter your Hugging Face token: ").strip()

            if token:
                login(token)
                self.api = HfApi(token=token)
                logger.info("✅ Hugging Face login succeeded")
                return True
            else:
                logger.error("❌ A token is required")
                return False

        except Exception as e:
            logger.error(f"❌ Hugging Face login failed: {e}")
            return False

    def create_inference_endpoint(self):
        """Build the inference endpoint configuration"""
        logger.info("🚀 Preparing the inference endpoint...")

        try:
            # Endpoint configuration (the endpoint itself is created later; see main())
            endpoint_config = {
                "account": "your-username",  # your Hugging Face username
                "name": "lily-math-rag-endpoint",
                "repository": self.model_name,
                "framework": "pytorch",
                "accelerator": "gpu",
                "instance_type": "gpu.t4.medium",  # GPU instance type
                "region": "us-east-1",
                "vendor": "aws"
            }

            logger.info("✅ Endpoint configuration ready")
            logger.info(f"   Model: {self.model_name}")
            logger.info(f"   GPU: {endpoint_config['instance_type']}")
            logger.info(f"   Region: {endpoint_config['region']}")

            return endpoint_config

        except Exception as e:
            logger.error(f"❌ Endpoint configuration failed: {e}")
            return None

    def create_huggingface_space(self):
        """Build the Hugging Face Space configuration"""
        logger.info("🚀 Preparing the Hugging Face Space...")

        try:
            # Space configuration
            space_config = {
                "name": self.space_name,
                "type": "gradio",
                "sdk": "gradio",
                "title": "Lily Math RAG System",
                "description": "A RAG system for solving math problems",
                "license": "mit",
                "python_version": "3.9"
            }

            logger.info("✅ Space configuration ready")
            logger.info(f"   Space name: {space_config['name']}")
            logger.info(f"   Type: {space_config['type']}")

            return space_config

        except Exception as e:
            logger.error(f"❌ Space configuration failed: {e}")
            return None

    def upload_model_to_hub(self):
        """Upload the model to the Hugging Face Hub"""
        logger.info("📤 Uploading the model...")

        try:
            # Local model path
            local_model_path = "hearth_llm_model"

            if os.path.exists(local_model_path):
                logger.info(f"✅ Local model found: {local_model_path}")

                # Upload the model (in practice, via the Hugging Face CLI)
                logger.info("💡 Upload the model with the following command:")
                logger.info(f"   huggingface-cli upload your-username/lily-math-model {local_model_path}")

                return True
            else:
                logger.warning(f"⚠️ Local model not found: {local_model_path}")
                return False

        except Exception as e:
            logger.error(f"❌ Model upload failed: {e}")
            return False

    def test_cloud_inference(self, endpoint_url):
        """Test inference against the cloud endpoint"""
        logger.info("🧪 Cloud inference test")

        try:
            # Test request
            test_data = {
                "inputs": "Hello! Please help me with a math problem.",
                "parameters": {
                    "max_length": 100,
                    "temperature": 0.7
                }
            }

            response = requests.post(
                f"{endpoint_url}/predict",
                json=test_data,
                headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
            )

            if response.status_code == 200:
                result = response.json()
                logger.info(f"✅ Inference test succeeded: {result}")
                return True
            else:
                logger.error(f"❌ Inference test failed: {response.status_code}")
                return False

        except Exception as e:
            logger.error(f"❌ Inference test failed: {e}")
            return False

def main():
    """Main setup routine"""
    print("🚀 Hugging Face cloud GPU environment setup")
    print("=" * 50)

    # 1. Initialize the setup helper
    setup = HuggingFaceCloudSetup()

    # 2. Log in to Hugging Face
    if not setup.setup_huggingface_login():
        print("❌ Login failed")
        return

    # 3. Prepare the inference endpoint configuration
    endpoint_config = setup.create_inference_endpoint()

    # 4. Prepare the Hugging Face Space configuration
    space_config = setup.create_huggingface_space()

    # 5. Upload the model
    setup.upload_model_to_hub()

    print("\n🎉 Hugging Face cloud setup complete!")
    print("✅ Next steps:")
    print("1. Create the endpoint in Hugging Face Inference Endpoints")
    print("2. Upload the model to the Hugging Face Hub")
    print("3. Wire it up to the Railway Hearth Chat deployment")

if __name__ == "__main__":
    main()
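`upload_model_to_hub()` above only prints the CLI command. As an alternative sketch, the same upload can be done from Python with `HfApi.upload_folder` (assumptions: a recent `huggingface_hub`, the target repo `your-username/lily-math-model` already exists, and the token comes from the login step):

```python
from huggingface_hub import HfApi

api = HfApi(token="hf_...")  # or reuse setup.api from the login step

# Upload the local checkpoint directory to an existing model repo.
api.upload_folder(
    folder_path="hearth_llm_model",
    repo_id="your-username/lily-math-model",
    repo_type="model",
)
```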
huggingface_gpu_setup.py
ADDED
@@ -0,0 +1,210 @@
#!/usr/bin/env python3
"""
Hugging Face GPU environment setup
"""

import os
import torch
import logging
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from huggingface_hub import login

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class HuggingFaceGPUSetup:
    """Hugging Face GPU setup helper"""

    def __init__(self):
        """Initialize"""
        self.model_name = "heegyu/polyglot-ko-5.8b-chat"
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def setup_environment(self):
        """Configure the environment"""
        logger.info("🚀 Starting Hugging Face GPU environment setup")

        # CUDA settings
        if torch.cuda.is_available():
            logger.info(f"✅ GPU available: {torch.cuda.get_device_name(0)}")
            logger.info(f"   CUDA version: {torch.version.cuda}")
            logger.info(f"   GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f}GB")
        else:
            logger.warning("⚠️ No GPU available; running in CPU mode.")

        # Environment variables
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        os.environ["TOKENIZERS_PARALLELISM"] = "false"

        return True

    def setup_quantization(self):
        """Configure quantization (saves memory)"""
        logger.info("🔧 Quantization setup")

        try:
            # 4-bit quantization settings
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
            )

            logger.info("✅ 4-bit quantization configured")
            return quantization_config

        except Exception as e:
            logger.warning(f"⚠️ Quantization setup failed: {e}")
            return None

    def load_model_optimized(self, model_name=None):
        """Load the model with optimizations"""
        if model_name:
            self.model_name = model_name

        logger.info(f"📥 Loading model: {self.model_name}")

        try:
            # Load the tokenizer
            tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True,
                use_fast=False
            )

            # Apply quantization if available
            quantization_config = self.setup_quantization()

            # Load the model
            if quantization_config:
                model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    quantization_config=quantization_config,
                    device_map="auto",
                    trust_remote_code=True,
                    torch_dtype=torch.float16
                )
            else:
                model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    device_map="auto",
                    trust_remote_code=True,
                    torch_dtype=torch.float16
                )

            logger.info("✅ Model loaded")
            return model, tokenizer

        except Exception as e:
            logger.error(f"❌ Model load failed: {e}")
            return None, None

    def optimize_memory(self):
        """Optimize memory usage"""
        logger.info("💾 Memory optimization")

        if torch.cuda.is_available():
            # Clear the GPU memory cache
            torch.cuda.empty_cache()

            # Allocation tuning
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False

            # Report memory usage
            memory_allocated = torch.cuda.memory_allocated(0) / 1024**3
            memory_reserved = torch.cuda.memory_reserved(0) / 1024**3

            logger.info(f"   Allocated memory: {memory_allocated:.2f}GB")
            logger.info(f"   Reserved memory: {memory_reserved:.2f}GB")

        return True

    def test_model_inference(self, model, tokenizer):
        """Run a model inference test"""
        logger.info("🧪 Model inference test")

        try:
            # Test input
            test_input = "Hello! Please help me with a math problem."

            # Tokenize
            inputs = tokenizer(test_input, return_tensors="pt")
            if torch.cuda.is_available():
                inputs = {k: v.cuda() for k, v in inputs.items()}

            # Inference
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_length=100,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id
                )

            # Decode the result
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            logger.info(f"✅ Inference test succeeded: {response[:50]}...")

            return True

        except Exception as e:
            logger.error(f"❌ Inference test failed: {e}")
            return False

def setup_huggingface_login():
    """Log in to Hugging Face"""
    logger.info("🔐 Hugging Face login setup")

    try:
        # Ask for the access token
        token = input("Enter your Hugging Face token (press Enter to skip): ").strip()

        if token:
            login(token)
            logger.info("✅ Hugging Face login succeeded")
            return True
        else:
            logger.info("⚠️ No token entered; only public models will be available.")
            return False

    except Exception as e:
        logger.error(f"❌ Hugging Face login failed: {e}")
        return False

def main():
    """Main setup routine"""
    print("🚀 Hugging Face GPU environment setup")
    print("=" * 50)

    # 1. Log in to Hugging Face
    setup_huggingface_login()

    # 2. Initialize the setup helper
    setup = HuggingFaceGPUSetup()

    # 3. Configure the environment
    setup.setup_environment()

    # 4. Optimize memory
    setup.optimize_memory()

    # 5. Optionally load the model
    load_model = input("Load the model now? (y/N): ").strip().lower()

    if load_model == 'y':
        model, tokenizer = setup.load_model_optimized()

        if model and tokenizer:
            # 6. Run the inference test
            setup.test_model_inference(model, tokenizer)

    print("\n🎉 Hugging Face GPU environment setup complete!")
    print("✅ The model can now be used in a GPU environment.")

if __name__ == "__main__":
    main()
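The class can also be driven from other code rather than through `main()`. A minimal usage sketch; the model id matches the default above:

```python
from huggingface_gpu_setup import HuggingFaceGPUSetup

setup = HuggingFaceGPUSetup()
setup.setup_environment()

# Loads with 4-bit quantization when bitsandbytes is available, plain fp16 otherwise.
model, tokenizer = setup.load_model_optimized("heegyu/polyglot-ko-5.8b-chat")
if model is not None:
    setup.test_model_inference(model, tokenizer)
```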
lily-math-rag
ADDED
@@ -0,0 +1 @@
Subproject commit e3f9de3c36d6444de4d64dbc3752d3082e7a4b0f
lily_llm.db
ADDED
Binary file (41 kB).
lily_llm_api/app.py
ADDED
@@ -0,0 +1,246 @@
#!/usr/bin/env python3
"""
Lily LLM API server
Serves the fine-tuned Mistral-7B model as a RESTful API
"""

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
import logging
import time
import torch
from typing import Optional, List

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Create the FastAPI app
app = FastAPI(
    title="Lily LLM API",
    description="Fine-tuned Mistral-7B model API for Hearth Chat",
    version="1.0.0"
)

# CORS settings
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # development only; restrict to specific domains in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Pydantic models
class GenerateRequest(BaseModel):
    prompt: str
    max_length: Optional[int] = 100
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 0.9
    do_sample: Optional[bool] = True

class GenerateResponse(BaseModel):
    generated_text: str
    processing_time: float
    model_name: str = "Lily LLM (Mistral-7B)"

class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    model_name: str

# Global state
model = None
tokenizer = None
model_loaded = False

@app.on_event("startup")
async def startup_event():
    """Load the model when the server starts"""
    global model, tokenizer, model_loaded

    logger.info("🚀 Starting the Lily LLM API server...")
    logger.info("📖 API docs: http://localhost:8001/docs")
    logger.info("🔍 Health check: http://localhost:8001/health")

    try:
        # Load the model asynchronously so the server starts faster
        await load_model_async()
        model_loaded = True
        logger.info("✅ Model loading complete!")
    except Exception as e:
        logger.error(f"❌ Model loading failed: {e}")
        model_loaded = False

async def load_model_async():
    """Load the model off the event loop"""
    global model, tokenizer

    # Run the blocking load in a separate thread
    import asyncio
    import concurrent.futures

    def load_model_sync():
        from transformers import AutoTokenizer, AutoModelForCausalLM
        from peft import PeftModel
        import torch

        logger.info("Loading the model...")

        # Use the local model path
        local_model_path = "./lily_llm_core/models/polyglot-ko-1.3b"

        try:
            # Load the local model and tokenizer
            tokenizer = AutoTokenizer.from_pretrained(local_model_path, use_fast=True)

            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            # Load the model (on CPU)
            model = AutoModelForCausalLM.from_pretrained(
                local_model_path,
                torch_dtype=torch.float32,
                device_map="cpu",
                low_cpu_mem_usage=True
            )

            logger.info("✅ polyglot-ko-1.3b model loaded!")
            return model, tokenizer

        except Exception as e:
            logger.error(f"Local model load failed: {e}")
            logger.info("Loading a simple test model instead...")

            # Fall back to DialoGPT-medium (a smaller model)
            test_model_name = "microsoft/DialoGPT-medium"
            tokenizer = AutoTokenizer.from_pretrained(test_model_name)
            model = AutoModelForCausalLM.from_pretrained(test_model_name)

            return model, tokenizer

    # Load in a worker thread
    loop = asyncio.get_event_loop()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        model, tokenizer = await loop.run_in_executor(executor, load_model_sync)

@app.get("/", response_model=dict)
async def root():
    """Root endpoint"""
    return {
        "message": "Lily LLM API server",
        "version": "1.0.0",
        "model": "Mistral-7B-Instruct-v0.2 (Fine-tuned)",
        "docs": "/docs"
    }

@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint"""
    return HealthResponse(
        status="healthy",
        model_loaded=model_loaded,
        model_name="Lily LLM (Mistral-7B)"
    )

@app.post("/generate", response_model=GenerateResponse)
async def generate_text(request: GenerateRequest):
    """Text generation endpoint"""
    global model, tokenizer

    if not model_loaded or model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="Model is not loaded")

    start_time = time.time()

    try:
        logger.info(f"Text generation started: '{request.prompt}'")

        # Wrap the prompt in the question/answer format the polyglot model expects
        formatted_prompt = f"질문: {request.prompt}\n답변:"
        logger.info(f"Formatted prompt: '{formatted_prompt}'")

        # Tokenize the input; skip padding but make sure a pad token exists
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True)
        logger.info(f"Input token count: {inputs['input_ids'].shape[1]}")

        # Generate text with fairly aggressive sampling settings
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=request.max_length,
                do_sample=True,
                temperature=0.9,           # higher temperature
                top_k=50,                  # add top_k
                top_p=0.95,                # add top_p
                repetition_penalty=1.2,    # discourage repetition
                no_repeat_ngram_size=2,    # block repeated n-grams
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        logger.info(f"Generated token count: {outputs.shape[1]}")

        # Decode the result
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        logger.info(f"Full decoded text: '{generated_text}'")

        # Extract only the answer part of the polyglot response ("답변:" marks the answer)
        if "답변:" in generated_text:
            response = generated_text.split("답변:")[-1].strip()
            logger.info(f"Answer extracted: '{response}'")
        else:
            # Fall back to stripping the prompt
            if formatted_prompt in generated_text:
                response = generated_text.replace(formatted_prompt, "").strip()
            else:
                response = generated_text.strip()
            logger.info(f"After prompt removal: '{response}'")

        # Handle empty responses
        if not response.strip():
            logger.warning("Generated text was empty; using the default response")
            response = "안녕하세요! 무엇을 도와드릴까요?"

        processing_time = time.time() - start_time

        logger.info(f"Generation finished: {processing_time:.2f}s, text length: {len(response)}")

        return GenerateResponse(
            generated_text=response,
            processing_time=processing_time
        )

    except Exception as e:
        logger.error(f"Text generation error: {e}")
        raise HTTPException(status_code=500, detail=f"Text generation failed: {str(e)}")

@app.get("/models")
async def list_models():
    """List the available models"""
    return {
        "models": [
            {
                "id": "lily-llm",
                "name": "Lily LLM",
                "description": "Fine-tuned Mistral-7B model for Hearth Chat",
                "base_model": "mistralai/Mistral-7B-Instruct-v0.2",
                "fine_tuned": True
            }
        ]
    }

if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8001,
        reload=False,
        log_level="info"
    )
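Note that this v1 server takes a JSON body (the `GenerateRequest` model above), unlike the form-data examples in the user guide. A minimal client sketch against the endpoint as defined here:

```python
import requests

response = requests.post(
    "http://localhost:8001/generate",
    json={"prompt": "안녕하세요!", "max_length": 100},
    timeout=120,  # CPU-only generation can be slow
)
response.raise_for_status()
body = response.json()
print(body["generated_text"], f"({body['processing_time']:.2f}s)")
```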
lily_llm_api/app_v2.py
ADDED
@@ -0,0 +1,2049 @@
#!/usr/bin/env python3
"""
Lily LLM API server v2 (final build: interactive model selection restored, performance optimized)
"""
from fastapi import FastAPI, HTTPException, UploadFile, File, Form, Depends, WebSocket, WebSocketDisconnect
from fastapi.security import HTTPAuthorizationCredentials
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
import logging
import time
import torch
from datetime import datetime
from typing import Optional, List, Union
import asyncio
import concurrent.futures
import sys
from PIL import Image
import io
import os
import json
from pathlib import Path

from .models import get_model_profile, list_available_models
from lily_llm_core.rag_processor import rag_processor
from lily_llm_core.document_processor import document_processor
from lily_llm_core.hybrid_prompt_generator import hybrid_prompt_generator
from lily_llm_core.database import db_manager
from lily_llm_core.auth_manager import auth_manager
from lily_llm_core.websocket_manager import connection_manager
from lily_llm_core.celery_app import (
    process_document_async, generate_ai_response_async,
    rag_query_async, batch_process_documents_async,
    get_task_status, cancel_task
)
from lily_llm_core.performance_monitor import performance_monitor

# Image-OCR specific modules
from lily_llm_core.image_rag_processor import image_rag_processor
from lily_llm_core.latex_rag_processor import latex_rag_processor
from lily_llm_core.vector_store_manager import vector_store_manager

# LaTeX-OCR + FAISS integrated system
from latex_ocr_faiss_integrated import LatexOCRFAISSIntegrated
from latex_ocr_faiss_simple import LatexOCRFAISSSimple

# Multimodal RAG processor
from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Create the FastAPI app
app = FastAPI(
    title="Lily LLM API v2",
    description="LLM API server with multi-model support",
    version="2.0.0"
)

# CORS settings
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:8001",
        "http://127.0.0.1:8001",
        "http://localhost:3000",
        "http://127.0.0.1:3000",
        "*"  # allow every origin during development
    ],
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
)

# Pydantic models
class GenerateRequest(BaseModel):
    prompt: str
    model_id: Optional[str] = None  # no default: use the currently loaded model
    max_length: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    do_sample: Optional[bool] = None

class GenerateResponse(BaseModel):
    generated_text: str
    processing_time: float
    model_name: str
    image_processed: bool

class MultimodalGenerateResponse(BaseModel):
    generated_text: str
    processing_time: float
    model_name: str
    model_id: Optional[str] = None
    image_processed: bool = False

class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    current_model: str
    available_models: List[dict]

class DocumentUploadResponse(BaseModel):
    success: bool
    document_id: str
    message: str
    chunks: Optional[int] = None
    latex_count: Optional[int] = None  # number of LaTeX formulas found
    error: Optional[str] = None
    auto_response: Optional[str] = None  # automatic response field

class RAGResponse(BaseModel):
    success: bool
    response: str
    context: str
    sources: List[dict]
    search_results: int
    processing_time: float

# User-related response models
class UserResponse(BaseModel):
    success: bool
    user_id: str
    username: Optional[str] = None
    email: Optional[str] = None
    created_at: Optional[str] = None
    error: Optional[str] = None

class SessionResponse(BaseModel):
    success: bool
    session_id: str
    session_name: Optional[str] = None
    created_at: Optional[str] = None
    error: Optional[str] = None

class ChatMessageResponse(BaseModel):
    success: bool
    message_id: int
    content: str
    message_type: str
    timestamp: str
    error: Optional[str] = None

# Auth-related response models
class LoginResponse(BaseModel):
    success: bool
    access_token: Optional[str] = None
    refresh_token: Optional[str] = None
    token_type: Optional[str] = None
    user_id: Optional[str] = None
    username: Optional[str] = None
    error: Optional[str] = None

class TokenResponse(BaseModel):
    success: bool
    access_token: Optional[str] = None
    token_type: Optional[str] = None
    error: Optional[str] = None

# Global state
model = None
tokenizer = None
current_profile = None
model_loaded = False
image_processor = None
executor = concurrent.futures.ThreadPoolExecutor()

def select_model_interactive():
    """Interactive model selection"""
    available_models = list_available_models()

    print("\n" + "="*60 + "\n🤖 Lily LLM API v2 - model selection\n" + "="*60)
    for i, model_info in enumerate(available_models, 1):
        print(f"{i:2d}. {model_info['name']} ({model_info['model_id']})")
    while True:
        try:
            # choice = input(f"\nSelect a model number (1-{len(available_models)}): ")
            # selected_model = available_models[int(choice) - 1]
            selected_model = available_models[1]
            print(f"\n✅ Selected the '{selected_model['name']}' model.")
            return selected_model['model_id']
        except (ValueError, IndexError):
            print(f"❌ Please enter a number between 1 and {len(available_models)}.")
        except KeyboardInterrupt: sys.exit("\n\n👋 Exiting.")

@app.on_event("startup")
async def startup_event():
    """[Restored] Select and load a model interactively at server startup"""
    global model_loaded
    selected_model_id = select_model_interactive()
    try:
        await load_model_async(selected_model_id)
        model_loaded = True
        logger.info(f"✅ Server is ready with the '{current_profile.display_name}' model.")
    except Exception as e:
        logger.error(f"❌ Model load failed: {e}", exc_info=True)
        model_loaded = False

@app.on_event("shutdown")
def shutdown_event():
    executor.shutdown(wait=True)

async def load_model_async(model_id: str):
    loop = asyncio.get_event_loop()
    await loop.run_in_executor(executor, load_model_sync, model_id)

def load_model_sync(model_id: str):
    """Load the model and its companion processors synchronously"""
    global model, tokenizer, image_processor, current_profile

    try:
        # Unload the previous model (free memory)
        if model is not None:
            logger.info("🗑️ Unloading the previous model...")
            del model
            del tokenizer
            model = None
            tokenizer = None
            import gc
            gc.collect()
            logger.info("✅ Previous model unloaded")

        logger.info(f"📥 Loading the '{model_id}' model...")
        current_profile = get_model_profile(model_id)
        model, tokenizer = current_profile.load_model()

        # Model info for debugging
        if hasattr(model, 'num_parameters'):
            logger.info(f"📊 Model parameter count: {model.num_parameters():,}")

        # Multimodal tokenizer setup
        if getattr(current_profile, 'multimodal', False):
            logger.info("🔧 Enabling multimodal tokenizer features...")
            tokenizer.mllm_setup(num_visual_tokens=1)
            from transformers import AutoImageProcessor
            image_processor = AutoImageProcessor.from_pretrained(current_profile.local_path, trust_remote_code=True, local_files_only=True)
            logger.info("✅ Image processor loaded!")
        else:
            image_processor = None

        logger.info(f"✅ '{current_profile.display_name}' model loaded!")

    except Exception as e:
        logger.error(f"❌ load_model_sync failed: {e}")
        import traceback
        logger.error(f"📄 Full error: {traceback.format_exc()}")
        raise

def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
                  temperature: Optional[float] = None, top_p: Optional[float] = None,
                  do_sample: Optional[bool] = None) -> dict:
    """[Optimized] Unified synchronous generation for text and multimodal requests"""
    image_processed = False
    all_pixel_values = []
    combined_image_metas = None

    # --- 1. Image processing (only runs when images are present) ---
    if image_data_list and getattr(current_profile, 'multimodal', False):
        logger.info(f"🖼️ Handling multimodal request... ({len(image_data_list)} images)")
        image_processed = True
        all_image_metas = []

        for i, image_data in enumerate(image_data_list):
            pil_image = Image.open(io.BytesIO(image_data)).convert("RGB")
            processed_data = image_processor(pil_image)
            all_pixel_values.append(processed_data["pixel_values"])
            all_image_metas.append(processed_data["image_meta"])

        # Merge all image metadata into one structure
        if all_image_metas:
            combined_image_metas = {}
            for key in all_image_metas[0].keys():
                combined_image_metas[key] = [meta[key] for meta in all_image_metas]

    # --- 2. Prompt construction (shared, with or without images) ---
    image_tokens = "<image>" * len(all_pixel_values)
    # The same prompt template is used for text-only and multimodal requests
    formatted_prompt = f"<|im_start|>user\n{image_tokens}{prompt}<|im_end|>\n<|im_start|>assistant\n"

    # --- 3. Tokenization (shared) ---
    # The same custom tokenizer call covers text-only and multimodal inputs
    inputs = tokenizer.encode_prompt(prompt=formatted_prompt, image_meta=combined_image_metas)

    input_ids = inputs['input_ids'].unsqueeze(0).to(model.device)
    attention_mask = inputs['attention_mask'].unsqueeze(0).to(model.device)

    # --- 4. Model generation (shared) ---
    gen_config = current_profile.get_generation_config()

    # User-supplied parameters such as max_length override gen_config
    if max_length is not None: gen_config['max_new_tokens'] = max_length
    if temperature is not None: gen_config['temperature'] = temperature
    if top_p is not None: gen_config['top_p'] = top_p
    if do_sample is not None: gen_config['do_sample'] = do_sample

    with torch.no_grad():
        if image_processed:
            # Multimodal generation
            outputs = model.generate(
                pixel_values=all_pixel_values,
                image_metas=combined_image_metas,
                input_ids=input_ids,
                attention_mask=attention_mask,
                **gen_config
            )
        else:
            # Text-only generation (optimized path)
            outputs = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                **gen_config
            )

    # --- 5. Extract the response (shared logic) ---
    try:
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
317 |
+
response = current_profile.extract_response(full_text, formatted_prompt)
|
318 |
+
return {"text": response, "image_processed": image_processed}
|
319 |
+
except Exception as e:
|
320 |
+
logger.error(f"โ ์๋ต ์ถ์ถ ์ค ์ค๋ฅ: {e}")
|
321 |
+
raise HTTPException(status_code=500, detail=f"์๋ต ์ถ์ถ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}")
|
322 |
+
|
323 |
+
@app.post("/generate", response_model=GenerateResponse)
|
324 |
+
async def generate(prompt: str = Form(...),
|
325 |
+
image1: UploadFile = File(None),
|
326 |
+
image2: UploadFile = File(None),
|
327 |
+
image3: UploadFile = File(None),
|
328 |
+
image4: UploadFile = File(None),
|
329 |
+
max_length: Optional[int] = Form(None),
|
330 |
+
temperature: Optional[float] = Form(None),
|
331 |
+
top_p: Optional[float] = Form(None),
|
332 |
+
do_sample: Optional[bool] = Form(None)):
|
333 |
+
"""[ํตํฉ ์๋ํฌ์ธํธ] ํ
์คํธ ๋ฐ ๋ฉํฐ๋ชจ๋ฌ ์์ฑ"""
|
334 |
+
if not model_loaded:
|
335 |
+
raise HTTPException(status_code=503, detail="๋ชจ๋ธ์ด ๋ก๋๋์ง ์์์ต๋๋ค.")
|
336 |
+
|
337 |
+
start_time = time.time()
|
338 |
+
loop = asyncio.get_event_loop()
|
339 |
+
|
340 |
+
# ๋ค์ค ์ด๋ฏธ์ง ์ฒ๋ฆฌ
|
341 |
+
image_data_list = []
|
342 |
+
for img in [image1, image2, image3, image4]:
|
343 |
+
if img:
|
344 |
+
image_data = await img.read()
|
345 |
+
image_data_list.append(image_data)
|
346 |
+
|
347 |
+
result = await loop.run_in_executor(executor, generate_sync, prompt, image_data_list,
|
348 |
+
max_length, temperature, top_p, do_sample)
|
349 |
+
|
350 |
+
processing_time = time.time() - start_time
|
351 |
+
logger.info(f"โ
์์ฑ ์๋ฃ ({processing_time:.2f}์ด), ์ด๋ฏธ์ง ์ฒ๋ฆฌ: {result['image_processed']}")
|
352 |
+
|
353 |
+
return GenerateResponse(
|
354 |
+
generated_text=result["text"],
|
355 |
+
processing_time=processing_time,
|
356 |
+
model_name=current_profile.display_name,
|
357 |
+
image_processed=result["image_processed"]
|
358 |
+
)
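

# --- Usage sketch (not part of the original diff) ---
# A minimal client for the unified /generate endpoint above, assuming the
# server listens on http://localhost:8000 and the `requests` package is
# installed. Defined as an unused helper so importing this module has no
# side effects; file name and parameters are illustrative.
def _example_generate_client():
    import requests  # client-side dependency, assumed available
    with open("photo.png", "rb") as f:
        resp = requests.post(
            "http://localhost:8000/generate",
            data={"prompt": "Describe this image.", "max_length": 128},
            files={"image1": ("photo.png", f, "image/png")},  # image2..image4 are optional
            timeout=300,
        )
    resp.raise_for_status()
    body = resp.json()
    print(body["generated_text"], body["processing_time"])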


@app.post("/generate-multimodal", response_model=MultimodalGenerateResponse)
async def generate_multimodal(
    prompt: str = Form(...),
    image: UploadFile = File(None),
    model_id: Optional[str] = Form(None),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Multimodal text generation (image + text)"""
    start_time = time.time()

    try:
        # Verify the model is loaded
        if not model_loaded or not model or not tokenizer or not current_profile:
            raise HTTPException(status_code=500, detail="Model is not loaded")

        # If this is not a Kanana model, redirect to plain text generation
        if "kanana" not in current_profile.model_name.lower():
            logger.warning("Multimodal request, but not a Kanana model - handling as plain text generation")
            # Redirect to the regular generate path
            loop = asyncio.get_event_loop()
            result = await loop.run_in_executor(executor, generate_sync, prompt, None,
                                                max_length, temperature, top_p, do_sample)
            return MultimodalGenerateResponse(
                generated_text=result["text"],
                processing_time=time.time() - start_time,
                model_name=current_profile.display_name,
                model_id=model_id or current_profile.get_model_info()["model_name"],
                image_processed=False
            )

        logger.info(f"Multimodal generation started: '{prompt}'")

        # Image processing
        pixel_values = None
        image_metas = None
        image_processed = False
        if image:
            logger.info(f"Processing image: {image.filename}")
            try:
                # Read the image file
                image_data = await image.read()
                pil_image = Image.open(io.BytesIO(image_data))

                # Image preprocessing (tailored to the Kanana model)
                logger.info(f"✅ Image loaded: {pil_image.size}")

                # Kanana-specific image preprocessing
                from transformers import AutoImageProcessor

                # Load the image processor
                image_processor = AutoImageProcessor.from_pretrained(
                    current_profile.get_model_info()["local_path"],
                    trust_remote_code=True
                )

                # Preprocess the image (Kanana style)
                processed_images = image_processor(pil_image)
                pixel_values = processed_images["pixel_values"]
                image_metas = processed_images["image_meta"]

                # Move to the model's device
                pixel_values = pixel_values.to(model.device)
                if image_metas and "vision_grid_thw" in image_metas:
                    # Make sure vision_grid_thw is a tensor, not a scalar
                    grid_thw = image_metas["vision_grid_thw"]
                    if isinstance(grid_thw, (list, tuple)):
                        grid_thw = torch.tensor(grid_thw)
                    elif not isinstance(grid_thw, torch.Tensor):
                        grid_thw = torch.tensor([grid_thw])
                    image_metas["vision_grid_thw"] = grid_thw.to(model.device)

                # Extra logging for debugging
                logger.info(f"pixel_values shape: {pixel_values.shape}")
                logger.info(f"image_metas keys: {list(image_metas.keys()) if image_metas else 'None'}")
                if image_metas and "vision_grid_thw" in image_metas:
                    logger.info(f"vision_grid_thw shape: {image_metas['vision_grid_thw'].shape}")

                image_processed = True
                logger.info(f"✅ Image tensor conversion complete: {pixel_values.shape}")

            except Exception as e:
                logger.error(f"❌ Image processing failed: {e}")
                pixel_values = None
                image_metas = None
                image_processed = False
                logger.info("Proceeding with text only, without the image.")
        else:
            logger.info("No image - processing text only")
            image_processed = False

        # Kanana multimodal prompt format (system prompt kept in Korean:
        # "You are an AI assistant that converses in Korean; answer in Korean.")
        system_prompt = "당신은 한국어로 대화하는 AI 어시스턴트입니다. 모든 응답은 한국어로 해주세요."
        if image_processed:
            formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
            logger.info(f"Kanana multimodal prompt: '{formatted_prompt}'")
        else:
            formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
            logger.info(f"Kanana text-only prompt: '{formatted_prompt}'")

        # Tokenization
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=100,
        )

        if 'token_type_ids' in inputs:
            del inputs['token_type_ids']

        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        logger.info(f"Input token count: {inputs['input_ids'].shape[1]}")

        # Generation settings for Kanana
        max_new_tokens = max_length or 100
        temperature = temperature or 0.7
        top_p = top_p or 0.9
        do_sample = do_sample if do_sample is not None else True

        with torch.no_grad():
            if image_processed and pixel_values is not None:
                # Attempt true multimodal processing with the Kanana model
                logger.info("Attempting true multimodal processing with the Kanana model...")

                try:
                    # Try converting vision_grid_thw to the correct shape
                    if 'vision_grid_thw' in image_metas:
                        grid_thw = image_metas['vision_grid_thw']
                        if isinstance(grid_thw, (list, tuple)):
                            grid_thw = torch.tensor(grid_thw)
                        elif not isinstance(grid_thw, torch.Tensor):
                            grid_thw = torch.tensor([grid_thw])
                        image_metas['vision_grid_thw'] = grid_thw.to(model.device)

                    # Run forward_vision and forward_projector separately
                    visual_features = model.forward_vision(pixel_values, image_metas)
                    visual_embeds = model.forward_projector(visual_features, image_metas)

                    # Build the text embeddings
                    text_embeds = model.embed_text_tokens(inputs["input_ids"])

                    # Cast the visual embeddings to the same dtype as the text embeddings
                    target_dtype = text_embeds.dtype
                    visual_embeds_converted = visual_embeds.to(target_dtype)

                    # Flatten the text embeddings
                    from einops import rearrange
                    flattened_text_embeds = rearrange(text_embeds, "b l d -> (b l) d")
                    flattened_input_ids = rearrange(inputs["input_ids"], "b l -> (b l)")

                    # Insert visual embeddings at the positions of the -1 tokens
                    mask = (flattened_input_ids == -1)
                    if mask.sum() > 0:
                        flattened_text_embeds[mask] = visual_embeds_converted[:mask.sum()]

                    # Reshape back into batch form
                    input_embeds = rearrange(flattened_text_embeds, "(b l) d -> b l d", b=inputs["input_ids"].shape[0])
                    attention_mask = inputs["attention_mask"]

                    # Cast input_embeds to the language model's dtype
                    language_model_dtype = next(model.language_model.parameters()).dtype
                    if input_embeds.dtype != language_model_dtype:
                        input_embeds = input_embeds.to(language_model_dtype)

                    # Use the Kanana model's own generate method
                    outputs = model.generate(
                        input_ids=inputs["input_ids"],
                        pixel_values=pixel_values,
                        image_metas=image_metas,
                        attention_mask=inputs["attention_mask"],
                        max_new_tokens=max_new_tokens,
                        do_sample=do_sample,
                        temperature=temperature,
                        top_k=40,
                        top_p=top_p,
                        repetition_penalty=1.1,
                        no_repeat_ngram_size=2,
                        pad_token_id=tokenizer.eos_token_id,
                        eos_token_id=tokenizer.eos_token_id,
                        use_cache=True
                    )
                    logger.info("✅ True multimodal generation succeeded!")

                except Exception as e:
                    logger.error(f"❌ True multimodal processing failed: {e}")
                    logger.error(f"Error type: {type(e).__name__}")
                    import traceback
                    logger.error(f"Detailed error: {traceback.format_exc()}")
                    logger.info("Fallback: switching to text-only processing")

                    # Fallback: text-only processing, with an "[image attached]" marker in Korean
                    enhanced_prompt = f"[이미지 첨부됨] {prompt}"
                    enhanced_formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{enhanced_prompt}<|im_end|>\n<|im_start|>assistant\n"

                    enhanced_inputs = tokenizer(
                        enhanced_formatted_prompt,
                        return_tensors="pt",
                        padding=True,
                        truncation=True,
                        max_length=256
                    )

                    if 'token_type_ids' in enhanced_inputs:
                        del enhanced_inputs['token_type_ids']

                    enhanced_inputs = {k: v.to(model.device) for k, v in enhanced_inputs.items()}

                    outputs = model.language_model.generate(
                        input_ids=enhanced_inputs["input_ids"],
                        attention_mask=enhanced_inputs["attention_mask"],
                        max_new_tokens=max_new_tokens,
                        do_sample=do_sample,
                        temperature=temperature,
                        top_k=40,
                        top_p=top_p,
                        repetition_penalty=1.1,
                        no_repeat_ngram_size=2,
                        pad_token_id=tokenizer.eos_token_id,
                        eos_token_id=tokenizer.eos_token_id,
                        use_cache=True
                    )
            else:
                # Text-only generation
                logger.info("Text-only generation started")
                outputs = model.generate(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=max_new_tokens,
                    do_sample=do_sample,
                    temperature=temperature,
                    top_k=40,
                    top_p=top_p,
                    repetition_penalty=1.1,
                    no_repeat_ngram_size=2,
                    pad_token_id=tokenizer.eos_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                    use_cache=True
                )

        # Response extraction
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        logger.info(f"Generated token count: {outputs.shape[1]}")
        logger.info(f"Decoded full text: '{generated_text}'")

        # Kanana multimodal response extraction
        if "<|im_start|>assistant" in generated_text:
            response = generated_text.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
            logger.info(f"Kanana multimodal response extracted: '{response}'")
        else:
            response = generated_text.strip()
            logger.info(f"Kanana full text: '{response}'")

        processing_time = time.time() - start_time
        logger.info(f"Multimodal generation complete: {processing_time:.2f}s, text length: {len(response)}, image processed: {image_processed}")

        return MultimodalGenerateResponse(
            generated_text=response,
            processing_time=processing_time,
            model_name=current_profile.display_name,
            model_id=model_id or current_profile.get_model_info()["model_name"],
            image_processed=image_processed
        )

    except Exception as e:
        processing_time = time.time() - start_time
        logger.error(f"❌ Multimodal generation error: {e} (elapsed: {processing_time:.2f}s)")
        raise HTTPException(status_code=500, detail=f"Multimodal generation failed: {str(e)}")
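

# --- Usage sketch (not part of the original diff) ---
# /generate-multimodal accepts a single image; non-Kanana models silently fall
# back to text-only generation, so the `image_processed` field in the response
# tells the caller which path was taken. Server address and file name are
# assumptions.
def _example_multimodal_client():
    import requests  # client-side dependency, assumed available
    with open("diagram.png", "rb") as f:
        resp = requests.post(
            "http://localhost:8000/generate-multimodal",
            data={"prompt": "What does this diagram show?", "temperature": 0.7},
            files={"image": ("diagram.png", f, "image/png")},
            timeout=300,
        )
    resp.raise_for_status()
    body = resp.json()
    print(body["image_processed"], body["generated_text"])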


@app.get("/models")
async def list_models():
    """List available models"""
    return {
        "models": list_available_models(),
        "current_model": current_profile.get_model_info() if current_profile else None
    }


@app.post("/switch-model")
async def switch_model(model_id: str):
    """Switch the active model"""
    try:
        await load_model_async(model_id)
        return {
            "message": f"Model switched successfully: {model_id}",
            "current_model": current_profile.display_name
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Model switch failed: {str(e)}")


@app.get("/", response_model=dict)
async def root():
    """Root endpoint"""
    return {
        "message": "Lily LLM API v2 server",
        "version": "2.0.0",
        "current_model": current_profile.display_name if current_profile else "None",
        "docs": "/docs"
    }


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint"""
    available_models = list_available_models()

    return HealthResponse(
        status="healthy",
        model_loaded=model_loaded,
        current_model=current_profile.display_name if current_profile else "None",
        available_models=available_models
    )
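

# --- Usage sketch (not part of the original diff) ---
# A typical operational flow: check /health, list /models, then switch.
# Note that /switch-model takes model_id as a query parameter (plain function
# argument above), not as form data. Server address is an assumption.
def _example_switch_model_client():
    import requests  # client-side dependency, assumed available
    base = "http://localhost:8000"
    print(requests.get(f"{base}/health").json()["model_loaded"])
    models = requests.get(f"{base}/models").json()["models"]
    first_id = models[0]["model_id"]
    resp = requests.post(f"{base}/switch-model", params={"model_id": first_id})
    print(resp.json())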


@app.post("/document/upload", response_model=DocumentUploadResponse)
async def upload_document(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),      # default user ID
    document_id: Optional[str] = Form(None)   # document ID (auto-generated if omitted)
):
    """Upload a document and process it for RAG"""
    start_time = time.time()

    try:
        # Generate a document ID if one was not provided
        if not document_id:
            import uuid
            document_id = str(uuid.uuid4())[:8]

        # Save to a temporary file
        temp_file_path = f"./temp_{document_id}_{file.filename}"
        with open(temp_file_path, "wb") as f:
            content = await file.read()
            f.write(content)

        # Process the document and store it in the vector store
        result = rag_processor.process_and_store_document(
            user_id, document_id, temp_file_path
        )

        # Delete the temporary file
        import os
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

        processing_time = time.time() - start_time
        logger.info(f"Document upload complete ({processing_time:.2f}s): {file.filename}")

        # Automatically generate an AI response after the upload
        if result["success"]:
            try:
                # Ask the model for a simple summary (prompt kept in Korean,
                # since the model is configured to answer in Korean:
                # "Please summarize the main content of the uploaded document.")
                summary_query = f"업로드된 문서 '{file.filename}'의 주요 내용을 요약해주세요."

                logger.info("Generating AI response after document upload...")

                rag_result = rag_processor.generate_rag_response(
                    user_id, document_id, summary_query, llm_model=model
                )

                if rag_result["success"]:
                    logger.info(f"✅ Automatic AI response generated: {len(rag_result['response'])} characters")
                    result["auto_response"] = rag_result["response"]
                else:
                    logger.warning(f"⚠️ Automatic AI response failed: {rag_result.get('error', 'Unknown error')}")

            except Exception as e:
                logger.error(f"❌ Error while generating the automatic AI response: {e}")

        return DocumentUploadResponse(
            success=result["success"],
            document_id=document_id,
            message=result.get("message", ""),
            chunks=result.get("chunks"),
            latex_count=result.get("latex_count"),
            error=result.get("error"),
            auto_response=result.get("auto_response", "")  # include the automatic response
        )

    except Exception as e:
        logger.error(f"❌ Document upload failed: {e}")
        return DocumentUploadResponse(
            success=False,
            document_id=document_id if 'document_id' in locals() else "unknown",
            message="An error occurred while uploading the document.",
            error=str(e)
        )
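

# --- Usage sketch (not part of the original diff) ---
# Uploading a PDF for RAG; document_id is optional and auto-generated when
# omitted. The `auto_response` field carries the automatic summary, if any.
# Server address and file name are assumptions.
def _example_document_upload_client():
    import requests  # client-side dependency, assumed available
    with open("paper.pdf", "rb") as f:
        resp = requests.post(
            "http://localhost:8000/document/upload",
            data={"user_id": "alice"},
            files={"file": ("paper.pdf", f, "application/pdf")},
            timeout=600,
        )
    body = resp.json()
    print(body["document_id"], body.get("chunks"), body.get("auto_response", "")[:80])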


@app.post("/rag/generate", response_model=RAGResponse)
async def generate_rag_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Generate a RAG-based response"""
    start_time = time.time()

    try:
        # Check whether a model is loaded
        llm_model = None
        if model is not None and hasattr(model, 'generate_text'):
            llm_model = model
            logger.info("✅ Generating the RAG response with the loaded model")
        else:
            logger.warning("⚠️ Model not loaded - generating a text-based response only")

        # Generate the RAG response
        result = rag_processor.generate_rag_response(
            user_id, document_id, query, llm_model=llm_model
        )

        processing_time = time.time() - start_time
        logger.info(f"RAG response complete ({processing_time:.2f}s)")

        return RAGResponse(
            success=result["success"],
            response=result["response"],
            context=result["context"],
            sources=result["sources"],
            search_results=result["search_results"],
            processing_time=processing_time
        )

    except Exception as e:
        logger.error(f"❌ RAG response generation failed: {e}")
        return RAGResponse(
            success=False,
            response=f"An error occurred while generating the RAG response: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=0.0
        )
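

# --- Usage sketch (not part of the original diff) ---
# Querying a previously uploaded document. The document_id must match the one
# returned by /document/upload; the ID below is hypothetical.
def _example_rag_query_client():
    import requests  # client-side dependency, assumed available
    resp = requests.post(
        "http://localhost:8000/rag/generate",
        data={
            "query": "What is the main contribution?",
            "user_id": "alice",
            "document_id": "ab12cd34",  # hypothetical ID from an earlier upload
        },
        timeout=300,
    )
    body = resp.json()
    print(body["response"], "| sources:", body["sources"])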


@app.post("/rag/generate-hybrid", response_model=RAGResponse)
async def generate_hybrid_rag_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...),
    image1: UploadFile = File(None),
    image2: UploadFile = File(None),
    image3: UploadFile = File(None),
    image4: UploadFile = File(None),
    image5: UploadFile = File(None),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Generate a hybrid RAG response (images + document)"""
    start_time = time.time()

    try:
        # Handle the uploaded image files
        image_files = []
        uploaded_images = [image1, image2, image3, image4, image5]

        for i, img in enumerate(uploaded_images):
            if img:
                try:
                    # Save to a temporary file
                    import tempfile
                    with tempfile.NamedTemporaryFile(
                        suffix=f"_{i}.png",
                        delete=False,
                        prefix="hybrid_image_"
                    ) as temp_file:
                        image_data = await img.read()
                        temp_file.write(image_data)
                        image_files.append(temp_file.name)
                        logger.info(f"Image uploaded: {img.filename} -> {temp_file.name}")
                except Exception as e:
                    logger.error(f"❌ Image processing failed: {e}")

        # Generate the RAG response (including images)
        result = rag_processor.generate_rag_response(
            user_id, document_id, query,
            llm_model=model,  # use the actual model instance
            image_files=image_files if image_files else None
        )

        # Clean up the temporary image files
        for temp_file in image_files:
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                    logger.info(f"Temporary image file deleted: {temp_file}")
            except Exception as e:
                logger.warning(f"⚠️ Failed to delete temporary file: {e}")

        processing_time = time.time() - start_time
        logger.info(f"Hybrid RAG response complete ({processing_time:.2f}s)")

        return RAGResponse(
            success=result["success"],
            response=result["response"],
            context=result["context"],
            sources=result["sources"],
            search_results=result["search_results"],
            processing_time=processing_time
        )

    except Exception as e:
        logger.error(f"❌ Hybrid RAG response generation failed: {e}")
        return RAGResponse(
            success=False,
            response=f"An error occurred while generating the response: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=time.time() - start_time
        )
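

# --- Usage sketch (not part of the original diff) ---
# The hybrid endpoint combines document retrieval with up to five attached
# images (image1..image5); any subset may be supplied. Names and IDs below
# are illustrative.
def _example_hybrid_rag_client():
    import requests  # client-side dependency, assumed available
    with open("page1.png", "rb") as f1, open("page2.png", "rb") as f2:
        resp = requests.post(
            "http://localhost:8000/rag/generate-hybrid",
            data={"query": "Explain the figure on page 2.",
                  "user_id": "alice", "document_id": "ab12cd34"},
            files={"image1": ("page1.png", f1, "image/png"),
                   "image2": ("page2.png", f2, "image/png")},
            timeout=300,
        )
    print(resp.json()["response"])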


@app.get("/documents/{user_id}")
async def list_user_documents(user_id: str):
    """List a user's documents"""
    try:
        from lily_llm_core.vector_store_manager import vector_store_manager
        documents = vector_store_manager.get_all_documents(user_id)
        return documents
    except Exception as e:
        logger.error(f"❌ Failed to list documents: {e}")
        return {"documents": [], "total_docs": 0, "error": str(e)}


@app.delete("/document/{user_id}/{document_id}")
async def delete_document(user_id: str, document_id: str):
    """Delete a document"""
    try:
        result = rag_processor.delete_document(user_id, document_id)
        return result
    except Exception as e:
        logger.error(f"❌ Failed to delete document: {e}")
        return {"success": False, "error": str(e)}


# User management endpoints
@app.post("/user/create", response_model=UserResponse)
async def create_user(
    user_id: str = Form(...),
    username: Optional[str] = Form(None),
    email: Optional[str] = Form(None)
):
    """Create a user"""
    try:
        success = db_manager.add_user(user_id, username, email)
        if success:
            user_info = db_manager.get_user(user_id)
            return UserResponse(
                success=True,
                user_id=user_id,
                username=user_info.get('username') if user_info else None,
                email=user_info.get('email') if user_info else None,
                created_at=user_info.get('created_at') if user_info else None
            )
        else:
            return UserResponse(success=False, user_id=user_id, error="Failed to create user")
    except Exception as e:
        logger.error(f"❌ User creation error: {e}")
        return UserResponse(success=False, user_id=user_id, error=str(e))


@app.get("/user/{user_id}", response_model=UserResponse)
async def get_user_info(user_id: str):
    """Look up user information"""
    try:
        user_info = db_manager.get_user(user_id)
        if user_info:
            return UserResponse(
                success=True,
                user_id=user_id,
                username=user_info.get('username'),
                email=user_info.get('email'),
                created_at=user_info.get('created_at')
            )
        else:
            return UserResponse(success=False, user_id=user_id, error="User not found")
    except Exception as e:
        logger.error(f"❌ User lookup error: {e}")
        return UserResponse(success=False, user_id=user_id, error=str(e))


# Session management endpoints
@app.post("/session/create", response_model=SessionResponse)
async def create_session(
    user_id: str = Form(...),
    session_name: Optional[str] = Form(None)
):
    """Create a chat session"""
    try:
        session_id = db_manager.create_chat_session(user_id, session_name)
        if session_id:
            return SessionResponse(
                success=True,
                session_id=session_id,
                session_name=session_name
            )
        else:
            return SessionResponse(success=False, session_id="", error="Failed to create session")
    except Exception as e:
        logger.error(f"❌ Session creation error: {e}")
        return SessionResponse(success=False, session_id="", error=str(e))


@app.get("/sessions/{user_id}")
async def list_user_sessions(user_id: str):
    """List a user's sessions"""
    try:
        sessions = db_manager.get_user_sessions(user_id)
        return {
            "success": True,
            "user_id": user_id,
            "sessions": sessions,
            "total_sessions": len(sessions)
        }
    except Exception as e:
        logger.error(f"❌ Session listing error: {e}")
        return {"success": False, "error": str(e)}


# Chat message endpoints
@app.post("/chat/message", response_model=ChatMessageResponse)
async def add_chat_message(
    session_id: str = Form(...),
    user_id: str = Form(...),
    message_type: str = Form(...),
    content: str = Form(...)
):
    """Add a chat message"""
    try:
        success = db_manager.add_chat_message(session_id, user_id, message_type, content)
        if success:
            return ChatMessageResponse(
                success=True,
                message_id=0,  # the real ID is auto-generated by the DB
                content=content,
                message_type=message_type,
                timestamp=datetime.now().isoformat()
            )
        else:
            return ChatMessageResponse(
                success=False,
                message_id=0,
                content="",
                message_type="",
                timestamp="",
                error="Failed to add message"
            )
    except Exception as e:
        logger.error(f"❌ Message insert error: {e}")
        return ChatMessageResponse(
            success=False,
            message_id=0,
            content="",
            message_type="",
            timestamp="",
            error=str(e)
        )


@app.get("/chat/history/{session_id}")
async def get_chat_history(session_id: str, limit: int = 50):
    """Fetch chat history"""
    try:
        messages = db_manager.get_chat_history(session_id, limit)
        return {
            "success": True,
            "session_id": session_id,
            "messages": messages,
            "total_messages": len(messages)
        }
    except Exception as e:
        logger.error(f"❌ Chat history error: {e}")
        return {"success": False, "error": str(e)}


# Document management endpoints (DB-backed)
@app.get("/documents/db/{user_id}")
async def list_user_documents_db(user_id: str):
    """List a user's documents (DB-based)"""
    try:
        documents = db_manager.get_user_documents(user_id)
        return {
            "success": True,
            "user_id": user_id,
            "documents": documents,
            "total_documents": len(documents)
        }
    except Exception as e:
        logger.error(f"❌ Document listing error: {e}")
        return {"success": False, "error": str(e)}


# Authentication endpoints
@app.post("/auth/login", response_model=LoginResponse)
async def login(
    user_id: str = Form(...),
    password: str = Form(...)
):
    """User login"""
    try:
        # Look up the user
        user_info = db_manager.get_user(user_id)
        if not user_info:
            return LoginResponse(success=False, error="User not found")

        # Password verification (simplified - production code should compare
        # against the hash stored in the DB)
        if not auth_manager.verify_password(password, "dummy_hash"):  # a real implementation compares with the DB hash
            return LoginResponse(success=False, error="Incorrect password")

        # Issue tokens
        tokens = auth_manager.create_user_tokens(user_id, user_info.get('username'))

        return LoginResponse(
            success=True,
            access_token=tokens['access_token'],
            refresh_token=tokens['refresh_token'],
            token_type=tokens['token_type'],
            user_id=user_id,
            username=user_info.get('username')
        )
    except Exception as e:
        logger.error(f"❌ Login error: {e}")
        return LoginResponse(success=False, error=str(e))


@app.post("/auth/refresh", response_model=TokenResponse)
async def refresh_token(refresh_token: str = Form(...)):
    """Refresh the access token"""
    try:
        new_access_token = auth_manager.refresh_access_token(refresh_token)

        if new_access_token:
            return TokenResponse(
                success=True,
                access_token=new_access_token,
                token_type="bearer"
            )
        else:
            return TokenResponse(success=False, error="Invalid refresh token")
    except Exception as e:
        logger.error(f"❌ Token refresh error: {e}")
        return TokenResponse(success=False, error=str(e))


@app.post("/auth/register", response_model=LoginResponse)
async def register(
    user_id: str = Form(...),
    username: str = Form(...),
    password: str = Form(...),
    email: Optional[str] = Form(None)
):
    """Register a user"""
    try:
        # Check for an existing user
        existing_user = db_manager.get_user(user_id)
        if existing_user:
            return LoginResponse(success=False, error="User ID already exists")

        # Hash the password
        hashed_password = auth_manager.hash_password(password)

        # Create the user (a real implementation stores hashed_password in the DB)
        success = db_manager.add_user(user_id, username, email)

        if success:
            # Issue tokens
            tokens = auth_manager.create_user_tokens(user_id, username)

            return LoginResponse(
                success=True,
                access_token=tokens['access_token'],
                refresh_token=tokens['refresh_token'],
                token_type=tokens['token_type'],
                user_id=user_id,
                username=username
            )
        else:
            return LoginResponse(success=False, error="User registration failed")
    except Exception as e:
        logger.error(f"❌ Registration error: {e}")
        return LoginResponse(success=False, error=str(e))


@app.get("/auth/me")
async def get_current_user_info(credentials: HTTPAuthorizationCredentials = Depends(auth_manager.security)):
    """Get the current user's information"""
    try:
        user_info = auth_manager.get_current_user(credentials)
        return {
            "success": True,
            "user_id": user_info.get("sub"),
            "username": user_info.get("username"),
            "token_type": user_info.get("type")
        }
    except Exception as e:
        logger.error(f"❌ User info error: {e}")
        return {"success": False, "error": str(e)}


# WebSocket real-time chat endpoint
@app.websocket("/ws/{user_id}")
async def websocket_endpoint(websocket: WebSocket, user_id: str, session_id: str = None):
    """WebSocket real-time chat endpoint"""
    try:
        # Accept the connection
        await connection_manager.connect(websocket, user_id, session_id)

        # Broadcast the connection status
        await connection_manager.broadcast_message({
            "type": "user_connected",
            "user_id": user_id,
            "session_id": session_id,
            "timestamp": datetime.now().isoformat()
        }, exclude_user=user_id)

        # Message receive loop
        while True:
            try:
                # Receive a message
                data = await websocket.receive_text()
                message_data = json.loads(data)

                # Dispatch by message type
                message_type = message_data.get("type", "chat")

                if message_type == "chat":
                    # Handle a chat message
                    content = message_data.get("content", "")
                    session_id = message_data.get("session_id")

                    # Persist the message in the DB
                    if session_id:
                        db_manager.add_chat_message(
                            session_id=session_id,
                            user_id=user_id,
                            message_type="user",
                            content=content
                        )

                    # Forward the message to the other users in the session
                    await connection_manager.send_session_message({
                        "type": "chat_message",
                        "user_id": user_id,
                        "content": content,
                        "session_id": session_id,
                        "timestamp": datetime.now().isoformat()
                    }, session_id, exclude_user=user_id)

                    # Generate an AI response (optional)
                    if message_data.get("generate_ai_response", False):
                        # AI response generation
                        ai_response = await generate_ai_response(content, user_id)

                        # Persist the AI response in the DB
                        if session_id:
                            db_manager.add_chat_message(
                                session_id=session_id,
                                user_id="ai_assistant",
                                message_type="assistant",
                                content=ai_response
                            )

                        # Send the AI response to the session's users
                        await connection_manager.send_session_message({
                            "type": "ai_response",
                            "user_id": "ai_assistant",
                            "content": ai_response,
                            "session_id": session_id,
                            "timestamp": datetime.now().isoformat()
                        }, session_id)

                elif message_type == "typing":
                    # Relay typing status
                    await connection_manager.send_session_message({
                        "type": "user_typing",
                        "user_id": user_id,
                        "session_id": message_data.get("session_id"),
                        "timestamp": datetime.now().isoformat()
                    }, message_data.get("session_id"), exclude_user=user_id)

                elif message_type == "join_session":
                    # Join a session
                    new_session_id = message_data.get("session_id")
                    if new_session_id:
                        # Remove from the previous session
                        if user_id in connection_manager.connection_info:
                            old_session_id = connection_manager.connection_info[user_id].get("session_id")
                            if old_session_id and old_session_id in connection_manager.session_connections:
                                connection_manager.session_connections[old_session_id].discard(user_id)

                        # Add to the new session
                        if new_session_id not in connection_manager.session_connections:
                            connection_manager.session_connections[new_session_id] = set()
                        connection_manager.session_connections[new_session_id].add(user_id)

                        # Update the connection info
                        if user_id in connection_manager.connection_info:
                            connection_manager.connection_info[user_id]["session_id"] = new_session_id

                        # Notify the session
                        await connection_manager.send_session_message({
                            "type": "user_joined_session",
                            "user_id": user_id,
                            "session_id": new_session_id,
                            "timestamp": datetime.now().isoformat()
                        }, new_session_id, exclude_user=user_id)

                logger.info(f"WebSocket message handled: {user_id} - {message_type}")

            except WebSocketDisconnect:
                logger.info(f"WebSocket disconnected: {user_id}")
                break
            except json.JSONDecodeError:
                logger.warning(f"⚠️ Malformed JSON received: {user_id}")
                await websocket.send_text(json.dumps({
                    "type": "error",
                    "message": "Invalid message format."
                }))
            except Exception as e:
                logger.error(f"❌ WebSocket message handling error: {e}")
                await websocket.send_text(json.dumps({
                    "type": "error",
                    "message": "An error occurred while handling the message."
                }))

    except WebSocketDisconnect:
        logger.info(f"WebSocket disconnected: {user_id}")
    except Exception as e:
        logger.error(f"❌ WebSocket endpoint error: {e}")
    finally:
        # Release the connection
        connection_manager.disconnect(user_id)

        # Notify others of the disconnect
        await connection_manager.broadcast_message({
            "type": "user_disconnected",
            "user_id": user_id,
            "timestamp": datetime.now().isoformat()
        }, exclude_user=user_id)


async def generate_ai_response(content: str, user_id: str) -> str:
    """Generate an AI response (simple example)"""
    try:
        # Generate a response with the current model. generate_sync is blocking
        # (and not awaitable), so run it in the executor instead of awaiting it
        # directly; it returns its result under the "text" key.
        loop = asyncio.get_event_loop()
        result = await loop.run_in_executor(executor, generate_sync, content, None)
        return result.get("text", "Sorry, I could not generate a response.")
    except Exception as e:
        logger.error(f"❌ AI response generation failed: {e}")
        return "Sorry, I could not generate a response."


# WebSocket status endpoint
@app.get("/ws/status")
async def get_websocket_status():
    """Inspect WebSocket connection status"""
    return {
        "active_connections": connection_manager.get_connection_count(),
        "active_users": connection_manager.get_active_users(),
        "sessions": list(connection_manager.session_connections.keys())
    }
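

# --- Usage sketch (not part of the original diff) ---
# A minimal asyncio client for the /ws/{user_id} endpoint, using the
# third-party `websockets` package (an assumption; any WebSocket client
# works). It joins a session, sends one chat message with an AI response
# requested, and prints whatever comes back first.
async def _example_ws_client():
    import json as _json
    import websockets  # pip install websockets
    uri = "ws://localhost:8000/ws/alice"  # assumed server address
    async with websockets.connect(uri) as ws:
        await ws.send(_json.dumps({"type": "join_session", "session_id": "s1"}))
        await ws.send(_json.dumps({
            "type": "chat",
            "session_id": "s1",
            "content": "Hello!",
            "generate_ai_response": True,
        }))
        reply = await ws.recv()
        print(_json.loads(reply))
# To run it: asyncio.run(_example_ws_client())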


# Celery background task endpoints
@app.post("/tasks/document/process")
async def start_document_processing(
    user_id: str = Form(...),
    document_id: str = Form(...),
    file_path: str = Form(...),
    file_type: str = Form(...)
):
    """Start a background document-processing task"""
    try:
        task = process_document_async.delay(user_id, document_id, file_path, file_type)

        return {
            "success": True,
            "task_id": task.id,
            "status": "started",
            "message": "Document processing task started."
        }
    except Exception as e:
        logger.error(f"❌ Failed to start the document processing task: {e}")
        return {
            "success": False,
            "error": str(e)
        }


@app.post("/tasks/ai/generate")
async def start_ai_generation(
    user_id: str = Form(...),
    session_id: str = Form(...),
    prompt: str = Form(...),
    model_id: Optional[str] = Form(None)
):
    """Start a background AI-response-generation task"""
    try:
        task = generate_ai_response_async.delay(user_id, session_id, prompt, model_id)

        return {
            "success": True,
            "task_id": task.id,
            "status": "started",
            "message": "AI response generation task started."
        }
    except Exception as e:
        logger.error(f"❌ Failed to start the AI generation task: {e}")
        return {
            "success": False,
            "error": str(e)
        }


@app.post("/tasks/rag/query")
async def start_rag_query(
    user_id: str = Form(...),
    query: str = Form(...),
    document_id: str = Form(...)
):
    """Start a background RAG query task"""
    try:
        task = rag_query_async.delay(user_id, query, document_id)

        return {
            "success": True,
            "task_id": task.id,
            "status": "started",
            "message": "RAG query task started."
        }
    except Exception as e:
        logger.error(f"❌ Failed to start the RAG query task: {e}")
        return {
            "success": False,
            "error": str(e)
        }


@app.post("/tasks/documents/batch")
async def start_batch_processing(
    user_id: str = Form(...),
    document_ids: str = Form(...)  # passed as a JSON string
):
    """Start a background batch document-processing task"""
    try:
        import json
        doc_ids = json.loads(document_ids)

        task = batch_process_documents_async.delay(user_id, doc_ids)

        return {
            "success": True,
            "task_id": task.id,
            "status": "started",
            "message": f"Batch document processing task started. ({len(doc_ids)} documents)"
        }
    except Exception as e:
        logger.error(f"❌ Failed to start the batch processing task: {e}")
        return {
            "success": False,
            "error": str(e)
        }


@app.get("/tasks/{task_id}")
async def get_task_status_endpoint(task_id: str):
    """Query task status"""
    try:
        status = get_task_status(task_id)

        if status:
            return {
                "success": True,
                "task_id": task_id,
                "status": status["status"],
                "result": status["result"],
                "info": status["info"]
            }
        else:
            return {
                "success": False,
                "error": "Task not found."
            }
    except Exception as e:
        logger.error(f"❌ Task status query failed: {e}")
        return {
            "success": False,
            "error": str(e)
        }


@app.delete("/tasks/{task_id}")
async def cancel_task_endpoint(task_id: str):
    """Cancel a task"""
    try:
        success = cancel_task(task_id)

        if success:
            return {
                "success": True,
                "task_id": task_id,
                "message": "Task cancelled."
            }
        else:
            return {
                "success": False,
                "error": "Failed to cancel the task."
            }
    except Exception as e:
        logger.error(f"❌ Task cancellation failed: {e}")
        return {
            "success": False,
            "error": str(e)
        }
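

# --- Usage sketch (not part of the original diff) ---
# Fire-and-poll pattern for the Celery endpoints: start a task, then poll
# /tasks/{task_id} until it leaves the pending state. The "PENDING"/"STARTED"
# state names assume get_task_status() surfaces Celery's default states; the
# polling interval and server address are also assumptions.
def _example_task_polling_client():
    import time as _time
    import requests  # client-side dependency, assumed available
    base = "http://localhost:8000"
    started = requests.post(f"{base}/tasks/rag/query", data={
        "user_id": "alice", "query": "Summarize chapter 1", "document_id": "ab12cd34"
    }).json()
    task_id = started["task_id"]
    while True:
        status = requests.get(f"{base}/tasks/{task_id}").json()
        if status.get("status") not in ("PENDING", "STARTED"):
            print(status.get("result"))
            break
        _time.sleep(2)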


# Performance monitoring endpoints
@app.post("/monitoring/start")
async def start_performance_monitoring():
    """Start performance monitoring"""
    try:
        performance_monitor.start_monitoring()
        return {"message": "Performance monitoring started."}
    except Exception as e:
        logger.error(f"Failed to start monitoring: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to start monitoring: {str(e)}")


@app.post("/monitoring/stop")
async def stop_performance_monitoring():
    """Stop performance monitoring"""
    try:
        performance_monitor.stop_monitoring()
        return {"message": "Performance monitoring stopped."}
    except Exception as e:
        logger.error(f"Failed to stop monitoring: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to stop monitoring: {str(e)}")


@app.get("/monitoring/status")
async def get_monitoring_status():
    """Query monitoring status"""
    try:
        summary = performance_monitor.get_performance_summary()
        return summary
    except Exception as e:
        logger.error(f"Monitoring status query failed: {e}")
        raise HTTPException(status_code=500, detail=f"Monitoring status query failed: {str(e)}")


@app.get("/monitoring/health")
async def get_system_health():
    """Query system health"""
    try:
        health = performance_monitor.get_system_health()
        return {
            "status": health.status,
            "cpu_health": health.cpu_health,
            "memory_health": health.memory_health,
            "disk_health": health.disk_health,
            "network_health": health.network_health,
            "recommendations": health.recommendations
        }
    except Exception as e:
        logger.error(f"System health query failed: {e}")
        raise HTTPException(status_code=500, detail=f"System health query failed: {str(e)}")


@app.post("/monitoring/export")
async def export_performance_metrics(file_path: str = "performance_metrics.json"):
    """Export performance metrics"""
    try:
        performance_monitor.export_metrics(file_path)
        return {"message": f"Performance metrics saved to {file_path}."}
    except Exception as e:
        logger.error(f"Metrics export failed: {e}")
        raise HTTPException(status_code=500, detail=f"Metrics export failed: {str(e)}")
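

# --- Usage sketch (not part of the original diff) ---
# A simple monitoring round-trip: start monitoring, sample health once, export
# metrics, then stop. Note that /monitoring/export takes file_path as a query
# parameter. Server address is an assumption.
def _example_monitoring_client():
    import requests  # client-side dependency, assumed available
    base = "http://localhost:8000"
    requests.post(f"{base}/monitoring/start")
    health = requests.get(f"{base}/monitoring/health").json()
    print(health["status"], health["recommendations"])
    requests.post(f"{base}/monitoring/export", params={"file_path": "metrics.json"})
    requests.post(f"{base}/monitoring/stop")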


# ============================================================================
# Image-OCR-only API endpoints (fully separated from the text-based system)
# ============================================================================

@app.post("/image-ocr/upload", response_model=DocumentUploadResponse)
async def upload_image_document(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),
    document_id: Optional[str] = Form(None)
):
    """Upload a document for image OCR"""
    start_time = time.time()

    try:
        # Generate a document ID if one was not provided
        if not document_id:
            import uuid
            document_id = str(uuid.uuid4())[:8]

        # Save to a temporary file
        temp_file_path = f"./temp_image_{document_id}_{file.filename}"
        with open(temp_file_path, "wb") as f:
            content = await file.read()
            f.write(content)

        # Run image OCR and store the result in the vector store
        result = image_rag_processor.process_and_store_image_document(
            user_id, document_id, temp_file_path
        )

        # Delete the temporary file
        import os
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

        processing_time = time.time() - start_time
        logger.info(f"Image OCR document upload complete ({processing_time:.2f}s): {file.filename}")

        return DocumentUploadResponse(
            success=result["success"],
            document_id=document_id,
            message=result.get("message", ""),
            chunks=result.get("chunks"),
            latex_count=result.get("latex_count"),
            error=result.get("error"),
            auto_response=result.get("auto_response", "")
        )

    except Exception as e:
        logger.error(f"❌ Image OCR document upload failed: {e}")
        return DocumentUploadResponse(
            success=False,
            document_id=document_id if 'document_id' in locals() else "unknown",
            message="An error occurred while uploading the image OCR document.",
            error=str(e)
        )


@app.post("/image-ocr/generate", response_model=RAGResponse)
async def generate_image_ocr_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...)
):
    """Generate a RAG response based on image OCR"""
    start_time = time.time()

    try:
        # Generate the image OCR RAG response
        result = image_rag_processor.generate_image_rag_response(
            user_id, document_id, query
        )

        processing_time = time.time() - start_time
        result["processing_time"] = processing_time

        logger.info(f"Image OCR RAG response complete ({processing_time:.2f}s)")
        return result

    except Exception as e:
        logger.error(f"❌ Image OCR RAG response failed: {e}")
        return RAGResponse(
            success=False,
            response=f"An error occurred while generating the image OCR RAG response: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=time.time() - start_time
        )


@app.get("/image-ocr/document/{user_id}/{document_id}")
async def get_image_document_info(user_id: str, document_id: str):
    """Look up image OCR document info"""
    try:
        result = image_rag_processor.get_image_document_info(user_id, document_id)
        return result
    except Exception as e:
        logger.error(f"❌ Image OCR document info lookup failed: {e}")
        return {
            "success": False,
            "error": str(e)
        }


@app.delete("/image-ocr/document/{user_id}/{document_id}")
async def delete_image_document(user_id: str, document_id: str):
    """Delete an image OCR document"""
    try:
        # Delete the document from the vector store
        success = vector_store_manager.delete_document(user_id, document_id)

        if success:
            return {
                "success": True,
                "message": "Image OCR document deleted."
            }
        else:
            return {
                "success": False,
                "error": "Failed to delete the image OCR document."
            }
    except Exception as e:
        logger.error(f"❌ Image OCR document deletion failed: {e}")
        return {
            "success": False,
            "error": str(e)
        }
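

# --- Usage sketch (not part of the original diff) ---
# The image-OCR pipeline mirrors /document/upload + /rag/generate; the
# LaTeX-OCR endpoints below follow the same shape under "latex-ocr" paths.
# Server address and file name are assumptions.
def _example_image_ocr_client():
    import requests  # client-side dependency, assumed available
    base = "http://localhost:8000"
    with open("scan.png", "rb") as f:
        up = requests.post(f"{base}/image-ocr/upload",
                           data={"user_id": "alice"},
                           files={"file": ("scan.png", f, "image/png")}).json()
    doc_id = up["document_id"]
    ans = requests.post(f"{base}/image-ocr/generate", data={
        "query": "What text appears in the scan?",
        "user_id": "alice", "document_id": doc_id,
    }).json()
    print(ans["response"])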
|
1642 |
+
|
1643 |
+
# ============================================================================
|
1644 |
+
# LaTeX-OCR ์ ์ฉ API ์๋ํฌ์ธํธ (์ํ ์์ ์ธ์ ๊ธฐ๋ฅ ํฌํจ)
|
1645 |
+
# ============================================================================
|
1646 |
+
|
1647 |
+
@app.post("/latex-ocr/upload", response_model=DocumentUploadResponse)
|
1648 |
+
async def upload_latex_document(
|
1649 |
+
file: UploadFile = File(...),
|
1650 |
+
user_id: str = Form("default_user"),
|
1651 |
+
document_id: Optional[str] = Form(None)
|
1652 |
+
):
|
1653 |
+
"""LaTeX-OCR ์ ์ฉ ๋ฌธ์ ์
๋ก๋"""
|
1654 |
+
start_time = time.time()
|
1655 |
+
|
1656 |
+
try:
|
1657 |
+
# ๋ฌธ์ ID ์์ฑ (์ ๊ณต๋์ง ์์ ๊ฒฝ์ฐ)
|
1658 |
+
if not document_id:
|
1659 |
+
import uuid
|
1660 |
+
document_id = str(uuid.uuid4())[:8]
|
1661 |
+
|
1662 |
+
# ์์ ํ์ผ ์ ์ฅ
|
1663 |
+
temp_file_path = f"./temp_latex_{document_id}_{file.filename}"
|
1664 |
+
with open(temp_file_path, "wb") as f:
|
1665 |
+
content = await file.read()
|
1666 |
+
f.write(content)
|
1667 |
+
|
1668 |
+
# LaTeX-OCR ์ฒ๋ฆฌ ๋ฐ ๋ฒกํฐ ์คํ ์ด์ ์ ์ฅ
|
1669 |
+
result = latex_rag_processor.process_and_store_latex_document(
|
1670 |
+
user_id, document_id, temp_file_path
|
1671 |
+
)
|
1672 |
+
|
1673 |
+
# ์์ ํ์ผ ์ญ์
|
1674 |
+
import os
|
1675 |
+
if os.path.exists(temp_file_path):
|
1676 |
+
os.remove(temp_file_path)
|
1677 |
+
|
1678 |
+
processing_time = time.time() - start_time
|
1679 |
+
logger.info(f"๐งฎ LaTeX-OCR ๋ฌธ์ ์
๋ก๋ ์๋ฃ ({processing_time:.2f}์ด): {file.filename}")
|
1680 |
+
|
1681 |
+
return DocumentUploadResponse(
|
1682 |
+
success=result["success"],
|
1683 |
+
document_id=document_id,
|
1684 |
+
message=result.get("message", ""),
|
1685 |
+
chunks=result.get("chunks"),
|
1686 |
+
latex_count=result.get("latex_count"),
|
1687 |
+
error=result.get("error"),
|
1688 |
+
auto_response=result.get("auto_response", "")
|
1689 |
+
)
|
1690 |
+
|
1691 |
+
except Exception as e:
|
1692 |
+
logger.error(f"โ LaTeX-OCR ๋ฌธ์ ์
๋ก๋ ์คํจ: {e}")
|
1693 |
+
return DocumentUploadResponse(
|
1694 |
+
success=False,
|
1695 |
+
document_id=document_id if 'document_id' in locals() else "unknown",
|
1696 |
+
message="LaTeX-OCR ๋ฌธ์ ์
๋ก๋ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.",
|
1697 |
+
error=str(e)
|
1698 |
+
)
|
1699 |
+
|
1700 |
+
@app.post("/latex-ocr/generate", response_model=RAGResponse)
|
1701 |
+
async def generate_latex_ocr_response(
|
1702 |
+
query: str = Form(...),
|
1703 |
+
user_id: str = Form("default_user"),
|
1704 |
+
document_id: str = Form(...)
|
1705 |
+
):
|
1706 |
+
"""LaTeX-OCR ๊ธฐ๋ฐ RAG ์๋ต ์์ฑ"""
|
1707 |
+
start_time = time.time()
|
1708 |
+
|
1709 |
+
try:
|
1710 |
+
# LaTeX-OCR RAG ์๋ต ์์ฑ
|
1711 |
+
result = latex_rag_processor.generate_latex_rag_response(
|
1712 |
+
user_id, document_id, query
|
1713 |
+
)
|
1714 |
+
|
1715 |
+
processing_time = time.time() - start_time
|
1716 |
+
result["processing_time"] = processing_time
|
1717 |
+
|
1718 |
+
logger.info(f"๐งฎ LaTeX-OCR RAG ์๋ต ์์ฑ ์๋ฃ ({processing_time:.2f}์ด)")
|
1719 |
+
return result
|
1720 |
+
|
1721 |
+
except Exception as e:
|
1722 |
+
logger.error(f"โ LaTeX-OCR RAG ์๋ต ์์ฑ ์คํจ: {e}")
|
1723 |
+
return RAGResponse(
|
1724 |
+
success=False,
|
1725 |
+
response=f"LaTeX-OCR RAG ์๋ต ์์ฑ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
|
1726 |
+
context="",
|
1727 |
+
sources=[],
|
1728 |
+
search_results=0,
|
1729 |
+
processing_time=time.time() - start_time
|
1730 |
+
)
|
1731 |
+
|
1732 |
+
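A round-trip sketch of the two routes above, assuming a local server on port 8001 and an existing sample.png; the form-field names match the handler signatures:

import requests

BASE = "http://localhost:8001"  # assumed local server address

# Upload an image/PDF for LaTeX-OCR processing (multipart form)
with open("sample.png", "rb") as fp:
    up = requests.post(
        f"{BASE}/latex-ocr/upload",
        files={"file": fp},
        data={"user_id": "default_user"},  # document_id is auto-generated when omitted
    ).json()

# Ask a question against the uploaded document
ans = requests.post(
    f"{BASE}/latex-ocr/generate",
    data={
        "query": "Explain the first formula",
        "user_id": "default_user",
        "document_id": up["document_id"],
    },
).json()
print(ans["response"])
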
@app.get("/latex-ocr/document/{user_id}/{document_id}")
|
1733 |
+
async def get_latex_document_info(user_id: str, document_id: str):
|
1734 |
+
"""LaTeX-OCR ๋ฌธ์ ์ ๋ณด ์กฐํ"""
|
1735 |
+
try:
|
1736 |
+
result = latex_rag_processor.get_latex_document_info(user_id, document_id)
|
1737 |
+
return result
|
1738 |
+
except Exception as e:
|
1739 |
+
logger.error(f"โ LaTeX-OCR ๋ฌธ์ ์ ๋ณด ์กฐํ ์คํจ: {e}")
|
1740 |
+
return {
|
1741 |
+
"success": False,
|
1742 |
+
"error": str(e)
|
1743 |
+
}
|
1744 |
+
|
1745 |
+
@app.delete("/latex-ocr/document/{user_id}/{document_id}")
|
1746 |
+
async def delete_latex_document(user_id: str, document_id: str):
|
1747 |
+
"""LaTeX-OCR ๋ฌธ์ ์ญ์ """
|
1748 |
+
try:
|
1749 |
+
# ๋ฒกํฐ ์คํ ์ด์์ ๋ฌธ์ ์ญ์
|
1750 |
+
success = vector_store_manager.delete_document(user_id, document_id)
|
1751 |
+
|
1752 |
+
if success:
|
1753 |
+
return {
|
1754 |
+
"success": True,
|
1755 |
+
"message": "LaTeX-OCR ๋ฌธ์๊ฐ ์ญ์ ๋์์ต๋๋ค."
|
1756 |
+
}
|
1757 |
+
else:
|
1758 |
+
return {
|
1759 |
+
"success": False,
|
1760 |
+
"error": "LaTeX-OCR ๋ฌธ์ ์ญ์ ์ ์คํจํ์ต๋๋ค."
|
1761 |
+
}
|
1762 |
+
except Exception as e:
|
1763 |
+
logger.error(f"โ LaTeX-OCR ๋ฌธ์ ์ญ์ ์คํจ: {e}")
|
1764 |
+
return {
|
1765 |
+
"success": False,
|
1766 |
+
"error": str(e)
|
1767 |
+
}
|
1768 |
+
|
1769 |
+
# ============================================================================
# LaTeX-OCR + FAISS integrated system endpoints
# ============================================================================

# LaTeX-OCR + FAISS system instances (lazy-initialized)
latex_ocr_faiss_simple = None
latex_ocr_faiss_integrated = None

def init_latex_ocr_faiss_systems():
    """Initialize the LaTeX-OCR + FAISS systems"""
    global latex_ocr_faiss_simple, latex_ocr_faiss_integrated
    try:
        latex_ocr_faiss_simple = LatexOCRFAISSSimple()
        latex_ocr_faiss_integrated = LatexOCRFAISSIntegrated()
        logger.info("✅ LaTeX-OCR + FAISS systems initialized")
    except Exception as e:
        logger.error(f"❌ LaTeX-OCR + FAISS system initialization failed: {e}")

@app.post("/latex-ocr-faiss/process", response_model=DocumentUploadResponse)
async def process_pdf_with_latex_faiss(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),
    system_type: str = Form("simple")  # "simple" or "integrated"
):
    """Extract LaTeX formulas from a PDF and store them in FAISS"""
    try:
        # Save the uploaded file
        upload_dir = Path("uploads/latex_ocr_faiss")
        upload_dir.mkdir(parents=True, exist_ok=True)

        file_path = upload_dir / f"{user_id}_{file.filename}"
        with open(file_path, "wb") as f:
            content = await file.read()
            f.write(content)

        # Pick the requested system
        if system_type == "simple":
            if not latex_ocr_faiss_simple:
                init_latex_ocr_faiss_systems()
            system = latex_ocr_faiss_simple
        else:
            if not latex_ocr_faiss_integrated:
                init_latex_ocr_faiss_systems()
            system = latex_ocr_faiss_integrated

        # Process the PDF
        result = system.process_pdf_with_latex(str(file_path), user_id)

        if result["success"]:
            return DocumentUploadResponse(
                success=True,
                document_id=f"latex_ocr_faiss_{user_id}_{file.filename}",
                message=f"Extracted {result['latex_count']} LaTeX formulas",
                chunks=result['latex_count'],
                latex_count=result['latex_count']
            )
        else:
            return DocumentUploadResponse(
                success=False,
                document_id="",
                message="LaTeX formula extraction failed",
                error=result.get("error", "LaTeX formula extraction failed")
            )

    except Exception as e:
        logger.error(f"LaTeX-OCR + FAISS processing error: {e}")
        return DocumentUploadResponse(
            success=False,
            document_id="",
            message="An error occurred during processing",
            error=f"An error occurred during processing: {str(e)}"
        )

@app.post("/latex-ocr-faiss/search", response_model=RAGResponse)
async def search_latex_formulas(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_path: Optional[str] = Form(None),
    system_type: str = Form("simple"),
    k: int = Form(5)
):
    """Search the stored LaTeX formulas"""
    try:
        # Pick the requested system
        if system_type == "simple":
            if not latex_ocr_faiss_simple:
                init_latex_ocr_faiss_systems()
            system = latex_ocr_faiss_simple
        else:
            if not latex_ocr_faiss_integrated:
                init_latex_ocr_faiss_systems()
            system = latex_ocr_faiss_integrated

        # Search formulas
        search_result = system.search_formulas(query, user_id, document_path, k)

        if search_result["success"]:
            # Convert the search results into the response format
            context = "\n".join([f"Formula: {result['formula']} (similarity: {result['similarity']:.3f})"
                                 for result in search_result['results']])

            sources = [{"formula": result['formula'], "similarity": result['similarity'],
                        "page": result.get('page', 1)} for result in search_result['results']]

            return RAGResponse(
                success=True,
                response=f"Found {search_result['search_results']} matching formulas.",
                context=context,
                sources=sources,
                search_results=search_result['search_results'],
                processing_time=0.0  # actual processing time still needs to be measured
            )
        else:
            return RAGResponse(
                success=False,
                response="Formula search failed.",
                context="",
                sources=[],
                search_results=0,
                processing_time=0.0,
                error=search_result.get("error", "Search failed")
            )

    except Exception as e:
        logger.error(f"LaTeX formula search error: {e}")
        return RAGResponse(
            success=False,
            response="An error occurred during the search.",
            context="",
            sources=[],
            search_results=0,
            processing_time=0.0,
            error=str(e)
        )

@app.get("/latex-ocr-faiss/status")
async def get_latex_ocr_faiss_status():
    """Check LaTeX-OCR + FAISS system status"""
    try:
        simple_status = latex_ocr_faiss_simple is not None
        integrated_status = latex_ocr_faiss_integrated is not None

        return {
            "simple_system_initialized": simple_status,
            "integrated_system_initialized": integrated_status,
            "status": "ready" if (simple_status or integrated_status) else "not_initialized"
        }
    except Exception as e:
        logger.error(f"Status check error: {e}")
        return {"status": "error", "error": str(e)}

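A short client sketch for the search route; the server address is an assumption, while system_type and k map directly onto the Form fields above:

import requests

res = requests.post(
    "http://localhost:8001/latex-ocr-faiss/search",  # assumed local address
    data={
        "query": "quadratic formula",
        "user_id": "default_user",
        "system_type": "simple",  # or "integrated"
        "k": 5,                   # top-k formulas to return
    },
).json()
for src in res.get("sources", []):
    print(src["formula"], src["similarity"])
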
# ============================================================================
# Multimodal RAG system endpoints
# ============================================================================

@app.post("/hybrid-rag/upload", response_model=DocumentUploadResponse)
async def upload_hybrid_document(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),
    document_id: Optional[str] = Form(None)
):
    """Upload a document for multimodal RAG"""
    try:
        # Save the uploaded file
        upload_dir = Path("uploads/hybrid_rag")
        upload_dir.mkdir(parents=True, exist_ok=True)

        if not document_id:
            document_id = f"{user_id}_{int(time.time())}_{file.filename}"

        file_path = upload_dir / document_id
        with open(file_path, "wb") as buffer:
            content = await file.read()
            buffer.write(content)

        # Multimodal processing
        result = hybrid_rag_processor.process_document_hybrid(str(file_path), user_id, document_id)

        if result["success"]:
            # Count the subsystems that succeeded
            success_systems = []
            for key, value in result.items():
                if key.endswith('_processing') and value and value.get('success', False):
                    system_name = key.replace('_processing', '').replace('_', ' ').title()
                    success_systems.append(system_name)

            return DocumentUploadResponse(
                success=True,
                document_id=document_id,
                message=f"Multimodal processing complete: handled by {', '.join(success_systems)}",
                chunks=len(success_systems)
            )
        else:
            return DocumentUploadResponse(
                success=False,
                error=result.get("error", "Multimodal processing failed")
            )

    except Exception as e:
        logger.error(f"Multimodal RAG document upload error: {e}")
        return DocumentUploadResponse(
            success=False,
            error=f"An error occurred during upload: {str(e)}"
        )

@app.post("/hybrid-rag/generate", response_model=RAGResponse)
async def generate_hybrid_rag_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...),
    use_text: bool = Form(True),
    use_image: bool = Form(True),
    use_latex: bool = Form(True),
    use_latex_ocr: bool = Form(True),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Generate a multimodal RAG response"""
    try:
        result = hybrid_rag_processor.generate_hybrid_response(
            query, user_id, document_id,
            use_text, use_image, use_latex, use_latex_ocr,
            max_length, temperature, top_p, do_sample
        )

        return RAGResponse(
            success=result["success"],
            response=result["response"],
            context=result["context"],
            sources=result["sources"],
            search_results=result["search_results"],
            processing_time=result["processing_time"]
        )

    except Exception as e:
        logger.error(f"Multimodal RAG response generation error: {e}")
        return RAGResponse(
            success=False,
            response=f"An error occurred while generating the multimodal RAG response: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=0.0
        )

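A sketch of calling the hybrid endpoint with individual modalities toggled off; the field names mirror the Form parameters above, while the server address and document ID are assumptions:

import requests

res = requests.post(
    "http://localhost:8001/hybrid-rag/generate",  # assumed local address
    data={
        "query": "Summarize the document",
        "user_id": "default_user",
        "document_id": "default_user_1700000000_sample.pdf",  # hypothetical ID
        "use_text": "true",
        "use_image": "false",      # skip image retrieval
        "use_latex": "true",
        "use_latex_ocr": "false",  # skip the LaTeX-OCR FAISS store
        "temperature": 0.7,
    },
).json()
print(res["response"])
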
@app.get("/hybrid-rag/document/{user_id}/{document_id}")
|
2017 |
+
async def get_hybrid_document_info(user_id: str, document_id: str):
|
2018 |
+
"""๋ฉํฐ๋ชจ๋ฌ RAG ๋ฌธ์ ์ ๋ณด ์กฐํ"""
|
2019 |
+
try:
|
2020 |
+
result = hybrid_rag_processor.get_document_info(user_id, document_id)
|
2021 |
+
return result
|
2022 |
+
except Exception as e:
|
2023 |
+
logger.error(f"๋ฉํฐ๋ชจ๋ฌ RAG ๋ฌธ์ ์ ๋ณด ์กฐํ ์ค๋ฅ: {e}")
|
2024 |
+
return {"success": False, "error": str(e)}
|
2025 |
+
|
2026 |
+
@app.get("/hybrid-rag/status")
|
2027 |
+
async def get_hybrid_rag_status():
|
2028 |
+
"""๋ฉํฐ๋ชจ๋ฌ RAG ์์คํ
์ํ ํ์ธ"""
|
2029 |
+
try:
|
2030 |
+
return {
|
2031 |
+
"text_rag_available": True,
|
2032 |
+
"image_rag_available": True,
|
2033 |
+
"latex_rag_available": True,
|
2034 |
+
"latex_ocr_faiss_available": hybrid_rag_processor.latex_ocr_faiss_integrated is not None,
|
2035 |
+
"status": "ready"
|
2036 |
+
}
|
2037 |
+
except Exception as e:
|
2038 |
+
logger.error(f"๋ฉํฐ๋ชจ๋ฌ RAG ์ํ ํ์ธ ์ค๋ฅ: {e}")
|
2039 |
+
return {"status": "error", "error": str(e)}
|
2040 |
+
|
2041 |
+
# Commented out; run_server_v2.py launches the app directly
# if __name__ == "__main__":
#     uvicorn.run(
#         app,
#         host="0.0.0.0",
#         port=8001,
#         reload=False,
#         log_level="info"
#     )
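With the inline entry point commented out, the app is presumably launched via run_server_v2.py; an equivalent manual launch, assuming the module path lily_llm_api.app_v2, would be:

uvicorn lily_llm_api.app_v2:app --host 0.0.0.0 --port 8001 --log-level info
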
lily_llm_api/models/__init__.py
ADDED
@@ -0,0 +1,49 @@
#!/usr/bin/env python3
"""
Model profile registry
"""

# from .polyglot_ko_1_3b import PolyglotKo13bProfile
# from .dialogpt_medium import DialoGPTMediumProfile
# from .kanana_1_5_2_1b_instruct import Kanana15V21bInstructProfile
# from .kanana_nano_2_1b_instruct import KananaNano21bInstructProfile
# from .mistral_7b_instruct import Mistral7bInstructProfile
# from .polyglot_ko_5_8b import PolyglotKo58bProfile
from .polyglot_ko_1_3b_chat import PolyglotKo13bChatProfile
from .kanana_1_5_v_3b_instruct import Kanana15V3bInstructProfile
from .polyglot_ko_5_8b_chat import PolyglotKo58bChatProfile

# Model profiles currently available
AVAILABLE_MODELS = {
    # "polyglot-ko-1.3b": PolyglotKo13bProfile,
    # "dialogpt-medium": DialoGPTMediumProfile,
    # "kanana-1.5-2.1b-instruct": Kanana15V21bInstructProfile,
    # "kanana-nano-2.1b-instruct": KananaNano21bInstructProfile,
    # "mistral-7b-instruct": Mistral7bInstructProfile,
    # "polyglot-ko-5.8b": PolyglotKo58bProfile,
    "polyglot-ko-1.3b-chat": PolyglotKo13bChatProfile,
    "kanana-1.5-v-3b-instruct": Kanana15V3bInstructProfile,
    "polyglot-ko-5.8b-chat": PolyglotKo58bChatProfile,
}

def get_model_profile(model_id: str):
    """Look up a profile by model ID"""
    if model_id not in AVAILABLE_MODELS:
        raise ValueError(f"Unknown model ID: {model_id}")
    return AVAILABLE_MODELS[model_id]()

def list_available_models():
    """Return the list of available models"""
    models = []
    for model_id, profile_class in AVAILABLE_MODELS.items():
        profile = profile_class()
        model_info = profile.get_model_info()
        models.append({
            "model_id": model_id,
            "name": model_info["display_name"],
            "description": model_info["description"],
            "language": model_info["language"],
            "model_size": model_info["model_size"],
            "multimodal": model_info.get("multimodal", False)
        })
    return models

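A minimal usage sketch of this registry, assuming the package imports as lily_llm_api.models:

from lily_llm_api.models import get_model_profile, list_available_models

# Enumerate registered profiles with their metadata
for info in list_available_models():
    print(info["model_id"], info["model_size"], info["multimodal"])

# Instantiate one profile and load its weights
profile = get_model_profile("kanana-1.5-v-3b-instruct")
model, tokenizer = profile.load_model()
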
lily_llm_api/models/dialogpt_medium.py
ADDED
@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""
DialoGPT-medium model profile
"""

from typing import Dict, Any, Tuple
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging

logger = logging.getLogger(__name__)

class DialoGPTMediumProfile:
    """DialoGPT-medium model profile"""

    def __init__(self):
        self.model_name = "microsoft/DialoGPT-medium"
        self.local_path = None  # loaded from the Hub
        self.display_name = "DialoGPT-medium"
        self.description = "English conversational model (345M)"
        self.language = "en"
        self.model_size = "345M"

    def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Load the model"""
        logger.info(f"📥 Loading {self.display_name}...")

        try:
            # Download from the Hub
            tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            model = AutoModelForCausalLM.from_pretrained(self.model_name)

            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            logger.info(f"✅ {self.display_name} loaded!")
            return model, tokenizer

        except Exception as e:
            logger.error(f"❌ Failed to load {self.display_name}: {e}")
            raise

    def format_prompt(self, user_input: str) -> str:
        """Format the prompt"""
        return f"User: {user_input}\nAssistant:"

    def extract_response(self, full_text: str, formatted_prompt: str) -> str:
        """Extract the response"""
        if "Assistant:" in full_text:
            response = full_text.split("Assistant:")[-1].strip()
        else:
            if formatted_prompt in full_text:
                response = full_text.replace(formatted_prompt, "").strip()
            else:
                response = full_text.strip()

        return response

    def get_generation_config(self) -> Dict[str, Any]:
        """Generation settings"""
        return {
            "max_new_tokens": 50,
            "temperature": 0.9,
            "do_sample": True,
            "top_k": 50,
            "top_p": 0.95,
            "repetition_penalty": 1.1,
            "no_repeat_ngram_size": 3,
            "pad_token_id": None,
            "eos_token_id": None
        }

    def get_model_info(self) -> Dict[str, Any]:
        """Model info"""
        return {
            "model_name": self.model_name,
            "display_name": self.display_name,
            "description": self.description,
            "language": self.language,
            "model_size": self.model_size,
            "local_path": self.local_path
        }

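A sketch of how a profile plugs into a generation loop; this is illustrative, not an excerpt from app_v2.py, and it fills in the pad/eos token IDs the profile deliberately leaves unset:

profile = DialoGPTMediumProfile()
model, tokenizer = profile.load_model()

prompt = profile.format_prompt("Hello there!")
inputs = tokenizer(prompt, return_tensors="pt")

gen_cfg = profile.get_generation_config()
# The profile leaves these as None; supply them from the tokenizer
gen_cfg["pad_token_id"] = tokenizer.pad_token_id
gen_cfg["eos_token_id"] = tokenizer.eos_token_id

output_ids = model.generate(**inputs, **gen_cfg)
full_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(profile.extract_response(full_text, prompt))
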
lily_llm_api/models/kanana_1_5_2_1b_instruct.py
ADDED
@@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""
Kanana 1.5 2.1B Instruct model profile
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import Dict, Any, Tuple
import logging

logger = logging.getLogger(__name__)

class Kanana15V21bInstructProfile:
    """Kanana 1.5 2.1B Instruct model profile"""

    def __init__(self):
        self.model_name = "kakaocorp/kanana-1.5-2.1b-instruct-2505"
        self.local_path = "./lily_llm_core/models/kanana-1.5-2.1b-instruct"
        self.display_name = "Kanana 1.5 2.1B Instruct"
        self.description = "Kakao's Kanana 1.5 2.1B Instruct model"
        self.language = ["ko", "en"]
        self.model_size = "2.1B"

    def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Load the model"""
        logger.info(f"📥 Loading {self.display_name}...")

        try:
            # Load the tokenizer
            tokenizer = AutoTokenizer.from_pretrained(
                self.local_path,
                trust_remote_code=True,
                local_files_only=True
            )

            # Load the model
            model = AutoModelForCausalLM.from_pretrained(
                self.local_path,
                torch_dtype=torch.float32,
                device_map="cpu",
                low_cpu_mem_usage=True,
                local_files_only=True
            )

            # Tokenizer defaults
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
            if tokenizer.eos_token is None:
                tokenizer.eos_token = "</s>"

            logger.info(f"✅ {self.display_name} loaded!")
            return model, tokenizer

        except Exception as e:
            logger.error(f"❌ Failed to load {self.display_name}: {e}")
            raise

    def format_prompt(self, user_input: str) -> str:
        """Format the prompt"""
        return f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"

    def extract_response(self, full_text: str, formatted_prompt: str) -> str:
        """Extract the response"""
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            if "<|im_end|>" in response:
                response = response.split("<|im_end|>")[0]
            return response.strip()
        return full_text.strip()

    def get_generation_config(self) -> Dict[str, Any]:
        """Generation settings"""
        return {
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_p": 0.9,
            "do_sample": True,
            "repetition_penalty": 1.1,
            "no_repeat_ngram_size": 3,
            "pad_token_id": None,  # set from the tokenizer later
            "eos_token_id": None,  # set from the tokenizer later
        }

    def get_model_info(self) -> Dict[str, Any]:
        """Model info"""
        return {
            "model_name": self.model_name,
            "display_name": self.display_name,
            "description": self.description,
            "language": self.language,
            "model_size": self.model_size,
            "local_path": self.local_path
        }

lily_llm_api/models/kanana_1_5_v_3b_instruct.py
ADDED
@@ -0,0 +1,246 @@
#!/usr/bin/env python3
"""
Kanana-1.5-v-3b-instruct model profile (simple-loading, final version)
"""
import sys
from typing import Dict, Any, Tuple
import torch
import logging
from transformers import AutoTokenizer

logger = logging.getLogger(__name__)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

class Kanana15V3bInstructProfile:
    """Kanana-1.5-v-3b-instruct model profile"""

    def __init__(self):
        # Environment detection
        self.is_local = self._detect_local_environment()

        # Model path setup
        if self.is_local:
            self.model_name = "gbrabbit/lily-math-model"  # use the HF model name locally as well
            self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
            self.display_name = "Kanana-1.5-v-3b-instruct (local)"
        else:
            self.model_name = "gbrabbit/lily-math-model"  # Hugging Face model path
            self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
            self.display_name = "Kanana-1.5-v-3b-instruct (server)"

        self.description = "Kakao multimodal model (3.6B), specialized for Math RAG"
        self.language = "ko"
        self.model_size = "3.6B"
        self.multimodal = True

    def _detect_local_environment(self) -> bool:
        """Detect whether we are running locally"""
        import os

        # Conditions that indicate a local environment
        local_indicators = [
            os.path.exists('.env'),
            os.path.exists('../.env'),
            os.path.exists('../../.env'),
            os.getenv('IS_LOCAL') == 'true',
            os.getenv('ENVIRONMENT') == 'local',
            os.getenv('DOCKER_ENV') == 'local',
            # Windows path check
            os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
        ]

        is_local = any(local_indicators)
        logger.info(f"🔍 Environment detected: {'local' if is_local else 'server'}")
        return is_local

    def _load_environment_variables(self):
        """Load environment variables."""
        import os

        try:
            if self.is_local:
                # Local environment: load the .env file
                from dotenv import load_dotenv

                # Look for a .env file in several locations
                env_paths = [
                    '.env',
                    '../.env',
                    '../../.env',
                    'C:/Project/lily_generate_project/lily_generate_package/.env',
                ]

                env_loaded = False
                for env_path in env_paths:
                    if os.path.exists(env_path):
                        load_dotenv(env_path)
                        logger.info(f"✅ Environment variables loaded: {env_path}")
                        env_loaded = True
                        break

                if not env_loaded:
                    logger.warning("⚠️ No .env file found")
            else:
                # Server environment: rely on system environment variables
                logger.info("🌐 Using server environment variables")

        except ImportError:
            logger.warning("⚠️ python-dotenv is not installed")
        except Exception as e:
            logger.error(f"❌ Failed to load environment variables: {e}")

    def load_model(self) -> Tuple[Any, Any]:
        """Load the model according to the detected environment."""
        logger.info(f"📥 Loading {self.display_name}...")

        import os
        from pathlib import Path

        # Load environment variables
        self._load_environment_variables()

        use_local = False  # default, so the except block below can reference it safely
        try:
            # 1. Check whether a local cache path exists
            local_model_path = Path(self.local_path)
            use_local = local_model_path.exists() and any(local_model_path.iterdir())

            if use_local:
                logger.info(f"🗂️ Using local model: {self.local_path}")
                model_path = self.local_path
                local_files_only = True

                # For a local model, add its directory to sys.path
                if self.local_path not in sys.path:
                    sys.path.insert(0, self.local_path)
            else:
                logger.info(f"🌐 Downloading from the Hugging Face Hub: {self.model_name}")
                model_path = self.model_name
                local_files_only = False

            # Environment-specific extras
            if self.is_local:
                logger.info("🏠 Applying local environment settings")
                # Extra local-only settings could go here
            else:
                logger.info("☁️ Applying server environment settings")
                # Server-side settings such as the cache directory

            # 2. Load the tokenizer
            tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                trust_remote_code=True,
                local_files_only=local_files_only,
                cache_dir="/app/cache/transformers" if not use_local else None
            )
            logger.info(f"✅ Tokenizer loaded ({tokenizer.__class__.__name__})")

            # 3. Load the model
            if use_local:
                # Local model: use the custom modeling class
                from modeling import KananaVForConditionalGeneration
                model = KananaVForConditionalGeneration.from_pretrained(
                    model_path,
                    trust_remote_code=True,
                    torch_dtype=torch.float16,
                    local_files_only=True,
                    low_cpu_mem_usage=True,
                ).to(DEVICE)
            else:
                # Hugging Face Hub: use the standard AutoModel
                from transformers import AutoModelForCausalLM
                model = AutoModelForCausalLM.from_pretrained(
                    model_path,
                    trust_remote_code=True,
                    torch_dtype=torch.float16,
                    cache_dir="/app/cache/transformers",
                    low_cpu_mem_usage=True,
                ).to(DEVICE)

            logger.info(f"✅ Model loaded ({model.__class__.__name__})")
            return model, tokenizer

        except Exception as e:
            logger.error(f"❌ Failed to load {self.display_name}: {e}", exc_info=True)
            if use_local and self.local_path in sys.path:
                sys.path.remove(self.local_path)
            raise

    def get_generation_config(self) -> Dict[str, Any]:
        # Tuned generation parameters; max_new_tokens caps the generated length
        # (raised to leave room for image descriptions)
        return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}

    def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
        """
        A more robust response extractor that can handle several output formats
        """
        logger.info("--- Response extraction started ---")
        logger.info(f"Full generated text (raw): \n---\n{full_text}\n---")

        # If the prompt was provided, strip it out first
        if formatted_prompt and formatted_prompt in full_text:
            response = full_text.replace(formatted_prompt, "").strip()
            logger.info("✅ Success: response extracted by removing the prompt")
            logger.info(f"Extracted response: {response}")
            if response:  # only return non-empty strings
                return response

        # 1st pass: try the most specific assistant tags,
        # e.g. <|start_header_id|>assistant<|end_header_id|>Hello...
        # or <|im_start|>assistantHello...
        assistant_tags = [
            "<|start_header_id|>assistant<|end_header_id|>",
            "<|im_start|>assistant",
            "assistant\n",
            "assistant:"
        ]
        for tag in assistant_tags:
            if tag in full_text:
                parts = full_text.split(tag)
                if len(parts) > 1:
                    response = parts[-1].strip()
                    # Extra cleanup: drop special tokens
                    response = response.replace("<|im_end|>", "").strip()
                    logger.info(f"✅ Success: response extracted with the '{tag}' tag")
                    logger.info(f"Extracted response: {response}")
                    if response:  # only return non-empty strings
                        return response

        # 2nd pass: fall back to the bare "assistant" keyword,
        # e.g. ... user Hello assistant Hello ...
        if "assistant" in full_text:
            parts = full_text.split("assistant")
            if len(parts) > 1:
                response = parts[-1].strip()
                response = response.replace("<|im_end|>", "").strip()
                logger.info("✅ Success: response extracted with the 'assistant' keyword")
                logger.info(f"Extracted response: {response}")
                if response:  # only return non-empty strings
                    return response

        # 3rd pass: no prompt available, so strip common boilerplate from the full text
        clean_text = full_text.strip()
        # Try removing common prompt patterns
        patterns_to_remove = [
            "<|im_start|>user\n",
            "<|im_end|>",
            "<image>",
            "user\n",
            "assistant\n"
        ]

        for pattern in patterns_to_remove:
            clean_text = clean_text.replace(pattern, "")

        clean_text = clean_text.strip()

        if clean_text and clean_text != full_text:
            logger.info("✅ Success: response cleaned by pattern removal")
            logger.info(f"Cleaned response: {clean_text}")
            return clean_text

        logger.warning("⚠️ Warning: no assistant section found in the output; returning the full text.")
        logger.info(f"Final returned text: {full_text}")
        return full_text

    def get_model_info(self) -> Dict[str, Any]:
        return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}

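Because _detect_local_environment keys off .env files and a few environment variables, the mode can be forced explicitly; a small sketch using the IS_LOCAL variable from the detector above:

import os

os.environ["IS_LOCAL"] = "true"   # force local mode before the profile is built
profile = Kanana15V3bInstructProfile()
assert profile.is_local

# Or, from a shell, before starting the server:
#   IS_LOCAL=true python run_server_v2.py
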
lily_llm_api/models/kanana_nano_2_1b_instruct.py
ADDED
@@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""
Kanana Nano 2.1B Instruct model profile
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import Dict, Any, Tuple
import logging

logger = logging.getLogger(__name__)

class KananaNano21bInstructProfile:
    """Kanana Nano 2.1B Instruct model profile"""

    def __init__(self):
        self.model_name = "kakaocorp/kanana-nano-2.1b-instruct"
        self.local_path = "./lily_llm_core/models/kanana-nano-2.1b-instruct"
        self.display_name = "Kanana Nano 2.1B Instruct"
        self.description = "Kakao's Kanana Nano 2.1B Instruct model (the smallest variant)"
        self.language = ["ko", "en"]
        self.model_size = "2.1B"

    def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Load the model"""
        logger.info(f"📥 Loading {self.display_name}...")

        try:
            # Load the tokenizer
            tokenizer = AutoTokenizer.from_pretrained(
                self.local_path,
                trust_remote_code=True,
                local_files_only=True
            )

            # Load the model
            model = AutoModelForCausalLM.from_pretrained(
                self.local_path,
                torch_dtype=torch.float32,
                device_map="cpu",
                low_cpu_mem_usage=True,
                local_files_only=True
            )

            # Tokenizer defaults
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
            if tokenizer.eos_token is None:
                tokenizer.eos_token = "</s>"

            logger.info(f"✅ {self.display_name} loaded!")
            return model, tokenizer

        except Exception as e:
            logger.error(f"❌ Failed to load {self.display_name}: {e}")
            raise

    def format_prompt(self, user_input: str) -> str:
        """Format the prompt"""
        return f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"

    def extract_response(self, full_text: str, formatted_prompt: str) -> str:
        """Extract the response"""
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            if "<|im_end|>" in response:
                response = response.split("<|im_end|>")[0]
            return response.strip()
        return full_text.strip()

    def get_generation_config(self) -> Dict[str, Any]:
        """Generation settings"""
        return {
            "max_new_tokens": 128,  # reduced from 512
            "temperature": 0.7,
            "top_p": 0.9,
            "do_sample": True,
            "repetition_penalty": 1.1,
            "no_repeat_ngram_size": 3,
            "pad_token_id": None,  # set from the tokenizer later
            "eos_token_id": None,  # set from the tokenizer later
            "use_cache": True,  # enable the KV cache
            "return_dict_in_generate": False,  # saves memory
        }

    def get_model_info(self) -> Dict[str, Any]:
        """Model info"""
        return {
            "model_name": self.model_name,
            "display_name": self.display_name,
            "description": self.description,
            "language": self.language,
            "model_size": self.model_size,
            "local_path": self.local_path
        }

lily_llm_api/models/mistral_7b_instruct.py
ADDED
@@ -0,0 +1,103 @@
#!/usr/bin/env python3
"""
Mistral-7B-Instruct-v0.2 model profile
For mistralai/Mistral-7B-Instruct-v0.2
"""

from typing import Dict, Any, Tuple
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging

logger = logging.getLogger(__name__)

class Mistral7bInstructProfile:
    """Mistral-7B-Instruct-v0.2 model profile"""

    def __init__(self):
        self.model_name = "mistralai/Mistral-7B-Instruct-v0.2"
        self.local_path = "./lily_llm_core/models/mistral-7B-Instruct-v0.2"
        self.display_name = "Mistral-7B-Instruct-v0.2"
        self.description = "Mistral AI's 7B-parameter instruct model"
        self.language = "en"
        self.model_size = "7B"

    def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Load the model"""
        logger.info(f"📥 Loading {self.display_name}...")

        try:
            # Load from the local path
            tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)

            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            model = AutoModelForCausalLM.from_pretrained(
                self.local_path,
                trust_remote_code=True,
                local_files_only=True,
                torch_dtype=torch.bfloat16,
                # device_map="cpu",
                # low_cpu_mem_usage=True
                # max_memory={"cpu": "8GB"}
            )

            # Move the model to CPU explicitly
            model.to('cpu')

            logger.info(f"✅ {self.display_name} loaded!")
            return model, tokenizer

        except Exception as e:
            logger.error(f"❌ Failed to load {self.display_name}: {e}")
            raise

    def format_prompt(self, user_input: str) -> str:
        """Format the prompt - Mistral instruct format"""
        # Recommended prompt format for Mistral-7B-Instruct-v0.2
        prompt = f"""<s>[INST] {user_input} [/INST]"""
        return prompt

    def extract_response(self, full_text: str, formatted_prompt: str) -> str:
        """Extract the response"""
        # Response extraction for the Mistral format
        if "[/INST]" in full_text:
            response = full_text.split("[/INST]")[-1].strip()
        else:
            # Strip the prompt
            if formatted_prompt in full_text:
                response = full_text.replace(formatted_prompt, "").strip()
            else:
                response = full_text.strip()

        # Guard against empty or degenerate responses
        if not response or len(response.strip()) < 2:
            return "Hello! How can I help you today?"

        return response

    def get_generation_config(self) -> Dict[str, Any]:
        """Generation settings"""
        return {
            "max_new_tokens": 128,
            "temperature": 0.7,
            "do_sample": True,
            "top_k": 50,
            "top_p": 0.9,
            "repetition_penalty": 1.1,
            "no_repeat_ngram_size": 3,
            "pad_token_id": None,  # set automatically by the model
            "eos_token_id": None   # set automatically by the model
        }

    def get_model_info(self) -> Dict[str, Any]:
        """Model info"""
        return {
            "model_name": self.model_name,
            "display_name": self.display_name,
            "description": self.description,
            "language": self.language,
            "model_size": self.model_size,
            "local_path": self.local_path
        }

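A quick sanity check of the instruct wrapping and extraction above; note the fallback greeting fires whenever the extracted reply is shorter than two characters:

profile = Mistral7bInstructProfile()
print(profile.format_prompt("What is 2 + 2?"))
# -> <s>[INST] What is 2 + 2? [/INST]

# extract_response keeps only the text after [/INST]:
raw = "<s>[INST] What is 2 + 2? [/INST] The answer is 4."
print(profile.extract_response(raw, profile.format_prompt("What is 2 + 2?")))
# -> The answer is 4.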