gbrabbit commited on
Commit
526927a
ยท
0 Parent(s):

Fresh start for HF Spaces deployment

Browse files
This view is limited to 50 files because it contains too many changes. ย  See raw diff
Files changed (50) hide show
  1. .dockerignore +119 -0
  2. .gitattributes +2 -0
  3. .gitignore +230 -0
  4. .vscode/settings.json +4 -0
  5. DEPLOYMENT_CHECKLIST.md +152 -0
  6. DEPLOYMENT_GUIDE.md +204 -0
  7. Dockerfile +66 -0
  8. Dockerfile.blank +43 -0
  9. Dockerfile.gpu +57 -0
  10. Dockerfile.huggingface +66 -0
  11. Dockerfile.huggingface.predownload +65 -0
  12. Dockerfile.latex-ocr +70 -0
  13. Dockerfile.local +54 -0
  14. ENVIRONMENT_VARIABLES.md +162 -0
  15. GPU_DEPLOYMENT_GUIDE.md +240 -0
  16. HEARTH_CHAT_INTEGRATION.md +382 -0
  17. HISTORY.md +191 -0
  18. HUGGINGFACE_CLOUD_GUIDE.md +241 -0
  19. PROMPT.md +105 -0
  20. README.md +137 -0
  21. README_DEPLOYMENT.md +304 -0
  22. README_LILY.md +137 -0
  23. README_gradio.md +40 -0
  24. README_huggingface.md +137 -0
  25. WINDOWS_GPU_DEPLOYMENT_GUIDE.md +292 -0
  26. __init__.py +1 -0
  27. app_huggingface.py +102 -0
  28. app_local.py +19 -0
  29. config.yaml +1 -0
  30. deploy_gpu.sh +76 -0
  31. deploy_gpu_huggingface.sh +90 -0
  32. deploy_gpu_windows.bat +91 -0
  33. docker-compose.gpu.yml +66 -0
  34. docker-compose.yml +101 -0
  35. docs/API_REFERENCE.md +507 -0
  36. docs/USER_GUIDE.md +719 -0
  37. download_model.py +57 -0
  38. fix_huggingface_hub.bat +31 -0
  39. huggingface_cloud_setup.py +186 -0
  40. huggingface_gpu_setup.py +210 -0
  41. lily-math-rag +1 -0
  42. lily_llm.db +0 -0
  43. lily_llm_api/app.py +246 -0
  44. lily_llm_api/app_v2.py +2049 -0
  45. lily_llm_api/models/__init__.py +49 -0
  46. lily_llm_api/models/dialogpt_medium.py +82 -0
  47. lily_llm_api/models/kanana_1_5_2_1b_instruct.py +93 -0
  48. lily_llm_api/models/kanana_1_5_v_3b_instruct.py +246 -0
  49. lily_llm_api/models/kanana_nano_2_1b_instruct.py +95 -0
  50. lily_llm_api/models/mistral_7b_instruct.py +103 -0
.dockerignore ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git
3
+ .gitignore
4
+ .env
5
+ lily_llm_media/
6
+ lily_llm_env/
7
+ lily_llm_core/models/
8
+ lily_llm_ignore/
9
+ vector_stores/
10
+ latex_ocr_env/
11
+ lily_llm_utils/LaTeX-OCR/
12
+ hearth_llm_model/
13
+ ocr_models/
14
+ latex_ocr_faiss_simple/
15
+ latex_ocr_faiss_stores/
16
+ uploads/
17
+ simple_stores/
18
+ notebooks/
19
+ lily_llm_etc/
20
+ *.safetensors
21
+ *.pth
22
+
23
+ # Python
24
+ __pycache__/
25
+ *.py[cod]
26
+ *$py.class
27
+ *.so
28
+ .Python
29
+ build/
30
+ develop-eggs/
31
+ dist/
32
+ downloads/
33
+ eggs/
34
+ .eggs/
35
+ lib/
36
+ lib64/
37
+ parts/
38
+ sdist/
39
+ var/
40
+ wheels/
41
+ *.egg-info/
42
+ .installed.cfg
43
+ *.egg
44
+ MANIFEST
45
+
46
+ # Virtual environments
47
+ venv/
48
+ env/
49
+ ENV/
50
+ .venv/
51
+ .env/
52
+
53
+ # IDE
54
+ .vscode/
55
+ .idea/
56
+ *.swp
57
+ *.swo
58
+ *~
59
+
60
+ # OS
61
+ .DS_Store
62
+ Thumbs.db
63
+
64
+ # Logs
65
+ *.log
66
+ logs/
67
+ *.out
68
+
69
+ # Cache
70
+ cache/
71
+ .cache/
72
+ __pycache__/
73
+
74
+ # Temporary files
75
+ temp/
76
+ tmp/
77
+ *.tmp
78
+ *.temp
79
+
80
+ # Large model files (will be downloaded at runtime from Hugging Face Hub)
81
+ *.bin
82
+ *.safetensors
83
+ *.pt
84
+ *.pth
85
+ models/*/
86
+ lily_llm_core/models/*/
87
+
88
+ # Data files
89
+ data/
90
+ uploads/
91
+ vector_stores/
92
+
93
+ # Backup files
94
+ backup/
95
+ *.backup
96
+ *.bak
97
+
98
+ # Documentation (except README)
99
+ docs/
100
+ *.md
101
+ !README_huggingface.md
102
+
103
+ # Test files
104
+ test_*
105
+ *_test.py
106
+ tests/
107
+
108
+ # Docker
109
+ Dockerfile
110
+ Dockerfile.*
111
+ !Dockerfile.huggingface
112
+ docker-compose.yml
113
+ docker-compose.yaml
114
+
115
+ # Other configs
116
+ .env
117
+ .env.*
118
+ config.yaml
119
+ config.json
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.pdf filter=lfs diff=lfs merge=lfs -text
2
+ *.zip filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .github/
2
+ .env
3
+ lily_llm_env/
4
+ lily_llm_core/models/
5
+ __pycache__/
6
+ *.pyc
7
+ .ipynb_checkpoints/
8
+ lily_llm_media/
9
+ vector_stores/
10
+ latex_ocr_env/
11
+ lily_llm_ignore/
12
+ lily_llm_utils/LaTeX-OCR/
13
+ hearth_llm_model/
14
+ ocr_models/
15
+ latex_ocr_faiss_simple/
16
+ latex_ocr_faiss_stores/
17
+ uploads/
18
+ simple_stores/
19
+ notebooks/
20
+ lily_llm_etc/
21
+ *.safetensors
22
+ *.pth
23
+
24
+ # Byte-compiled / optimized / DLL files
25
+ __pycache__/
26
+ *.py[codz]
27
+ *$py.class
28
+
29
+ # C extensions
30
+ *.so
31
+
32
+ # Distribution / packaging
33
+ .Python
34
+ build/
35
+ develop-eggs/
36
+ dist/
37
+ downloads/
38
+ eggs/
39
+ .eggs/
40
+ lib/
41
+ lib64/
42
+ parts/
43
+ sdist/
44
+ var/
45
+ wheels/
46
+ share/python-wheels/
47
+ *.egg-info/
48
+ .installed.cfg
49
+ *.egg
50
+ MANIFEST
51
+
52
+ # PyInstaller
53
+ # Usually these files are written by a python script from a template
54
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
55
+ *.manifest
56
+ *.spec
57
+
58
+ # Installer logs
59
+ pip-log.txt
60
+ pip-delete-this-directory.txt
61
+
62
+ # Unit test / coverage reports
63
+ htmlcov/
64
+ .tox/
65
+ .nox/
66
+ .coverage
67
+ .coverage.*
68
+ .cache
69
+ nosetests.xml
70
+ coverage.xml
71
+ *.cover
72
+ *.py.cover
73
+ .hypothesis/
74
+ .pytest_cache/
75
+ cover/
76
+
77
+ # Translations
78
+ *.mo
79
+ *.pot
80
+
81
+ # Django stuff:
82
+ *.log
83
+ local_settings.py
84
+ db.sqlite3
85
+ db.sqlite3-journal
86
+
87
+ # Flask stuff:
88
+ instance/
89
+ .webassets-cache
90
+
91
+ # Scrapy stuff:
92
+ .scrapy
93
+
94
+ # Sphinx documentation
95
+ docs/_build/
96
+
97
+ # PyBuilder
98
+ .pybuilder/
99
+ target/
100
+
101
+ # Jupyter Notebook
102
+ .ipynb_checkpoints
103
+
104
+ # IPython
105
+ profile_default/
106
+ ipython_config.py
107
+
108
+ # pyenv
109
+ # For a library or package, you might want to ignore these files since the code is
110
+ # intended to run in multiple environments; otherwise, check them in:
111
+ # .python-version
112
+
113
+ # pipenv
114
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
115
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
116
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
117
+ # install all needed dependencies.
118
+ #Pipfile.lock
119
+
120
+ # UV
121
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
122
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
123
+ # commonly ignored for libraries.
124
+ #uv.lock
125
+
126
+ # poetry
127
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
128
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
129
+ # commonly ignored for libraries.
130
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
131
+ #poetry.lock
132
+ #poetry.toml
133
+
134
+ # pdm
135
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
136
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
137
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
138
+ #pdm.lock
139
+ #pdm.toml
140
+ .pdm-python
141
+ .pdm-build/
142
+
143
+ # pixi
144
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
145
+ #pixi.lock
146
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
147
+ # in the .venv directory. It is recommended not to include this directory in version control.
148
+ .pixi
149
+
150
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
151
+ __pypackages__/
152
+
153
+ # Celery stuff
154
+ celerybeat-schedule
155
+ celerybeat.pid
156
+
157
+ # SageMath parsed files
158
+ *.sage.py
159
+
160
+ # Environments
161
+ .env
162
+ .envrc
163
+ .venv
164
+ env/
165
+ venv/
166
+ ENV/
167
+ env.bak/
168
+ venv.bak/
169
+
170
+ # Spyder project settings
171
+ .spyderproject
172
+ .spyproject
173
+
174
+ # Rope project settings
175
+ .ropeproject
176
+
177
+ # mkdocs documentation
178
+ /site
179
+
180
+ # mypy
181
+ .mypy_cache/
182
+ .dmypy.json
183
+ dmypy.json
184
+
185
+ # Pyre type checker
186
+ .pyre/
187
+
188
+ # pytype static type analyzer
189
+ .pytype/
190
+
191
+ # Cython debug symbols
192
+ cython_debug/
193
+
194
+ # PyCharm
195
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
196
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
197
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
198
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
199
+ #.idea/
200
+
201
+ # Abstra
202
+ # Abstra is an AI-powered process automation framework.
203
+ # Ignore directories containing user credentials, local state, and settings.
204
+ # Learn more at https://abstra.io/docs
205
+ .abstra/
206
+
207
+ # Visual Studio Code
208
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
209
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
210
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
211
+ # you could uncomment the following to ignore the entire vscode folder
212
+ # .vscode/
213
+
214
+ # Ruff stuff:
215
+ .ruff_cache/
216
+
217
+ # PyPI configuration file
218
+ .pypirc
219
+
220
+ # Cursor
221
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
222
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
223
+ # refer to https://docs.cursor.com/context/ignore-files
224
+ .cursorignore
225
+ .cursorindexingignore
226
+
227
+ # Marimo
228
+ marimo/_static/
229
+ marimo/_lsp/
230
+ __marimo__/
.vscode/settings.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "terminal.integrated.cwd": "c:/Project/lily_generate_project/lily_generate_package"
3
+ }
4
+
DEPLOYMENT_CHECKLIST.md ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ๐Ÿš€ Lily LLM API - Hugging Face Spaces ๋ฐฐํฌ ์ฒดํฌ๋ฆฌ์ŠคํŠธ
2
+
3
+ ## โœ… ๋ฐฐํฌ ์ „ ์ฒดํฌ๋ฆฌ์ŠคํŠธ
4
+
5
+ ### ๐Ÿ“‹ ํ•„์ˆ˜ ํŒŒ์ผ ํ™•์ธ
6
+ - [ ] `Dockerfile.huggingface` - Docker ์„ค์ • ํŒŒ์ผ
7
+ - [ ] `app_huggingface.py` - Hugging Face Spaces ์ง„์ž…์ 
8
+ - [ ] `requirements_full.txt` - ์™„์ „ํ•œ ์˜์กด์„ฑ ํŒจํ‚ค์ง€ ๋ชฉ๋ก
9
+ - [ ] `README_huggingface.md` - ํ”„๋กœ์ ํŠธ ์„ค๋ช… ๋ฌธ์„œ
10
+ - [ ] `.dockerignore` - Docker ๋นŒ๋“œ ์ œ์™ธ ํŒŒ์ผ ๋ชฉ๋ก
11
+
12
+ ### ๐Ÿ”ง ํ”„๋กœ์ ํŠธ ๊ตฌ์กฐ ํ™•์ธ
13
+ - [ ] `lily_llm_api/` - FastAPI ์„œ๋ฒ„ ์ฝ”๋“œ
14
+ - [ ] `lily_llm_core/` - ํ•ต์‹ฌ RAG ๋ฐ AI ๋กœ์ง
15
+ - [ ] `lily_llm_utils/` - ์œ ํ‹ธ๋ฆฌํ‹ฐ ํ•จ์ˆ˜๋“ค
16
+ - [ ] ๋ชจ๋ธ ํŒŒ์ผ๋“ค์ด ์˜ฌ๋ฐ”๋ฅธ ๊ฒฝ๋กœ์— ์œ„์น˜
17
+
18
+ ### ๐ŸŒ Hugging Face ๊ณ„์ • ์ค€๋น„
19
+ - [ ] Hugging Face ๊ณ„์ • ์ƒ์„ฑ ์™„๋ฃŒ
20
+ - [ ] Write ๊ถŒํ•œ์ด ์žˆ๋Š” Access Token ์ƒ์„ฑ
21
+ - [ ] ํ† ํฐ์„ ์•ˆ์ „ํ•œ ๊ณณ์— ์ €์žฅ
22
+
23
+ ## ๐Ÿš€ ๋ฐฐํฌ ๋‹จ๊ณ„
24
+
25
+ ### 1๋‹จ๊ณ„: Hugging Face Space ์ƒ์„ฑ
26
+ - [ ] [Hugging Face Spaces](https://huggingface.co/spaces) ์ ‘์†
27
+ - [ ] "Create new Space" ํด๋ฆญ
28
+ - [ ] ๋‹ค์Œ ์„ค์ •์œผ๋กœ ์ƒ์„ฑ:
29
+ - [ ] **Space name**: `lily-llm-api` (๋˜๋Š” ์›ํ•˜๋Š” ์ด๋ฆ„)
30
+ - [ ] **SDK**: `Docker` ์„ ํƒ
31
+ - [ ] **Hardware**: `CPU basic` (๋ฌด๋ฃŒ) ๋˜๋Š” `CPU upgrade` (์œ ๋ฃŒ)
32
+ - [ ] **Visibility**: `Public` ๋˜๋Š” `Private`
33
+
34
+ ### 2๋‹จ๊ณ„: ํŒŒ์ผ ์—…๋กœ๋“œ
35
+ - [ ] Space ์ €์žฅ์†Œ ํด๋ก  ๋˜๋Š” ์›น ์ธํ„ฐํŽ˜์ด์Šค ์‚ฌ์šฉ
36
+ - [ ] ํ•„์ˆ˜ ํŒŒ์ผ๋“ค์„ ์˜ฌ๋ฐ”๋ฅธ ์ด๋ฆ„์œผ๋กœ ๋ณต์‚ฌ:
37
+ - [ ] `Dockerfile.huggingface` โ†’ `Dockerfile`
38
+ - [ ] `requirements_full.txt` โ†’ `requirements.txt`
39
+ - [ ] `README_huggingface.md` โ†’ `README.md`
40
+ - [ ] ์†Œ์Šค ์ฝ”๋“œ ๋””๋ ‰ํ† ๋ฆฌ๋“ค ๋ณต์‚ฌ
41
+ - [ ] Git commit ๋ฐ push (Git ๋ฐฉ์‹ ์‚ฌ์šฉ ์‹œ)
42
+
43
+ ### 3๋‹จ๊ณ„: ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
44
+ Space Settings > Variables์—์„œ ์„ค์ •:
45
+ - [ ] `HOST=0.0.0.0`
46
+ - [ ] `PORT=7860`
47
+ - [ ] `PYTHONPATH=/app`
48
+ - [ ] `PYTHONUNBUFFERED=1`
49
+ - [ ] `TOKENIZERS_PARALLELISM=false`
50
+ - [ ] `OMP_NUM_THREADS=1`
51
+ - [ ] `MKL_NUM_THREADS=1`
52
+
53
+ ### 4๋‹จ๊ณ„: ๋นŒ๋“œ ๋ฐ ๋ฐฐํฌ ํ™•์ธ
54
+ - [ ] Space ํŽ˜์ด์ง€์—์„œ ๋นŒ๋“œ ๋กœ๊ทธ ํ™•์ธ
55
+ - [ ] ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ๋กœ๊ทธ ๋ถ„์„ ๋ฐ ์ˆ˜์ •
56
+ - [ ] ๋นŒ๋“œ ์™„๋ฃŒ ํ›„ ์•ฑ ์‹คํ–‰ ํ™•์ธ
57
+
58
+ ## ๐Ÿงช ํ…Œ์ŠคํŠธ ์ฒดํฌ๋ฆฌ์ŠคํŠธ
59
+
60
+ ### API ๊ธฐ๋ณธ ํ…Œ์ŠคํŠธ
61
+ - [ ] Health Check: `GET /health`
62
+ ```bash
63
+ curl https://YOUR_USERNAME-lily-llm-api.hf.space/health
64
+ ```
65
+
66
+ - [ ] ๋ชจ๋ธ ๋ชฉ๋ก: `GET /models`
67
+ ```bash
68
+ curl https://YOUR_USERNAME-lily-llm-api.hf.space/models
69
+ ```
70
+
71
+ - [ ] ํ…์ŠคํŠธ ์ƒ์„ฑ: `POST /generate`
72
+ ```bash
73
+ curl -X POST https://YOUR_USERNAME-lily-llm-api.hf.space/generate \
74
+ -F "prompt=์•ˆ๋…•ํ•˜์„ธ์š”! ํ…Œ์ŠคํŠธ ๋ฉ”์‹œ์ง€์ž…๋‹ˆ๋‹ค."
75
+ ```
76
+
77
+ ### ๊ณ ๊ธ‰ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ
78
+ - [ ] ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ํ…Œ์ŠคํŠธ
79
+ - [ ] RAG ์‹œ์Šคํ…œ ํ…Œ์ŠคํŠธ (๋ฌธ์„œ ์—…๋กœ๋“œ ๋ฐ ์งˆ์˜)
80
+ - [ ] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ
81
+
82
+ ### ์„ฑ๋Šฅ ํ…Œ์ŠคํŠธ
83
+ - [ ] ์‘๋‹ต ์‹œ๊ฐ„ ์ธก์ •
84
+ - [ ] ๋™์‹œ ์š”์ฒญ ์ฒ˜๋ฆฌ ํ™•์ธ
85
+ - [ ] ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ๋ชจ๋‹ˆํ„ฐ๋ง
86
+
87
+ ## ๐Ÿ”— Hearth Chat ์—ฐ๋™ ์ค€๋น„
88
+
89
+ ### URL ๊ธฐ๋ก
90
+ ๋ฐฐํฌ ์™„๋ฃŒ ํ›„ ๋‹ค์Œ URL ๊ธฐ๋ก:
91
+ - [ ] Hugging Face Space URL: `https://YOUR_USERNAME-lily-llm-api.hf.space`
92
+ - [ ] API ๋ฌธ์„œ URL: `https://YOUR_USERNAME-lily-llm-api.hf.space/docs`
93
+
94
+ ### Hearth Chat ์„ค์ • ์—…๋ฐ์ดํŠธ
95
+ - [ ] AI ์„ค์ • ๋ชจ๋‹ฌ์— Lily LLM ์˜ต์…˜ ์ถ”๊ฐ€
96
+ - [ ] ๋ฐฑ์—”๋“œ consumers.py์— Hugging Face API ํ˜ธ์ถœ ๋กœ์ง ์ถ”๊ฐ€
97
+ - [ ] ํ™˜๊ฒฝ ๋ณ€์ˆ˜์— Lily LLM API URL ์„ค์ •
98
+
99
+ ## โŒ ๋ฌธ์ œ ํ•ด๊ฒฐ
100
+
101
+ ### ์ผ๋ฐ˜์ ์ธ ์˜ค๋ฅ˜์™€ ํ•ด๊ฒฐ์ฑ…
102
+
103
+ **๋นŒ๋“œ ์‹คํŒจ**
104
+ - [ ] `requirements.txt` ์˜์กด์„ฑ ํ™•์ธ
105
+ - [ ] `Dockerfile` ๋ฌธ๋ฒ• ์˜ค๋ฅ˜ ํ™•์ธ
106
+ - [ ] `.dockerignore` ํŒŒ์ผ ํ™•์ธ
107
+
108
+ **๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ**
109
+ - [ ] ๋ถˆํ•„์š”ํ•œ ํŒจํ‚ค์ง€ ์ œ๊ฑฐ
110
+ - [ ] ๋ชจ๋ธ ํฌ๊ธฐ ์ตœ์ ํ™”
111
+ - [ ] Hardware ์—…๊ทธ๋ ˆ์ด๋“œ ๊ณ ๋ ค
112
+
113
+ **๋ชจ๋“ˆ Import ์˜ค๋ฅ˜**
114
+ - [ ] `PYTHONPATH` ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ํ™•์ธ
115
+ - [ ] ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ ๊ตฌ์กฐ ํ™•์ธ
116
+ - [ ] ์˜์กด์„ฑ ํŒจํ‚ค์ง€ ๋ฒ„์ „ ํ™•์ธ
117
+
118
+ **API ์‘๋‹ต ์—†์Œ**
119
+ - [ ] ํฌํŠธ ์„ค์ • ํ™•์ธ (7860)
120
+ - [ ] ๋ฐฉํ™”๋ฒฝ ์„ค์ • ํ™•์ธ
121
+ - [ ] ๋กœ๊ทธ์—์„œ ์˜ค๋ฅ˜ ๋ฉ”์‹œ์ง€ ํ™•์ธ
122
+
123
+ ## ๐Ÿ“Š ๋ชจ๋‹ˆํ„ฐ๋ง ์„ค์ •
124
+
125
+ ### ๋ฐฐํฌ ํ›„ ๋ชจ๋‹ˆํ„ฐ๋ง
126
+ - [ ] Space ๋Œ€์‹œ๋ณด๋“œ์—์„œ ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ
127
+ - [ ] ๋กœ๊ทธ ์ •๊ธฐ์  ํ™•์ธ
128
+ - [ ] ์„ฑ๋Šฅ ๋ฉ”ํŠธ๋ฆญ ๋ชจ๋‹ˆํ„ฐ๋ง
129
+ - [ ] ์‚ฌ์šฉ์ž ํ”ผ๋“œ๋ฐฑ ์ˆ˜์ง‘
130
+
131
+ ### ์œ ์ง€๋ณด์ˆ˜ ๊ณ„ํš
132
+ - [ ] ์ •๊ธฐ์  ์—…๋ฐ์ดํŠธ ๊ณ„ํš ์ˆ˜๋ฆฝ
133
+ - [ ] ๋ฐฑ์—… ์ „๋žต ์ˆ˜๋ฆฝ
134
+ - [ ] ์žฅ์•  ๋Œ€์‘ ๊ณ„ํš ์ˆ˜๋ฆฝ
135
+
136
+ ---
137
+
138
+ ## ๐ŸŽ‰ ๋ฐฐํฌ ์™„๋ฃŒ!
139
+
140
+ ๋ชจ๋“  ์ฒดํฌ๋ฆฌ์ŠคํŠธ ํ•ญ๋ชฉ์„ ์™„๋ฃŒํ•˜๋ฉด Lily LLM API๊ฐ€ Hugging Face Spaces์—์„œ ์„ฑ๊ณต์ ์œผ๋กœ ์‹คํ–‰๋ฉ๋‹ˆ๋‹ค.
141
+
142
+ **๋‹ค์Œ ๋‹จ๊ณ„**: [HEARTH_CHAT_INTEGRATION.md](./HEARTH_CHAT_INTEGRATION.md)๋ฅผ ์ฐธ์กฐํ•˜์—ฌ Railway Hearth Chat๊ณผ ์—ฐ๋™์„ ์ง„ํ–‰ํ•˜์„ธ์š”.
143
+
144
+ ---
145
+
146
+ ## ๐Ÿ“ž ์ง€์›
147
+
148
+ ๋ฐฐํฌ ์ค‘ ๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒํ•˜๋ฉด:
149
+ 1. ๋กœ๊ทธ ํ™•์ธ ๋ฐ ๋ถ„์„
150
+ 2. [DEPLOYMENT_GUIDE.md](./DEPLOYMENT_GUIDE.md) ์ƒ์„ธ ๊ฐ€์ด๋“œ ์ฐธ์กฐ
151
+ 3. Hugging Face Community ํฌ๋Ÿผ ํ™œ์šฉ
152
+ 4. GitHub Issues๋ฅผ ํ†ตํ•œ ๊ธฐ์ˆ  ์ง€์›
DEPLOYMENT_GUIDE.md ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM API - Hugging Face Spaces ๋ฐฐํฌ ๊ฐ€์ด๋“œ
2
+
3
+ ## ๐Ÿš€ ๋ฐฐํฌ ๋‹จ๊ณ„๋ณ„ ๊ฐ€์ด๋“œ
4
+
5
+ ### 1. ์‚ฌ์ „ ์ค€๋น„
6
+
7
+ #### 1.1 Hugging Face ๊ณ„์ • ๋ฐ ํ† ํฐ
8
+ 1. [Hugging Face](https://huggingface.co) ๊ณ„์ • ์ƒ์„ฑ
9
+ 2. [Settings > Access Tokens](https://huggingface.co/settings/tokens)์—์„œ Write ๊ถŒํ•œ ํ† ํฐ ์ƒ์„ฑ
10
+ 3. ํ† ํฐ์„ ์•ˆ์ „ํ•œ ๊ณณ์— ์ €์žฅ
11
+
12
+ #### 1.2 ํ•„์š”ํ•œ ํŒŒ์ผ๋“ค ํ™•์ธ
13
+ - `Dockerfile.huggingface` - Docker ์„ค์ •
14
+ - `app_huggingface.py` - ์ง„์ž…์ 
15
+ - `requirements_full.txt` - ์˜์กด์„ฑ ํŒจํ‚ค์ง€
16
+ - `README_huggingface.md` - ํ”„๋กœ์ ํŠธ ์„ค๋ช…
17
+ - `.dockerignore` - Docker ๋นŒ๋“œ ์ œ์™ธ ํŒŒ์ผ
18
+
19
+ ### 2. Hugging Face Spaces ์ƒ์„ฑ
20
+
21
+ #### 2.1 Space ์ƒ์„ฑ
22
+ 1. [Hugging Face Spaces](https://huggingface.co/spaces) ์ ‘์†
23
+ 2. "Create new Space" ํด๋ฆญ
24
+ 3. ๋‹ค์Œ ์„ค์ •์œผ๋กœ Space ์ƒ์„ฑ:
25
+ - **Owner**: ๋ณธ์ธ ๊ณ„์ •
26
+ - **Space name**: `lily-llm-api`
27
+ - **License**: `MIT`
28
+ - **Select the Space SDK**: `Docker`
29
+ - **Space hardware**: `CPU basic` (๋ฌด๋ฃŒ) ๋˜๋Š” `CPU upgrade` (์œ ๋ฃŒ, ๋” ๋น ๋ฆ„)
30
+ - **Visibility**: `Public` ๋˜๋Š” `Private`
31
+
32
+ #### 2.2 Space ์„ค์ •
33
+ Space ์ƒ์„ฑ ํ›„ Settings์—์„œ:
34
+ - **Variables**: ํ•„์š”ํ•œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
35
+ - **Secrets**: API ํ‚ค ๋“ฑ ๋ฏผ๊ฐํ•œ ์ •๋ณด ์„ค์ •
36
+
37
+ ### 3. ์ฝ”๋“œ ๋ฐฐํฌ
38
+
39
+ #### 3.1 Git ๋ฐฉ์‹ (๊ถŒ์žฅ)
40
+
41
+ ```bash
42
+ # 1. Space ์ €์žฅ์†Œ ํด๋ก 
43
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/lily-llm-api
44
+ cd lily-llm-api
45
+
46
+ # 2. ํ•„์š”ํ•œ ํŒŒ์ผ๋“ค ๋ณต์‚ฌ
47
+ cp /path/to/lily_generate_package/Dockerfile.huggingface ./Dockerfile
48
+ cp /path/to/lily_generate_package/app_huggingface.py ./
49
+ cp /path/to/lily_generate_package/requirements_full.txt ./requirements.txt
50
+ cp /path/to/lily_generate_package/README_huggingface.md ./README.md
51
+ cp /path/to/lily_generate_package/.dockerignore ./
52
+
53
+ # 3. ํ”„๋กœ์ ํŠธ ์†Œ์Šค ์ฝ”๋“œ ๋ณต์‚ฌ
54
+ cp -r /path/to/lily_generate_package/lily_llm_api ./
55
+ cp -r /path/to/lily_generate_package/lily_llm_core ./
56
+ cp -r /path/to/lily_generate_package/lily_llm_utils ./
57
+
58
+ # 4. Git ์ปค๋ฐ‹ ๋ฐ ํ‘ธ์‹œ
59
+ git add .
60
+ git commit -m "Initial deployment of Lily LLM API"
61
+ git push
62
+ ```
63
+
64
+ #### 3.2 ์›น ์ธํ„ฐํŽ˜์ด์Šค ๋ฐฉ์‹
65
+
66
+ 1. Hugging Face Space ํŽ˜์ด์ง€์—์„œ "Files" ํƒญ ํด๋ฆญ
67
+ 2. "Add file" > "Upload files" ํด๋ฆญ
68
+ 3. ํ•„์š”ํ•œ ํŒŒ์ผ๋“ค์„ ๋“œ๋ž˜๊ทธ ์•ค ๋“œ๋กญ์œผ๋กœ ์—…๋กœ๋“œ
69
+ 4. ์ปค๋ฐ‹ ๋ฉ”์‹œ์ง€ ์ž‘์„ฑ ํ›„ "Commit changes" ํด๋ฆญ
70
+
71
+ ### 4. ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
72
+
73
+ Space Settings > Variables์—์„œ ๋‹ค์Œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋“ค ์„ค์ •:
74
+
75
+ ```bash
76
+ # ์„œ๋ฒ„ ์„ค์ •
77
+ HOST=0.0.0.0
78
+ PORT=7860
79
+ PYTHONPATH=/app
80
+ PYTHONUNBUFFERED=1
81
+
82
+ # ๋ชจ๋ธ ์„ค์ •
83
+ DEFAULT_MODEL=kanana-1.5-v-3b-instruct
84
+ MAX_NEW_TOKENS=256
85
+ TEMPERATURE=0.7
86
+
87
+ # ์บ์‹œ ์„ค์ •
88
+ TRANSFORMERS_CACHE=/app/cache/transformers
89
+ HF_HOME=/app/cache/huggingface
90
+ TORCH_HOME=/app/cache/torch
91
+ TOKENIZERS_PARALLELISM=false
92
+
93
+ # ์„ฑ๋Šฅ ์ตœ์ ํ™”
94
+ OMP_NUM_THREADS=1
95
+ MKL_NUM_THREADS=1
96
+ ```
97
+
98
+ ### 5. ๋ฐฐํฌ ํ™•์ธ
99
+
100
+ #### 5.1 ๋นŒ๋“œ ๋กœ๊ทธ ํ™•์ธ
101
+ 1. Space ํŽ˜์ด์ง€์—์„œ "Logs" ํƒญ ํด๋ฆญ
102
+ 2. Docker ๋นŒ๋“œ ๋ฐ ์‹คํ–‰ ๋กœ๊ทธ ํ™•์ธ
103
+ 3. ์˜ค๋ฅ˜ ๋ฐœ์ƒ ์‹œ ๋กœ๊ทธ๋ฅผ ํ†ตํ•ด ๋ฌธ์ œ ํ•ด๊ฒฐ
104
+
105
+ #### 5.2 API ํ…Œ์ŠคํŠธ
106
+
107
+ ๋ฐฐํฌ ์™„๋ฃŒ ํ›„ ๋‹ค์Œ๊ณผ ๊ฐ™์ด ํ…Œ์ŠคํŠธ:
108
+
109
+ ```python
110
+ import requests
111
+
112
+ # Health check
113
+ response = requests.get("https://YOUR_USERNAME-lily-llm-api.hf.space/health")
114
+ print(response.json())
115
+
116
+ # ํ…์ŠคํŠธ ์ƒ์„ฑ ํ…Œ์ŠคํŠธ
117
+ response = requests.post(
118
+ "https://YOUR_USERNAME-lily-llm-api.hf.space/generate",
119
+ data={"prompt": "์•ˆ๋…•ํ•˜์„ธ์š”! ํ…Œ์ŠคํŠธ ๋ฉ”์‹œ์ง€์ž…๋‹ˆ๋‹ค."}
120
+ )
121
+ print(response.json())
122
+ ```
123
+
124
+ ### 6. ์„ฑ๋Šฅ ์ตœ์ ํ™”
125
+
126
+ #### 6.1 ํ•˜๋“œ์›จ์–ด ์—…๊ทธ๋ ˆ์ด๋“œ
127
+ - ๋ฌด๋ฃŒ CPU basic: ์ œํ•œ์  ์„ฑ๋Šฅ
128
+ - ์œ ๋ฃŒ CPU upgrade: ๋” ๋น ๋ฅธ ์ฒ˜๋ฆฌ
129
+ - GPU ์˜ต์…˜: ๋Œ€์šฉ๋Ÿ‰ ๋ชจ๋ธ ์ฒ˜๋ฆฌ ์‹œ ํ•„์š”
130
+
131
+ #### 6.2 ๋ชจ๋ธ ์ตœ์ ํ™”
132
+ ```python
133
+ # app_huggingface.py์—์„œ ๋ชจ๋ธ ๋กœ๋”ฉ ์ตœ์ ํ™”
134
+ model = AutoModelForCausalLM.from_pretrained(
135
+ model_name,
136
+ torch_dtype=torch.float16, # ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ๊ฐ์†Œ
137
+ device_map="auto", # ์ž๋™ ๋””๋ฐ”์ด์Šค ๋ฐฐ์น˜
138
+ low_cpu_mem_usage=True # CPU ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™”
139
+ )
140
+ ```
141
+
142
+ ### 7. ๋ฌธ์ œ ํ•ด๊ฒฐ
143
+
144
+ #### 7.1 ์ผ๋ฐ˜์ ์ธ ์˜ค๋ฅ˜
145
+
146
+ **๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ ์˜ค๋ฅ˜**
147
+ ```bash
148
+ # requirements.txt์—์„œ ๋ถˆํ•„์š”ํ•œ ํŒจํ‚ค์ง€ ์ œ๊ฑฐ
149
+ # ๋ชจ๋ธ ํฌ๊ธฐ ์ถ•์†Œ ๋˜๋Š” ์–‘์žํ™” ์ ์šฉ
150
+ ```
151
+
152
+ **๋นŒ๋“œ ์‹œ๊ฐ„ ์ดˆ๊ณผ**
153
+ ```bash
154
+ # .dockerignore ํŒŒ์ผ๋กœ ๋ถˆํ•„์š”ํ•œ ํŒŒ์ผ ์ œ์™ธ
155
+ # multi-stage build ์‚ฌ์šฉ์œผ๋กœ ๋นŒ๋“œ ์ตœ์ ํ™”
156
+ ```
157
+
158
+ **๋ชจ๋“ˆ import ์˜ค๋ฅ˜**
159
+ ```bash
160
+ # PYTHONPATH ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ํ™•์ธ
161
+ # requirements.txt ์˜์กด์„ฑ ํ™•์ธ
162
+ ```
163
+
164
+ #### 7.2 ๋กœ๊ทธ ๋ถ„์„
165
+ ```bash
166
+ # ๋นŒ๋“œ ๋กœ๊ทธ์—์„œ ์˜ค๋ฅ˜ ์ฐพ๊ธฐ
167
+ grep -i error build.log
168
+
169
+ # ๋Ÿฐํƒ€์ž„ ๋กœ๊ทธ์—์„œ ๋ฌธ์ œ ํ™•์ธ
170
+ tail -f app.log
171
+ ```
172
+
173
+ ### 8. Railway Hearth Chat ์—ฐ๋™ ์ค€๋น„
174
+
175
+ ๋ฐฐํฌ๋œ Hugging Face Space URL์„ ๊ธฐ๋กํ•ด๋‘์„ธ์š”:
176
+ ```
177
+ https://YOUR_USERNAME-lily-llm-api.hf.space
178
+ ```
179
+
180
+ ์ด URL์„ Hearth Chat์˜ AI ์„ค์ •์—์„œ Lily LLM API URL๋กœ ์‚ฌ์šฉํ•˜๊ฒŒ ๋ฉ๋‹ˆ๋‹ค.
181
+
182
+ ### 9. ์œ ์ง€๋ณด์ˆ˜
183
+
184
+ #### 9.1 ์—…๋ฐ์ดํŠธ ๋ฐฐํฌ
185
+ ```bash
186
+ # ์ฝ”๋“œ ์ˆ˜์ • ํ›„
187
+ git add .
188
+ git commit -m "Update: description of changes"
189
+ git push
190
+ ```
191
+
192
+ #### 9.2 ๋ชจ๋‹ˆํ„ฐ๋ง
193
+ - Space ๋Œ€์‹œ๋ณด๋“œ์—์„œ ์‚ฌ์šฉ๋Ÿ‰ ๋ชจ๋‹ˆํ„ฐ๋ง
194
+ - ๋กœ๊ทธ๋ฅผ ํ†ตํ•œ ์˜ค๋ฅ˜ ์ถ”์ 
195
+ - ์„ฑ๋Šฅ ๋ฉ”ํŠธ๋ฆญ ํ™•์ธ
196
+
197
+ ---
198
+
199
+ ## ๐Ÿ“ž ์ง€์›
200
+
201
+ ๋ฐฐํฌ ์ค‘ ๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒํ•˜๋ฉด:
202
+ 1. Hugging Face ๊ณต์‹ ๋ฌธ์„œ ์ฐธ์กฐ
203
+ 2. Community ํฌ๋Ÿผ์—์„œ ๋„์›€ ์š”์ฒญ
204
+ 3. GitHub Issues๋ฅผ ํ†ตํ•œ ๊ธฐ์ˆ  ์ง€์›
Dockerfile ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces์šฉ Lily LLM API Server Dockerfile
2
+ FROM python:3.11-slim
3
+
4
+ # Hugging Face Spaces ํ™˜๊ฒฝ ๋ณ€์ˆ˜
5
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
6
+ ENV GRADIO_SERVER_PORT=7860
7
+ ENV PYTHONPATH=/app
8
+ ENV PYTHONUNBUFFERED=1
9
+ ENV TOKENIZERS_PARALLELISM=false
10
+
11
+ # ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ ์„ค์ •
12
+ WORKDIR /app
13
+
14
+ # ์‹œ์Šคํ…œ ์˜์กด์„ฑ ์„ค์น˜
15
+ RUN apt-get update && apt-get install -y \
16
+ build-essential \
17
+ curl \
18
+ git \
19
+ wget \
20
+ ffmpeg \
21
+ libsm6 \
22
+ libxext6 \
23
+ libfontconfig1 \
24
+ libxrender1 \
25
+ libgl1-mesa-glx \
26
+ && rm -rf /var/lib/apt/lists/*
27
+
28
+ # Python ์˜์กด์„ฑ ์„ค์น˜ (์บ์‹ฑ ์ตœ์ ํ™”)
29
+ COPY requirements_full.txt requirements.txt
30
+ RUN pip install --no-cache-dir --upgrade pip
31
+ RUN pip install --no-cache-dir -r requirements.txt
32
+
33
+ # NLTK ๋ฐ์ดํ„ฐ ๋‹ค์šด๋กœ๋“œ
34
+ RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
35
+
36
+ # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์ฝ”๋“œ ๋ณต์‚ฌ
37
+ COPY . .
38
+
39
+ # ํ•„์š”ํ•œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
40
+ RUN mkdir -p /app/data /app/logs /app/models /app/uploads /app/vector_stores /app/temp /app/cache/transformers /app/cache/huggingface
41
+
42
+ # ๊ถŒํ•œ ์„ค์ •
43
+ RUN chmod +x /app/*.py
44
+
45
+ # Hugging Face ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
46
+ ENV TRANSFORMERS_CACHE=/app/cache/transformers
47
+ ENV HF_HOME=/app/cache/huggingface
48
+ ENV HF_HUB_CACHE=/app/cache/huggingface
49
+
50
+ # ํ™˜๊ฒฝ ๊ฐ์ง€ ์„ค์ •
51
+ ENV IS_LOCAL=false
52
+ ENV ENVIRONMENT=production
53
+ ENV DOCKER_ENV=server
54
+
55
+ # Hugging Face Spaces์šฉ ์•ฑ ์‹œ์ž‘์  ์ƒ์„ฑ
56
+ COPY app_huggingface.py /app/app_huggingface.py
57
+
58
+ # ํฌํŠธ ๋…ธ์ถœ (Hugging Face Spaces๋Š” 7860 ํฌํŠธ ์‚ฌ์šฉ)
59
+ EXPOSE 7860
60
+
61
+ # ํ—ฌ์Šค์ฒดํฌ
62
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
63
+ CMD curl -f http://localhost:7860/health || exit 1
64
+
65
+ # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์‹คํ–‰
66
+ CMD ["python", "app_huggingface.py"]
Dockerfile.blank ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM API Server Dockerfile
2
+ FROM python:3.11-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && apt-get install -y \
9
+ build-essential \
10
+ curl \
11
+ git \
12
+ && rm -rf /var/lib/apt/lists/*
13
+
14
+ # Copy requirements first for better caching
15
+ COPY requirements.txt .
16
+
17
+ # Install Python dependencies
18
+ RUN pip install --no-cache-dir -r requirements.txt
19
+
20
+ # Download NLTK data
21
+ RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
22
+
23
+ # Copy application code
24
+ COPY . .
25
+
26
+ # Create necessary directories
27
+ RUN mkdir -p /app/data /app/logs /app/models /app/uploads
28
+
29
+ # Set environment variables
30
+ ENV PYTHONPATH=/app
31
+ ENV PYTHONUNBUFFERED=1
32
+ ENV HOST=0.0.0.0
33
+ ENV PORT=8001
34
+
35
+ # Expose port
36
+ EXPOSE 8001
37
+
38
+ # Health check
39
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
40
+ CMD curl -f http://localhost:8001/health || exit 1
41
+
42
+ # Run the application
43
+ CMD ["python", "run_server_v2.py"]
Dockerfile.gpu ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GPU ํ™˜๊ฒฝ์„ ์œ„ํ•œ Dockerfile
2
+ FROM nvidia/cuda:11.8-devel-ubuntu20.04
3
+
4
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV PYTHONUNBUFFERED=1
7
+ ENV CUDA_VISIBLE_DEVICES=0
8
+
9
+ # ์‹œ์Šคํ…œ ํŒจํ‚ค์ง€ ์—…๋ฐ์ดํŠธ ๋ฐ ์„ค์น˜
10
+ RUN apt-get update && apt-get install -y \
11
+ python3.9 \
12
+ python3.9-dev \
13
+ python3-pip \
14
+ git \
15
+ wget \
16
+ curl \
17
+ build-essential \
18
+ libgl1-mesa-glx \
19
+ libglib2.0-0 \
20
+ libsm6 \
21
+ libxext6 \
22
+ libxrender-dev \
23
+ libgomp1 \
24
+ && rm -rf /var/lib/apt/lists/*
25
+
26
+ # Python 3.9์„ ๊ธฐ๋ณธ์œผ๋กœ ์„ค์ •
27
+ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
28
+ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1
29
+
30
+ # ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ ์„ค์ •
31
+ WORKDIR /app
32
+
33
+ # Python ์˜์กด์„ฑ ํŒŒ์ผ ๋ณต์‚ฌ
34
+ COPY requirements.txt .
35
+
36
+ # Python ํŒจํ‚ค์ง€ ์„ค์น˜
37
+ RUN pip3 install --no-cache-dir -r requirements.txt
38
+
39
+ # GPU ๊ด€๋ จ ํŒจํ‚ค์ง€ ์„ค์น˜
40
+ RUN pip3 install --no-cache-dir \
41
+ torch==2.0.1+cu118 \
42
+ torchvision==0.15.2+cu118 \
43
+ torchaudio==2.0.2+cu118 \
44
+ --index-url https://download.pytorch.org/whl/cu118
45
+
46
+ # ํ”„๋กœ์ ํŠธ ํŒŒ์ผ ๋ณต์‚ฌ
47
+ COPY . .
48
+
49
+ # ํฌํŠธ ์„ค์ •
50
+ EXPOSE 8001
51
+
52
+ # ํ—ฌ์Šค์ฒดํฌ ์ถ”๊ฐ€
53
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
54
+ CMD curl -f http://localhost:8001/health || exit 1
55
+
56
+ # ์‹คํ–‰ ๋ช…๋ น
57
+ CMD ["python3", "run_server_v2.py"]
Dockerfile.huggingface ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces์šฉ Lily LLM API Server Dockerfile
2
+ FROM python:3.11-slim
3
+
4
+ # Hugging Face Spaces ํ™˜๊ฒฝ ๋ณ€์ˆ˜
5
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
6
+ ENV GRADIO_SERVER_PORT=7860
7
+ ENV PYTHONPATH=/app
8
+ ENV PYTHONUNBUFFERED=1
9
+ ENV TOKENIZERS_PARALLELISM=false
10
+
11
+ # ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ ์„ค์ •
12
+ WORKDIR /app
13
+
14
+ # ์‹œ์Šคํ…œ ์˜์กด์„ฑ ์„ค์น˜
15
+ RUN apt-get update && apt-get install -y \
16
+ build-essential \
17
+ curl \
18
+ git \
19
+ wget \
20
+ ffmpeg \
21
+ libsm6 \
22
+ libxext6 \
23
+ libfontconfig1 \
24
+ libxrender1 \
25
+ libgl1-mesa-glx \
26
+ && rm -rf /var/lib/apt/lists/*
27
+
28
+ # Python ์˜์กด์„ฑ ์„ค์น˜ (์บ์‹ฑ ์ตœ์ ํ™”)
29
+ COPY requirements_full.txt requirements.txt
30
+ RUN pip install --no-cache-dir --upgrade pip
31
+ RUN pip install --no-cache-dir -r requirements.txt
32
+
33
+ # NLTK ๋ฐ์ดํ„ฐ ๋‹ค์šด๋กœ๋“œ
34
+ RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
35
+
36
+ # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์ฝ”๋“œ ๋ณต์‚ฌ
37
+ COPY . .
38
+
39
+ # ํ•„์š”ํ•œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
40
+ RUN mkdir -p /app/data /app/logs /app/models /app/uploads /app/vector_stores /app/temp /app/cache/transformers /app/cache/huggingface
41
+
42
+ # ๊ถŒํ•œ ์„ค์ •
43
+ RUN chmod +x /app/*.py
44
+
45
+ # Hugging Face ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
46
+ ENV TRANSFORMERS_CACHE=/app/cache/transformers
47
+ ENV HF_HOME=/app/cache/huggingface
48
+ ENV HF_HUB_CACHE=/app/cache/huggingface
49
+
50
+ # ํ™˜๊ฒฝ ๊ฐ์ง€ ์„ค์ •
51
+ ENV IS_LOCAL=false
52
+ ENV ENVIRONMENT=production
53
+ ENV DOCKER_ENV=server
54
+
55
+ # Hugging Face Spaces์šฉ ์•ฑ ์‹œ์ž‘์  ์ƒ์„ฑ
56
+ COPY app_huggingface.py /app/app_huggingface.py
57
+
58
+ # ํฌํŠธ ๋…ธ์ถœ (Hugging Face Spaces๋Š” 7860 ํฌํŠธ ์‚ฌ์šฉ)
59
+ EXPOSE 7860
60
+
61
+ # ํ—ฌ์Šค์ฒดํฌ
62
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
63
+ CMD curl -f http://localhost:7860/health || exit 1
64
+
65
+ # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์‹คํ–‰
66
+ CMD ["python", "app_huggingface.py"]
Dockerfile.huggingface.predownload ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces์šฉ Lily LLM API Server Dockerfile (๋ชจ๋ธ ์‚ฌ์ „ ๋‹ค์šด๋กœ๋“œ ๋ฒ„์ „)
2
+ FROM python:3.11-slim
3
+
4
+ # Hugging Face Spaces ํ™˜๊ฒฝ ๋ณ€์ˆ˜
5
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
6
+ ENV GRADIO_SERVER_PORT=7860
7
+ ENV PYTHONPATH=/app
8
+ ENV PYTHONUNBUFFERED=1
9
+ ENV TOKENIZERS_PARALLELISM=false
10
+
11
+ # ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ ์„ค์ •
12
+ WORKDIR /app
13
+
14
+ # ์‹œ์Šคํ…œ ์˜์กด์„ฑ ์„ค์น˜
15
+ RUN apt-get update && apt-get install -y \
16
+ build-essential \
17
+ curl \
18
+ git \
19
+ wget \
20
+ ffmpeg \
21
+ libsm6 \
22
+ libxext6 \
23
+ libfontconfig1 \
24
+ libxrender1 \
25
+ libgl1-mesa-glx \
26
+ && rm -rf /var/lib/apt/lists/*
27
+
28
+ # Python ์˜์กด์„ฑ ์„ค์น˜ (์บ์‹ฑ ์ตœ์ ํ™”)
29
+ COPY requirements_full.txt requirements.txt
30
+ RUN pip install --no-cache-dir --upgrade pip
31
+ RUN pip install --no-cache-dir -r requirements.txt
32
+
33
+ # NLTK ๋ฐ์ดํ„ฐ ๋‹ค์šด๋กœ๋“œ
34
+ RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
35
+
36
+ # ํ•„์š”ํ•œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
37
+ RUN mkdir -p /app/data /app/logs /app/models /app/uploads /app/vector_stores /app/temp /app/cache/transformers /app/cache/huggingface
38
+
39
+ # Hugging Face ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
40
+ ENV TRANSFORMERS_CACHE=/app/cache/transformers
41
+ ENV HF_HOME=/app/cache/huggingface
42
+ ENV HF_HUB_CACHE=/app/cache/huggingface
43
+
44
+ # ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์Šคํฌ๋ฆฝํŠธ ๋ณต์‚ฌ ๋ฐ ์‹คํ–‰
45
+ COPY download_model.py /app/download_model.py
46
+ RUN python /app/download_model.py
47
+
48
+ # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์ฝ”๋“œ ๋ณต์‚ฌ
49
+ COPY . .
50
+
51
+ # ๊ถŒํ•œ ์„ค์ •
52
+ RUN chmod +x /app/*.py
53
+
54
+ # Hugging Face Spaces์šฉ ์•ฑ ์‹œ์ž‘์  ์ƒ์„ฑ
55
+ COPY app_huggingface.py /app/app_huggingface.py
56
+
57
+ # ํฌํŠธ ๋…ธ์ถœ (Hugging Face Spaces๋Š” 7860 ํฌํŠธ ์‚ฌ์šฉ)
58
+ EXPOSE 7860
59
+
60
+ # ํ—ฌ์Šค์ฒดํฌ
61
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
62
+ CMD curl -f http://localhost:7860/health || exit 1
63
+
64
+ # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์‹คํ–‰
65
+ CMD ["python", "app_huggingface.py"]
Dockerfile.latex-ocr ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LaTeX-OCR ์ „์šฉ ์ปจํ…Œ์ด๋„ˆ (CPU ๊ธฐ๋ฐ˜)
2
+ FROM python:3.9-slim
3
+
4
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV DEBIAN_FRONTEND=noninteractive
7
+
8
+ # ์‹œ์Šคํ…œ ํŒจํ‚ค์ง€ ์„ค์น˜
9
+ RUN apt-get update && apt-get install -y \
10
+ build-essential \
11
+ git \
12
+ wget \
13
+ curl \
14
+ libgl1-mesa-glx \
15
+ libglib2.0-0 \
16
+ libsm6 \
17
+ libxext6 \
18
+ libxrender-dev \
19
+ libgomp1 \
20
+ && rm -rf /var/lib/apt/lists/*
21
+
22
+ # ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ ์„ค์ •
23
+ WORKDIR /app
24
+
25
+ # LaTeX-OCR ์˜์กด์„ฑ ์„ค์น˜
26
+ RUN pip install --no-cache-dir \
27
+ torch==2.0.1 \
28
+ transformers==4.30.0 \
29
+ timm==0.6.13 \
30
+ numpy==1.24.3 \
31
+ Pillow \
32
+ requests \
33
+ faiss-cpu \
34
+ sentence-transformers \
35
+ pymupdf \
36
+ easyocr
37
+
38
+ # LaTeX-OCR ์„ค์น˜
39
+ RUN pip install --no-cache-dir pix2tex
40
+
41
+ # ํ”„๋กœ์ ํŠธ ํŒŒ์ผ ๋ณต์‚ฌ
42
+ COPY . .
43
+
44
+ # LaTeX-OCR ์„œ๋น„์Šค ์Šคํฌ๋ฆฝํŠธ ์ƒ์„ฑ
45
+ RUN echo '#!/usr/bin/env python3\n\
46
+ import sys\n\
47
+ import os\n\
48
+ sys.path.insert(0, "/app")\n\
49
+ \n\
50
+ from lily_llm_core.latex_ocr_subprocess_v2 import latex_ocr_processor_v2\n\
51
+ \n\
52
+ def main():\n\
53
+ print("LaTeX-OCR ์„œ๋น„์Šค ์‹œ์ž‘...")\n\
54
+ processor = latex_ocr_processor_v2\n\
55
+ print("LaTeX-OCR ์„œ๋น„์Šค ์ค€๋น„ ์™„๋ฃŒ")\n\
56
+ \n\
57
+ # ์„œ๋น„์Šค ์œ ์ง€\n\
58
+ import time\n\
59
+ while True:\n\
60
+ time.sleep(1)\n\
61
+ \n\
62
+ if __name__ == "__main__":\n\
63
+ main()\n\
64
+ ' > /app/latex_ocr_service.py
65
+
66
+ # ์‹คํ–‰ ๊ถŒํ•œ ๋ถ€์—ฌ
67
+ RUN chmod +x /app/latex_ocr_service.py
68
+
69
+ # ์‹คํ–‰ ๋ช…๋ น
70
+ CMD ["python3", "latex_ocr_service.py"]
Dockerfile.local ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ๋กœ์ปฌ ๊ฐœ๋ฐœ์šฉ Lily LLM API Server Dockerfile
2
+ FROM python:3.11-slim
3
+
4
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
5
+ ENV IS_LOCAL=true
6
+ ENV ENVIRONMENT=local
7
+ ENV DOCKER_ENV=local
8
+ ENV PYTHONPATH=/app
9
+ ENV PYTHONUNBUFFERED=1
10
+ ENV TOKENIZERS_PARALLELISM=false
11
+
12
+ # ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ ์„ค์ •
13
+ WORKDIR /app
14
+
15
+ # ์‹œ์Šคํ…œ ์˜์กด์„ฑ ์„ค์น˜
16
+ RUN apt-get update && apt-get install -y \
17
+ build-essential \
18
+ curl \
19
+ git \
20
+ wget \
21
+ ffmpeg \
22
+ libsm6 \
23
+ libxext6 \
24
+ libfontconfig1 \
25
+ libxrender1 \
26
+ libgl1-mesa-glx \
27
+ && rm -rf /var/lib/apt/lists/*
28
+
29
+ # Python ์˜์กด์„ฑ ์„ค์น˜
30
+ COPY requirements_full.txt requirements.txt
31
+ RUN pip install --no-cache-dir --upgrade pip
32
+ RUN pip install --no-cache-dir -r requirements.txt
33
+
34
+ # NLTK ๋ฐ์ดํ„ฐ ๋‹ค์šด๋กœ๋“œ
35
+ RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('maxent_ne_chunker'); nltk.download('words'); nltk.download('stopwords')"
36
+
37
+ # ํ•„์š”ํ•œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
38
+ RUN mkdir -p /app/data /app/logs /app/models /app/uploads /app/vector_stores /app/temp
39
+
40
+ # ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์ฝ”๋“œ ๋ณต์‚ฌ
41
+ COPY . .
42
+
43
+ # ๊ถŒํ•œ ์„ค์ •
44
+ RUN chmod +x /app/*.py
45
+
46
+ # ํฌํŠธ ๋…ธ์ถœ (๋กœ์ปฌ ๊ฐœ๋ฐœ์šฉ)
47
+ EXPOSE 8001
48
+
49
+ # ํ—ฌ์Šค์ฒดํฌ
50
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
51
+ CMD curl -f http://localhost:8001/health || exit 1
52
+
53
+ # ๋กœ์ปฌ ๊ฐœ๋ฐœ์šฉ ์•ฑ ์‹œ์ž‘์ 
54
+ CMD ["python", "app_local.py"]
ENVIRONMENT_VARIABLES.md ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ๐Ÿ”ง ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ • ๊ฐ€์ด๋“œ
2
+
3
+ ## ๐Ÿ  ๋กœ์ปฌ ๊ฐœ๋ฐœ ํ™˜๊ฒฝ
4
+
5
+ ### .env ํŒŒ์ผ ์„ค์ •
6
+ ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ์— `.env` ํŒŒ์ผ์„ ์ƒ์„ฑํ•˜๊ณ  ๋‹ค์Œ ๋ณ€์ˆ˜๋“ค์„ ์„ค์ •ํ•˜์„ธ์š”:
7
+
8
+ ```bash
9
+ # ๊ธฐ๋ณธ ์„œ๋ฒ„ ์„ค์ •
10
+ HOST=0.0.0.0
11
+ PORT=8001
12
+ PYTHONPATH=/app
13
+ PYTHONUNBUFFERED=1
14
+
15
+ # ํ™˜๊ฒฝ ๊ฐ์ง€
16
+ IS_LOCAL=true
17
+ ENVIRONMENT=local
18
+ DOCKER_ENV=local
19
+
20
+ # ๋ชจ๋ธ ์„ค์ •
21
+ DEFAULT_MODEL=kanana-1.5-v-3b-instruct
22
+ MAX_NEW_TOKENS=256
23
+ TEMPERATURE=0.7
24
+
25
+ # ๋กœ์ปฌ ๋ชจ๋ธ ๊ฒฝ๋กœ (์„ ํƒ์‚ฌํ•ญ)
26
+ LOCAL_MODEL_PATH=./lily_llm_core/models/kanana_1_5_v_3b_instruct
27
+ ```
28
+
29
+ ### ๋กœ์ปฌ Docker ์‹คํ–‰
30
+ ```bash
31
+ # ๋กœ์ปฌ ๊ฐœ๋ฐœ์šฉ Docker ๋นŒ๋“œ
32
+ docker build -f Dockerfile.local -t lily-llm-local .
33
+
34
+ # ๋กœ์ปฌ ์‹คํ–‰ (ํฌํŠธ 8001)
35
+ docker run -p 8001:8001 --env-file .env lily-llm-local
36
+ ```
37
+
38
+ ## โ˜๏ธ Hugging Face Spaces ํ™˜๊ฒฝ
39
+
40
+ ### ํ•„์ˆ˜ ํ™˜๊ฒฝ ๋ณ€์ˆ˜
41
+
42
+ Hugging Face Spaces Settings > Variables์—์„œ ๋‹ค์Œ ๋ณ€์ˆ˜๋“ค์„ ์„ค์ •ํ•˜์„ธ์š”:
43
+
44
+ ### ๊ธฐ๋ณธ ์„œ๋ฒ„ ์„ค์ •
45
+ ```bash
46
+ HOST=0.0.0.0
47
+ PORT=7860
48
+ PYTHONPATH=/app
49
+ PYTHONUNBUFFERED=1
50
+ ```
51
+
52
+ ### Hugging Face ์„ค์ •
53
+ ```bash
54
+ # ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ
55
+ TRANSFORMERS_CACHE=/app/cache/transformers
56
+ HF_HOME=/app/cache/huggingface
57
+ HF_HUB_CACHE=/app/cache/huggingface
58
+
59
+ # ๋ชจ๋ธ ์„ค์ •
60
+ HF_MODEL_NAME=gbrabbit/lily-math-model
61
+ DEFAULT_MODEL=kanana-1.5-v-3b-instruct
62
+
63
+ # ํ† ํฐํ™” ๋ณ‘๋ ฌ ์ฒ˜๋ฆฌ ๋น„ํ™œ์„ฑํ™” (๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ)
64
+ TOKENIZERS_PARALLELISM=false
65
+ ```
66
+
67
+ ### ์„ฑ๋Šฅ ์ตœ์ ํ™”
68
+ ```bash
69
+ # CPU ์Šค๋ ˆ๋“œ ์ œํ•œ (๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ)
70
+ OMP_NUM_THREADS=1
71
+ MKL_NUM_THREADS=1
72
+
73
+ # PyTorch ์„ค์ •
74
+ TORCH_HOME=/app/cache/torch
75
+ PYTORCH_TRANSFORMERS_CACHE=/app/cache/transformers
76
+ ```
77
+
78
+ ### AI ๋ชจ๋ธ ์„ค์ •
79
+ ```bash
80
+ # ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ
81
+ MAX_NEW_TOKENS=256
82
+ TEMPERATURE=0.7
83
+ TOP_P=0.9
84
+ TOP_K=40
85
+ ```
86
+
87
+ ## ์„ ํƒ์  ํ™˜๊ฒฝ ๋ณ€์ˆ˜
88
+
89
+ ### ๋””๋ฒ„๊น…
90
+ ```bash
91
+ # ๋กœ๊ทธ ๋ ˆ๋ฒจ
92
+ LOG_LEVEL=INFO
93
+ DEBUG=false
94
+
95
+ # ์ƒ์„ธ ๋กœ๊น…
96
+ TRANSFORMERS_VERBOSITY=warning
97
+ HF_HUB_VERBOSITY=warning
98
+ ```
99
+
100
+ ### ๋ณด์•ˆ (ํ•„์š”์‹œ)
101
+ ```bash
102
+ # API ํ‚ค (ํ•„์š”ํ•œ ๊ฒฝ์šฐ)
103
+ HF_TOKEN=your_huggingface_token
104
+ API_SECRET_KEY=your_secret_key
105
+ ```
106
+
107
+ ## ๐Ÿš€ ์ž๋™ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ๋™์ž‘ ๋ฐฉ์‹
108
+
109
+ ### 1๋‹จ๊ณ„: ๋กœ์ปฌ ๋ชจ๋ธ ํ™•์ธ
110
+ - `/app/lily_llm_core/models/kanana_1_5_v_3b_instruct/` ๊ฒฝ๋กœ ํ™•์ธ
111
+ - ํŒŒ์ผ์ด ์žˆ์œผ๋ฉด ๋กœ์ปฌ ๋ชจ๋ธ ์‚ฌ์šฉ
112
+
113
+ ### 2๋‹จ๊ณ„: Hugging Face Hub ๋‹ค์šด๋กœ๋“œ
114
+ - ๋กœ์ปฌ ๋ชจ๋ธ์ด ์—†์œผ๋ฉด `gbrabbit/lily-math-model`์—์„œ ์ž๋™ ๋‹ค์šด๋กœ๋“œ
115
+ - `/app/cache/transformers/` ๊ฒฝ๋กœ์— ์บ์‹œ ์ €์žฅ
116
+
117
+ ### 3๋‹จ๊ณ„: ๋ชจ๋ธ ๋กœ๋”ฉ
118
+ - ์บ์‹œ๋œ ๋ชจ๋ธ์„ ๋ฉ”๋ชจ๋ฆฌ์— ๋กœ๋“œ
119
+ - ์„œ๋ฒ„ ์‹œ์ž‘ ์™„๋ฃŒ
120
+
121
+ ## ๐Ÿ“Š ์˜ˆ์ƒ ๋™์ž‘
122
+
123
+ ### ์ฒซ ๋ฒˆ์งธ ๋ฐฐํฌ
124
+ ```
125
+ ๐ŸŒ Hugging Face Hub์—์„œ ๋‹ค์šด๋กœ๋“œ: gbrabbit/lily-math-model
126
+ ๐Ÿ“ฅ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘... (์•ฝ 2-5๋ถ„)
127
+ โœ… ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ
128
+ ๐Ÿš€ ์„œ๋ฒ„ ์‹œ์ž‘: 0.0.0.0:7860
129
+ ```
130
+
131
+ ### ์ดํ›„ ์žฌ์‹œ์ž‘
132
+ ```
133
+ ๐Ÿ—‚๏ธ ์บ์‹œ๋œ ๋ชจ๋ธ ์‚ฌ์šฉ: /app/cache/transformers/
134
+ โœ… ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ (์•ฝ 30์ดˆ)
135
+ ๐Ÿš€ ์„œ๋ฒ„ ์‹œ์ž‘: 0.0.0.0:7860
136
+ ```
137
+
138
+ ## ๐Ÿ” ๋ฌธ์ œ ํ•ด๊ฒฐ
139
+
140
+ ### ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์‹คํŒจ
141
+ ```bash
142
+ # ๋„คํŠธ์›Œํฌ ์—ฐ๊ฒฐ ํ™•์ธ
143
+ curl -I https://huggingface.co/gbrabbit/lily-math-model
144
+
145
+ # Hugging Face Hub ์ƒํƒœ ํ™•์ธ
146
+ curl -I https://huggingface.co/api/models/gbrabbit/lily-math-model
147
+ ```
148
+
149
+ ### ๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ
150
+ ```bash
151
+ # ๋” ์ž‘์€ ๋ชจ๋ธ ์‚ฌ์šฉ ๋˜๋Š” ์–‘์žํ™” ์ ์šฉ
152
+ # Hardware ์—…๊ทธ๋ ˆ์ด๋“œ ๊ณ ๋ ค (CPU upgrade ๋˜๋Š” GPU)
153
+ ```
154
+
155
+ ### ์บ์‹œ ๋ฌธ์ œ
156
+ ```bash
157
+ # ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ๊ถŒํ•œ ํ™•์ธ
158
+ ls -la /app/cache/
159
+
160
+ # ์บ์‹œ ์‚ญ์ œ ํ›„ ์žฌ์‹œ์ž‘
161
+ rm -rf /app/cache/transformers/*
162
+ ```
GPU_DEPLOYMENT_GUIDE.md ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ๐Ÿš€ GPU ํ™˜๊ฒฝ ๋ฐฐํฌ ๊ฐ€์ด๋“œ
2
+
3
+ ## ๐Ÿ“‹ ์‚ฌ์ „ ์š”๊ตฌ์‚ฌํ•ญ
4
+
5
+ ### 1. ํ•˜๋“œ์›จ์–ด ์š”๊ตฌ์‚ฌํ•ญ
6
+ - **GPU**: NVIDIA GPU (RTX 3060 ์ด์ƒ ๊ถŒ์žฅ)
7
+ - **๋ฉ”๋ชจ๋ฆฌ**: ์ตœ์†Œ 16GB RAM, ๊ถŒ์žฅ 32GB RAM
8
+ - **์ €์žฅ๊ณต๊ฐ„**: ์ตœ์†Œ 50GB ์—ฌ์œ  ๊ณต๊ฐ„
9
+
10
+ ### 2. ์†Œํ”„ํŠธ์›จ์–ด ์š”๊ตฌ์‚ฌํ•ญ
11
+
12
+ #### NVIDIA ๋“œ๋ผ์ด๋ฒ„ ์„ค์น˜
13
+ ```bash
14
+ # Ubuntu/Debian
15
+ sudo apt update
16
+ sudo apt install nvidia-driver-470
17
+
18
+ # Windows
19
+ # NVIDIA ์›น์‚ฌ์ดํŠธ์—์„œ ์ตœ์‹  ๋“œ๋ผ์ด๋ฒ„ ๋‹ค์šด๋กœ๋“œ
20
+ ```
21
+
22
+ #### CUDA ์„ค์น˜
23
+ ```bash
24
+ # CUDA 11.8 ์„ค์น˜ (๊ถŒ์žฅ)
25
+ wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
26
+ sudo sh cuda_11.8.0_520.61.05_linux.run
27
+ ```
28
+
29
+ #### Docker ์„ค์น˜
30
+ ```bash
31
+ # Ubuntu/Debian
32
+ curl -fsSL https://get.docker.com -o get-docker.sh
33
+ sudo sh get-docker.sh
34
+ sudo usermod -aG docker $USER
35
+
36
+ # Windows
37
+ # Docker Desktop ์„ค์น˜
38
+ ```
39
+
40
+ #### NVIDIA Docker ์„ค์น˜
41
+ ```bash
42
+ # NVIDIA Container Toolkit ์„ค์น˜
43
+ distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
44
+ curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
45
+ curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
46
+
47
+ sudo apt-get update
48
+ sudo apt-get install -y nvidia-docker2
49
+ sudo systemctl restart docker
50
+ ```
51
+
52
+ ## ๐Ÿ”ง ํ™˜๊ฒฝ ์„ค์ •
53
+
54
+ ### 1. GPU ํ™˜๊ฒฝ ํ™•์ธ
55
+ ```bash
56
+ cd C:\Project\lily_generate_project\lily_generate_package
57
+ python check_gpu_environment.py
58
+ ```
59
+
60
+ ### 2. Hugging Face ์„ค์ •
61
+ ```bash
62
+ # Hugging Face ํ† ํฐ ์„ค์ •
63
+ huggingface-cli login
64
+
65
+ # ๋˜๋Š” Python ์Šคํฌ๋ฆฝํŠธ๋กœ ์„ค์ •
66
+ python huggingface_gpu_setup.py
67
+ ```
68
+
69
+ ## ๐Ÿš€ ๋ฐฐํฌ ์‹คํ–‰
70
+
71
+ ### 1. ์ž๋™ ๋ฐฐํฌ (๊ถŒ์žฅ)
72
+ ```bash
73
+ # ๋ฐฐํฌ ์Šคํฌ๋ฆฝํŠธ ์‹คํ–‰
74
+ chmod +x deploy_gpu_huggingface.sh
75
+ ./deploy_gpu_huggingface.sh
76
+ ```
77
+
78
+ ### 2. ์ˆ˜๋™ ๋ฐฐํฌ
79
+ ```bash
80
+ # 1. ๊ธฐ์กด ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ
81
+ docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans
82
+
83
+ # 2. GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
84
+ nvidia-smi --gpu-reset
85
+
86
+ # 3. ์ด๋ฏธ์ง€ ๋นŒ๋“œ
87
+ docker-compose -f docker-compose.gpu.yml build --no-cache
88
+
89
+ # 4. ์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘
90
+ docker-compose -f docker-compose.gpu.yml up -d
91
+
92
+ # 5. ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ
93
+ docker-compose -f docker-compose.gpu.yml logs -f
94
+ ```
95
+
96
+ ## ๐Ÿงช ํ…Œ์ŠคํŠธ
97
+
98
+ ### 1. GPU ๋ฐฐํฌ ํ…Œ์ŠคํŠธ
99
+ ```bash
100
+ python test_gpu_deployment.py
101
+ ```
102
+
103
+ ### 2. Hugging Face ๋ชจ๋ธ ํ…Œ์ŠคํŠธ
104
+ ```bash
105
+ python huggingface_gpu_setup.py
106
+ ```
107
+
108
+ ### 3. API ํ…Œ์ŠคํŠธ
109
+ ```bash
110
+ curl http://localhost:8001/health
111
+ ```
112
+
113
+ ## ๐Ÿ“Š ๋ชจ๋‹ˆํ„ฐ๋ง
114
+
115
+ ### 1. GPU ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ
116
+ ```bash
117
+ nvidia-smi
118
+ nvidia-smi -l 1 # 1์ดˆ๋งˆ๋‹ค ์—…๋ฐ์ดํŠธ
119
+ ```
120
+
121
+ ### 2. ์ปจํ…Œ์ด๋„ˆ ์ƒํƒœ ํ™•์ธ
122
+ ```bash
123
+ docker ps
124
+ docker stats
125
+ ```
126
+
127
+ ### 3. ๋กœ๊ทธ ํ™•์ธ
128
+ ```bash
129
+ # ์ „์ฒด ๋กœ๊ทธ
130
+ docker-compose -f docker-compose.gpu.yml logs -f
131
+
132
+ # ํŠน์ • ์„œ๋น„์Šค ๋กœ๊ทธ
133
+ docker-compose -f docker-compose.gpu.yml logs -f lily-llm-api-gpu
134
+ ```
135
+
136
+ ## ๐Ÿ”ง ๋ฌธ์ œ ํ•ด๊ฒฐ
137
+
138
+ ### 1. GPU ๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ
139
+ ```bash
140
+ # GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
141
+ nvidia-smi --gpu-reset
142
+
143
+ # ์ปจํ…Œ์ด๋„ˆ ์žฌ์‹œ์ž‘
144
+ docker-compose -f docker-compose.gpu.yml restart
145
+ ```
146
+
147
+ ### 2. CUDA ๋ฒ„์ „ ์ถฉ๋Œ
148
+ ```bash
149
+ # CUDA ๋ฒ„์ „ ํ™•์ธ
150
+ nvcc --version
151
+
152
+ # PyTorch CUDA ๋ฒ„์ „ ํ™•์ธ
153
+ python -c "import torch; print(torch.version.cuda)"
154
+ ```
155
+
156
+ ### 3. Docker ๊ถŒํ•œ ๋ฌธ์ œ
157
+ ```bash
158
+ # Docker ๊ทธ๋ฃน์— ์‚ฌ์šฉ์ž ์ถ”๊ฐ€
159
+ sudo usermod -aG docker $USER
160
+
161
+ # ์žฌ๋กœ๊ทธ์ธ ํ›„ ํ™•์ธ
162
+ docker ps
163
+ ```
164
+
165
+ ### 4. Hugging Face ํ† ํฐ ๋ฌธ์ œ
166
+ ```bash
167
+ # ํ† ํฐ ์žฌ์„ค์ •
168
+ huggingface-cli logout
169
+ huggingface-cli login
170
+ ```
171
+
172
+ ## ๐Ÿ“ˆ ์„ฑ๋Šฅ ์ตœ์ ํ™”
173
+
174
+ ### 1. ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™”
175
+ ```bash
176
+ # 4-bit ์–‘์žํ™” ์ ์šฉ
177
+ python huggingface_gpu_setup.py
178
+
179
+ # ์„ฑ๋Šฅ ์ตœ์ ํ™” ์ ์šฉ
180
+ python performance_optimization.py
181
+ ```
182
+
183
+ ### 2. ๋ฐฐ์น˜ ํฌ๊ธฐ ์กฐ์ •
184
+ ```python
185
+ # config.yaml์—์„œ ๋ฐฐ์น˜ ํฌ๊ธฐ ์กฐ์ •
186
+ batch_size: 4 # GPU ๋ฉ”๋ชจ๋ฆฌ์— ๋”ฐ๋ผ ์กฐ์ •
187
+ ```
188
+
189
+ ### 3. ๋ชจ๋ธ ์บ์‹ฑ
190
+ ```bash
191
+ # Hugging Face ์บ์‹œ ์„ค์ •
192
+ export HF_HOME="/path/to/cache"
193
+ export TRANSFORMERS_CACHE="/path/to/cache"
194
+ ```
195
+
196
+ ## ๐Ÿ”„ ์—…๋ฐ์ดํŠธ
197
+
198
+ ### 1. ๋ชจ๋ธ ์—…๋ฐ์ดํŠธ
199
+ ```bash
200
+ # ์ตœ์‹  ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ
201
+ python huggingface_gpu_setup.py
202
+
203
+ # ์ปจํ…Œ์ด๋„ˆ ์žฌ์‹œ์ž‘
204
+ docker-compose -f docker-compose.gpu.yml restart
205
+ ```
206
+
207
+ ### 2. ์ฝ”๋“œ ์—…๋ฐ์ดํŠธ
208
+ ```bash
209
+ # ์ฝ”๋“œ ๋ณ€๊ฒฝ ํ›„ ์žฌ๋นŒ๋“œ
210
+ docker-compose -f docker-compose.gpu.yml build --no-cache
211
+ docker-compose -f docker-compose.gpu.yml up -d
212
+ ```
213
+
214
+ ## ๐Ÿ“ž ์ง€์›
215
+
216
+ ### ๋ฌธ์ œ ๋ฐœ์ƒ ์‹œ ํ™•์ธ์‚ฌํ•ญ
217
+ 1. GPU ๋“œ๋ผ์ด๋ฒ„ ๋ฒ„์ „
218
+ 2. CUDA ๋ฒ„์ „
219
+ 3. Docker ๋ฒ„์ „
220
+ 4. ์‹œ์Šคํ…œ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰
221
+ 5. GPU ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰
222
+
223
+ ### ๋กœ๊ทธ ํŒŒ์ผ ์œ„์น˜
224
+ - Docker ๋กœ๊ทธ: `docker-compose -f docker-compose.gpu.yml logs`
225
+ - ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ๋กœ๊ทธ: `logs/` ๋””๋ ‰ํ† ๋ฆฌ
226
+ - GPU ๋กœ๊ทธ: `nvidia-smi`
227
+
228
+ ## ๐ŸŽฏ ์„ฑ๋Šฅ ๋ฒค์น˜๋งˆํฌ
229
+
230
+ ### ๊ถŒ์žฅ ์‚ฌ์–‘๋ณ„ ์„ฑ๋Šฅ
231
+ - **RTX 3060 (12GB)**: ๊ธฐ๋ณธ ๋ชจ๋ธ ์‹คํ–‰ ๊ฐ€๋Šฅ
232
+ - **RTX 3080 (10GB)**: ์ค‘๊ฐ„ ํฌ๊ธฐ ๋ชจ๋ธ ์‹คํ–‰ ๊ฐ€๋Šฅ
233
+ - **RTX 3090 (24GB)**: ๋Œ€์šฉ๋Ÿ‰ ๋ชจ๋ธ ์‹คํ–‰ ๊ฐ€๋Šฅ
234
+ - **RTX 4090 (24GB)**: ์ตœ๊ณ  ์„ฑ๋Šฅ, ๋ชจ๋“  ๋ชจ๋ธ ์‹คํ–‰ ๊ฐ€๋Šฅ
235
+
236
+ ### ๋ฉ”๋ชจ๏ฟฝ๏ฟฝ๏ฟฝ ์‚ฌ์šฉ๋Ÿ‰ ๊ฐ€์ด๋“œ
237
+ - **4-bit ์–‘์žํ™”**: ๋ชจ๋ธ ํฌ๊ธฐ์˜ ์•ฝ 25%
238
+ - **8-bit ์–‘์žํ™”**: ๋ชจ๋ธ ํฌ๊ธฐ์˜ ์•ฝ 50%
239
+ - **16-bit (FP16)**: ๋ชจ๋ธ ํฌ๊ธฐ์˜ ์•ฝ 100%
240
+ - **32-bit (FP32)**: ๋ชจ๋ธ ํฌ๊ธฐ์˜ ์•ฝ 200%
HEARTH_CHAT_INTEGRATION.md ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hearth Chat๊ณผ Lily LLM API ์—ฐ๋™ ๊ฐ€์ด๋“œ
2
+
3
+ ## ๐Ÿ”— ์—ฐ๋™ ๊ฐœ์š”
4
+
5
+ Hugging Face Spaces์— ๋ฐฐํฌ๋œ Lily LLM API๋ฅผ Railway์—์„œ ํ˜ธ์ŠคํŒ…๋˜๋Š” Hearth Chat ์„œ๋น„์Šค์™€ ์—ฐ๋™ํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ์„ค๋ช…ํ•ฉ๋‹ˆ๋‹ค.
6
+
7
+ ## 1. Hugging Face Spaces ๋ฐฐํฌ ์™„๋ฃŒ ํ™•์ธ
8
+
9
+ ### 1.1 API ์—”๋“œํฌ์ธํŠธ ํ™•์ธ
10
+ ๋ฐฐํฌ๋œ Lily LLM API URL:
11
+ ```
12
+ https://YOUR_USERNAME-lily-llm-api.hf.space
13
+ ```
14
+
15
+ ### 1.2 ์ฃผ์š” ์—”๋“œํฌ์ธํŠธ ํ…Œ์ŠคํŠธ
16
+
17
+ ```bash
18
+ # ํ—ฌ์Šค ์ฒดํฌ
19
+ curl https://YOUR_USERNAME-lily-llm-api.hf.space/health
20
+
21
+ # ๋ชจ๋ธ ๋ชฉ๋ก ํ™•์ธ
22
+ curl https://YOUR_USERNAME-lily-llm-api.hf.space/models
23
+
24
+ # ํ…์ŠคํŠธ ์ƒ์„ฑ ํ…Œ์ŠคํŠธ
25
+ curl -X POST https://YOUR_USERNAME-lily-llm-api.hf.space/generate \
26
+ -F "prompt=์•ˆ๋…•ํ•˜์„ธ์š”! ํ…Œ์ŠคํŠธ์ž…๋‹ˆ๋‹ค."
27
+ ```
28
+
29
+ ## 2. Hearth Chat ์„ค์ • ์—…๋ฐ์ดํŠธ
30
+
31
+ ### 2.1 AI ์„ค์ • ๋ชจ๋‹ฌ ์—…๋ฐ์ดํŠธ
32
+
33
+ `hearth_chat_react/src/components/AISettingsModal.js`์—์„œ Lily LLM ์„ค์ • ์ถ”๊ฐ€:
34
+
35
+ ```javascript
36
+ // Lily LLM API URL ์„ค์ •
37
+ {settings.aiProvider === 'lily' && (
38
+ <>
39
+ <div className="setting-group">
40
+ <label className="setting-label">Lily API URL:</label>
41
+ <input
42
+ type="url"
43
+ value={settings.lilyApiUrl}
44
+ onChange={(e) => handleInputChange('lilyApiUrl', e.target.value)}
45
+ placeholder="https://your-username-lily-llm-api.hf.space"
46
+ />
47
+ </div>
48
+
49
+ <div className="setting-group">
50
+ <label className="setting-label">Lily ๋ชจ๋ธ:</label>
51
+ <select
52
+ value={settings.lilyModel}
53
+ onChange={(e) => handleInputChange('lilyModel', e.target.value)}
54
+ >
55
+ <option value="kanana-1.5-v-3b-instruct">Kanana 1.5 v3B Instruct</option>
56
+ </select>
57
+ </div>
58
+
59
+ {/* API ์—ฐ๊ฒฐ ์ƒํƒœ ํ‘œ์‹œ */}
60
+ <div className="model-info">
61
+ <small style={{ color: '#4CAF50', fontWeight: 'bold' }}>
62
+ ๐ŸŒ Hugging Face Spaces์—์„œ ํ˜ธ์ŠคํŒ…
63
+ </small>
64
+ <small style={{ color: '#666', display: 'block', marginTop: '4px' }}>
65
+ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ AI ๋ชจ๋ธ (ํ…์ŠคํŠธ + ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ)
66
+ </small>
67
+ </div>
68
+ </>
69
+ )}
70
+ ```
71
+
72
+ ### 2.2 ์—ฐ๊ฒฐ ํ…Œ์ŠคํŠธ ํ•จ์ˆ˜ ์—…๋ฐ์ดํŠธ
73
+
74
+ ```javascript
75
+ case 'lily':
76
+ testUrl = `${settings.lilyApiUrl}/health`;
77
+ testData = {
78
+ method: 'GET',
79
+ headers: {
80
+ 'Accept': 'application/json'
81
+ }
82
+ };
83
+
84
+ // ์ถ”๊ฐ€ ์ƒ์„ฑ ํ…Œ์ŠคํŠธ
85
+ if (response.ok) {
86
+ const generateTestUrl = `${settings.lilyApiUrl}/generate`;
87
+ const generateResponse = await fetch(generateTestUrl, {
88
+ method: 'POST',
89
+ body: new FormData([
90
+ ['prompt', '์—ฐ๊ฒฐ ํ…Œ์ŠคํŠธ์ž…๋‹ˆ๋‹ค.']
91
+ ])
92
+ });
93
+
94
+ if (generateResponse.ok) {
95
+ const result = await generateResponse.json();
96
+ console.log('Lily LLM ์ƒ์„ฑ ํ…Œ์ŠคํŠธ ์„ฑ๊ณต:', result);
97
+ }
98
+ }
99
+ break;
100
+ ```
101
+
102
+ ## 3. ๋ฐฑ์—”๋“œ ์—ฐ๋™ ์—…๋ฐ์ดํŠธ
103
+
104
+ ### 3.1 Consumers.py ์ˆ˜์ •
105
+
106
+ `hearth_chat_django/chat/consumers.py`์—์„œ Lily LLM API ํ˜ธ์ถœ ๋ถ€๋ถ„:
107
+
108
+ ```python
109
+ async def call_lily_api(user_message, user_emotion, image_urls=None, documents=None):
110
+ """Lily LLM API ํ˜ธ์ถœ (Hugging Face Spaces)"""
111
+ import requests
112
+ import aiohttp
113
+
114
+ try:
115
+ # ์‚ฌ์šฉ์ž ์„ค์ •์—์„œ API URL ๊ฐ€์ ธ์˜ค๊ธฐ
116
+ user = getattr(self, 'scope', {}).get('user', None)
117
+ ai_settings = None
118
+ if user and hasattr(user, 'is_authenticated') and user.is_authenticated:
119
+ ai_settings = await self.get_user_ai_settings(user)
120
+
121
+ # API URL ์„ค์ • (๊ธฐ๋ณธ๊ฐ’: Hugging Face Spaces)
122
+ lily_api_url = ai_settings.get('lilyApiUrl', 'https://gbrabbit-lily-math-rag.hf.space') if ai_settings else 'https://gbrabbit-lily-math-rag.hf.space'
123
+ lily_model = ai_settings.get('lilyModel', 'kanana-1.5-v-3b-instruct') if ai_settings else 'kanana-1.5-v-3b-instruct'
124
+
125
+ # API ์—”๋“œํฌ์ธํŠธ
126
+ generate_url = f"{lily_api_url}/generate"
127
+
128
+ # ์š”์ฒญ ๋ฐ์ดํ„ฐ ์ค€๋น„
129
+ data = {
130
+ 'prompt': f"{emotion_prompt}\n\n์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€: {user_message}",
131
+ 'max_length': 200,
132
+ 'temperature': 0.7
133
+ }
134
+
135
+ files = {}
136
+
137
+ # ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
138
+ if image_urls and len(image_urls) > 0:
139
+ print(f"๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ: {len(image_urls)}๊ฐœ")
140
+
141
+ async with aiohttp.ClientSession() as session:
142
+ for i, image_url in enumerate(image_urls[:4]): # ์ตœ๋Œ€ 4๊ฐœ ์ด๋ฏธ์ง€
143
+ try:
144
+ async with session.get(image_url) as img_response:
145
+ if img_response.status == 200:
146
+ image_data = await img_response.read()
147
+ files[f'image{i+1}'] = ('image.jpg', image_data, 'image/jpeg')
148
+ except Exception as e:
149
+ print(f"โŒ ์ด๋ฏธ์ง€ {i+1} ๋กœ๋“œ ์‹คํŒจ: {e}")
150
+
151
+ # API ํ˜ธ์ถœ
152
+ timeout = aiohttp.ClientTimeout(total=120) # 2๋ถ„ ํƒ€์ž„์•„์›ƒ
153
+
154
+ async with aiohttp.ClientSession(timeout=timeout) as session:
155
+ if files:
156
+ # ๋ฉ€ํ‹ฐํŒŒํŠธ ์š”์ฒญ (์ด๋ฏธ์ง€ ํฌํ•จ)
157
+ form_data = aiohttp.FormData()
158
+ for key, value in data.items():
159
+ form_data.add_field(key, str(value))
160
+ for key, (filename, file_data, content_type) in files.items():
161
+ form_data.add_field(key, file_data, filename=filename, content_type=content_type)
162
+
163
+ async with session.post(generate_url, data=form_data) as response:
164
+ if response.status == 200:
165
+ result = await response.json()
166
+ lily_response = result.get('generated_text', '์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.')
167
+
168
+ return {
169
+ "response": lily_response,
170
+ "provider": "lily",
171
+ "ai_name": "Lily LLM",
172
+ "ai_type": "huggingface"
173
+ }
174
+ else:
175
+ error_text = await response.text()
176
+ raise Exception(f"Lily API ์˜ค๋ฅ˜: {response.status} - {error_text}")
177
+ else:
178
+ # ์ผ๋ฐ˜ POST ์š”์ฒญ (ํ…์ŠคํŠธ๋งŒ)
179
+ async with session.post(generate_url, data=data) as response:
180
+ if response.status == 200:
181
+ result = await response.json()
182
+ lily_response = result.get('generated_text', '์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.')
183
+
184
+ return {
185
+ "response": lily_response,
186
+ "provider": "lily",
187
+ "ai_name": "Lily LLM",
188
+ "ai_type": "huggingface"
189
+ }
190
+ else:
191
+ error_text = await response.text()
192
+ raise Exception(f"Lily API ์˜ค๋ฅ˜: {response.status} - {error_text}")
193
+
194
+ except Exception as e:
195
+ print(f"โŒ Lily LLM API ํ˜ธ์ถœ ์‹คํŒจ: {e}")
196
+ raise e
197
+ ```
198
+
199
+ ### 3.2 ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
200
+
201
+ Railway ํ™˜๊ฒฝ์—์„œ ๋‹ค์Œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์ถ”๊ฐ€:
202
+
203
+ ```bash
204
+ # Lily LLM API ์„ค์ •
205
+ LILY_LLM_API_URL=https://YOUR_USERNAME-lily-llm-api.hf.space
206
+ LILY_LLM_MODEL=kanana-1.5-v-3b-instruct
207
+ LILY_LLM_TIMEOUT=120
208
+ ```
209
+
210
+ ## 4. ํ…Œ์ŠคํŠธ ๋ฐ ๊ฒ€์ฆ
211
+
212
+ ### 4.1 ์—ฐ๋™ ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ
213
+
214
+ ```python
215
+ # test_hearth_lily_integration.py
216
+
217
+ import requests
218
+ import json
219
+
220
+ def test_hearth_chat_lily_integration():
221
+ """Hearth Chat๊ณผ Lily LLM ์—ฐ๋™ ํ…Œ์ŠคํŠธ"""
222
+
223
+ # Hearth Chat API ์—”๋“œํฌ์ธํŠธ (Railway)
224
+ hearth_chat_url = "https://your-hearth-chat.railway.app"
225
+
226
+ # 1. ๋กœ๊ทธ์ธ ๋ฐ ์„ธ์…˜ ํš๋“
227
+ session = requests.Session()
228
+
229
+ # 2. AI ์„ค์ • ์—…๋ฐ์ดํŠธ
230
+ ai_settings = {
231
+ "aiProvider": "lily",
232
+ "lilyApiUrl": "https://YOUR_USERNAME-lily-llm-api.hf.space",
233
+ "lilyModel": "kanana-1.5-v-3b-instruct",
234
+ "aiEnabled": True
235
+ }
236
+
237
+ settings_response = session.patch(
238
+ f"{hearth_chat_url}/api/chat/user/settings/",
239
+ json=ai_settings
240
+ )
241
+
242
+ print(f"์„ค์ • ์—…๋ฐ์ดํŠธ: {settings_response.status_code}")
243
+
244
+ # 3. ์ฑ„ํŒ… ํ…Œ์ŠคํŠธ
245
+ test_messages = [
246
+ "์•ˆ๋…•ํ•˜์„ธ์š”! Lily LLM ํ…Œ์ŠคํŠธ์ž…๋‹ˆ๋‹ค.",
247
+ "์˜ค๋Š˜ ๋‚ ์”จ๊ฐ€ ์–ด๋–ค๊ฐ€์š”?",
248
+ "๊ฐ„๋‹จํ•œ ์ˆ˜ํ•™ ๋ฌธ์ œ๋ฅผ ๋‚ด์ฃผ์„ธ์š”."
249
+ ]
250
+
251
+ for message in test_messages:
252
+ print(f"\n๐Ÿ“ค ํ…Œ์ŠคํŠธ ๋ฉ”์‹œ์ง€: {message}")
253
+
254
+ # WebSocket ๋˜๋Š” HTTP API๋ฅผ ํ†ตํ•œ ๋ฉ”์‹œ์ง€ ์ „์†ก
255
+ # (์‹ค์ œ ๊ตฌํ˜„์— ๋”ฐ๋ผ ์กฐ์ •)
256
+
257
+ # ์‘๋‹ต ํ™•์ธ
258
+ print(f"โœ… ์‘๋‹ต ๋ฐ›์Œ")
259
+
260
+ if __name__ == "__main__":
261
+ test_hearth_chat_lily_integration()
262
+ ```
263
+
264
+ ### 4.2 ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ํ…Œ์ŠคํŠธ
265
+
266
+ ```python
267
+ def test_image_processing():
268
+ """์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์—ฐ๋™ ํ…Œ์ŠคํŠธ"""
269
+
270
+ # ํ…Œ์ŠคํŠธ ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ
271
+ with open("test_image.jpg", "rb") as f:
272
+ files = {"image": f}
273
+ data = {"message": "์ด๋ฏธ์ง€์—์„œ ๋ฌด์—‡์„ ๋ณผ ์ˆ˜ ์žˆ๋‚˜์š”?"}
274
+
275
+ response = requests.post(
276
+ "https://your-hearth-chat.railway.app/api/chat/send-message/",
277
+ files=files,
278
+ data=data
279
+ )
280
+
281
+ print(f"์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ํ…Œ์ŠคํŠธ: {response.status_code}")
282
+ print(f"์‘๋‹ต: {response.json()}")
283
+ ```
284
+
285
+ ## 5. ๋ชจ๋‹ˆํ„ฐ๋ง ๋ฐ ๋กœ๊ทธ
286
+
287
+ ### 5.1 Hugging Face Spaces ๋กœ๊ทธ ๋ชจ๋‹ˆํ„ฐ๋ง
288
+
289
+ ```bash
290
+ # Spaces ๋Œ€์‹œ๋ณด๋“œ์—์„œ ์‹ค์‹œ๊ฐ„ ๋กœ๊ทธ ํ™•์ธ
291
+ # API ํ˜ธ์ถœ ๋นˆ๋„ ๋ฐ ์‘๋‹ต ์‹œ๊ฐ„ ๋ชจ๋‹ˆํ„ฐ๋ง
292
+ ```
293
+
294
+ ### 5.2 Railway ๋กœ๊ทธ ๋ชจ๋‹ˆํ„ฐ๋ง
295
+
296
+ ```bash
297
+ # Railway ๋Œ€์‹œ๋ณด๋“œ์—์„œ Hearth Chat ๋กœ๊ทธ ํ™•์ธ
298
+ # Lily LLM API ํ˜ธ์ถœ ์„ฑ๊ณต/์‹คํŒจ ๋ชจ๋‹ˆํ„ฐ๋ง
299
+ ```
300
+
301
+ ## 6. ์„ฑ๋Šฅ ์ตœ์ ํ™”
302
+
303
+ ### 6.1 ์บ์‹ฑ ์ „๋žต
304
+
305
+ ```python
306
+ # Redis๋ฅผ ์ด์šฉํ•œ ์‘๋‹ต ์บ์‹ฑ
307
+ import redis
308
+
309
+ redis_client = redis.Redis(host='localhost', port=6379, db=0)
310
+
311
+ def cached_lily_response(prompt_hash, response):
312
+ """์‘๋‹ต ์บ์‹ฑ"""
313
+ redis_client.setex(f"lily_cache:{prompt_hash}", 3600, json.dumps(response))
314
+
315
+ def get_cached_response(prompt_hash):
316
+ """์บ์‹œ๋œ ์‘๋‹ต ์กฐํšŒ"""
317
+ cached = redis_client.get(f"lily_cache:{prompt_hash}")
318
+ return json.loads(cached) if cached else None
319
+ ```
320
+
321
+ ### 6.2 ๋กœ๋“œ ๋ฐธ๋Ÿฐ์‹ฑ
322
+
323
+ ```python
324
+ # ์—ฌ๋Ÿฌ Hugging Face Spaces ์ธ์Šคํ„ด์Šค ์‚ฌ์šฉ
325
+ LILY_API_ENDPOINTS = [
326
+ "https://username1-lily-llm-api.hf.space",
327
+ "https://username2-lily-llm-api.hf.space"
328
+ ]
329
+
330
+ def get_available_endpoint():
331
+ """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ์—”๋“œํฌ์ธํŠธ ์„ ํƒ"""
332
+ for endpoint in LILY_API_ENDPOINTS:
333
+ try:
334
+ response = requests.get(f"{endpoint}/health", timeout=5)
335
+ if response.status_code == 200:
336
+ return endpoint
337
+ except:
338
+ continue
339
+ return LILY_API_ENDPOINTS[0] # ๊ธฐ๋ณธ๊ฐ’
340
+ ```
341
+
342
+ ## 7. ๋ณด์•ˆ ๊ณ ๋ ค์‚ฌํ•ญ
343
+
344
+ ### 7.1 API ํ‚ค ๊ด€๋ฆฌ
345
+
346
+ ```python
347
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ๋ฏผ๊ฐํ•œ ์ •๋ณด ๊ด€๋ฆฌ
348
+ import os
349
+
350
+ LILY_API_KEY = os.getenv('LILY_API_KEY') # ํ•„์š”์‹œ
351
+ LILY_API_SECRET = os.getenv('LILY_API_SECRET') # ํ•„์š”์‹œ
352
+ ```
353
+
354
+ ### 7.2 ์š”์ฒญ ์ œํ•œ
355
+
356
+ ```python
357
+ # ์‚ฌ์šฉ์ž๋ณ„ ์š”์ฒญ ์ œํ•œ
358
+ from django.core.cache import cache
359
+
360
+ def check_rate_limit(user_id):
361
+ """์‚ฌ์šฉ์ž๋ณ„ ์š”์ฒญ ์ œํ•œ ํ™•์ธ"""
362
+ key = f"lily_api_rate_limit:{user_id}"
363
+ current = cache.get(key, 0)
364
+
365
+ if current >= 100: # ์‹œ๊ฐ„๋‹น 100ํšŒ ์ œํ•œ
366
+ return False
367
+
368
+ cache.set(key, current + 1, 3600) # 1์‹œ๊ฐ„
369
+ return True
370
+ ```
371
+
372
+ ---
373
+
374
+ ## ๐ŸŽ‰ ์—ฐ๋™ ์™„๋ฃŒ
375
+
376
+ ๋ชจ๋“  ์„ค์ •์ด ์™„๋ฃŒ๋˜๋ฉด:
377
+
378
+ 1. **Hugging Face Spaces**: Lily LLM API ์„œ๋ฒ„ ํ˜ธ์ŠคํŒ…
379
+ 2. **Railway**: Hearth Chat ์„œ๋น„์Šค ํ˜ธ์ŠคํŒ…
380
+ 3. **์—ฐ๋™**: ๋‘ ์„œ๋น„์Šค ๊ฐ„ ์›ํ™œํ•œ ํ†ต์‹ 
381
+
382
+ ์‚ฌ์šฉ์ž๋Š” Hearth Chat ์ธํ„ฐํŽ˜์ด์Šค๋ฅผ ํ†ตํ•ด Hugging Face์—์„œ ํ˜ธ์ŠคํŒ…๋˜๋Š” ๊ฐ•๋ ฅํ•œ Lily LLM AI๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๊ฒŒ ๋ฉ๋‹ˆ๋‹ค! ๐Ÿš€
HISTORY.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM RAG ์‹œ์Šคํ…œ ๊ฐœ๋ฐœ ํžˆ์Šคํ† ๋ฆฌ
2
+
3
+ ## ๐Ÿ“‹ ํ”„๋กœ์ ํŠธ ๊ฐœ์š”
4
+ - **๋ชฉํ‘œ**: PDF ๋ฌธ์„œ์˜ ์ˆ˜ํ•™ ๋ฌธ์ œ ํ•ด์„ ๋ฐ ํ•ด๊ฒฐ์„ ์œ„ํ•œ RAG ์‹œ์Šคํ…œ ๊ตฌ์ถ•
5
+ - **ํ™˜๊ฒฝ**: CPU ๊ธฐ๋ฐ˜ ๊ฐœ๋ฐœ ํ™˜๊ฒฝ (GPU ์„œ๋ฒ„ ๋ฐฐํฌ ์ „ ํ…Œ์ŠคํŠธ)
6
+ - **์ ‘๊ทผ ๋ฐฉ์‹**: ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ RAG โ†’ ์ž‘์€ ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ๋กœ ๋น ๋ฅธ ๊ฒ€์ฆ
7
+
8
+ ## ๐Ÿ”„ ์ฃผ์š” ์ž‘์—… ํ๋ฆ„
9
+
10
+ ### 1๋‹จ๊ณ„: ๊ธฐ์กด ๋ฌธ์ œ ๋ถ„์„
11
+ - **๋ฌธ์ œ**: Kanana ๋ชจ๋ธ์˜ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ์—์„œ ํ† ํฐ ID ์‘๋‹ต ๋ฌธ์ œ
12
+ - **์›์ธ**: CPU ํ™˜๊ฒฝ์—์„œ Kanana ๋ชจ๋ธ์˜ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ํ•œ๊ณ„
13
+ - **๊ฒฐ์ •**: ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ RAG๋กœ ์ „ํ™˜ํ•˜์—ฌ ์•ˆ์ •์„ฑ ํ™•๋ณด
14
+
15
+ ### 2๋‹จ๊ณ„: ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ RAG ์‹œ์Šคํ…œ ๊ตฌ์ถ•
16
+
17
+ #### 2.1 PDF ํ…์ŠคํŠธ ์ถ”์ถœ ๊ฐœ์„ 
18
+ **ํŒŒ์ผ**: `lily_llm_core/document_processor.py`
19
+ ```python
20
+ # ๋ณ€๊ฒฝ ์ „: ์ด๋ฏธ์ง€ ๊ธฐ๋ฐ˜ OCR ์ฒ˜๋ฆฌ
21
+ def process_document(self, file_path: str) -> List[Document]:
22
+ if self.get_file_type(file_path) == 'pdf':
23
+ return self._process_pdf_as_images(file_path) # ์ด๋ฏธ์ง€ ๋ณ€ํ™˜
24
+
25
+ # ๋ณ€๊ฒฝ ํ›„: ํ…์ŠคํŠธ ์ง์ ‘ ์ถ”์ถœ
26
+ def process_document(self, file_path: str) -> List[Document]:
27
+ # ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์ฒ˜๋ฆฌ (๋ชจ๋“  ํŒŒ์ผ ํ˜•์‹)
28
+ documents = self.load_document(file_path)
29
+ split_docs = self.split_documents(documents)
30
+ return split_docs
31
+ ```
32
+
33
+ #### 2.2 RAG ํ”„๋กœ์„ธ์„œ ๋‹จ์ˆœํ™”
34
+ **ํŒŒ์ผ**: `lily_llm_core/rag_processor.py`
35
+ ```python
36
+ # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์ œ๊ฑฐ, ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜์œผ๋กœ ๋‹จ์ˆœํ™”
37
+ def generate_rag_response(self, user_id: str, document_id: str, query: str,
38
+ llm_model=None, image_files: List[str] = None) -> Dict[str, Any]:
39
+ # 1. ์œ ์‚ฌํ•œ ๋ฌธ์„œ ๊ฒ€์ƒ‰
40
+ similar_docs = vector_store_manager.search_similar(
41
+ user_id, document_id, query, k=self.max_search_results
42
+ )
43
+
44
+ # 2. ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์‘๋‹ต ์ƒ์„ฑ
45
+ return self._generate_text_response(query, similar_docs, llm_model, image_files)
46
+ ```
47
+
48
+ #### 2.3 LLM ์—†์ด๋„ ์ž‘๋™ํ•˜๋Š” ๊ตฌ์กฐํ™”๋œ ์‘๋‹ต
49
+ ```python
50
+ def _generate_text_response(self, query: str, text_docs: List[Document],
51
+ llm_model, image_files: List[str] = None) -> Dict[str, Any]:
52
+ # ์ปจํ…์ŠคํŠธ ๊ตฌ์„ฑ (์ž‘์€ ํ…Œ์ŠคํŠธ๋ฅผ ์œ„ํ•ด ๊ธธ์ด ์ œํ•œ)
53
+ context = self._build_context(text_docs)
54
+ if len(context) > 2000:
55
+ context = context[:2000] + "..."
56
+
57
+ # LLM ๋ชจ๋ธ์ด ์žˆ์œผ๋ฉด ์‘๋‹ต ์ƒ์„ฑ, ์—†์œผ๋ฉด ์ปจํ…์ŠคํŠธ๋งŒ ๋ฐ˜ํ™˜
58
+ if llm_model:
59
+ response = self._generate_with_llm_simple(prompt, llm_model)
60
+ else:
61
+ # ๊ตฌ์กฐํ™”๋œ ํ…์ŠคํŠธ ์‘๋‹ต ์ƒ์„ฑ
62
+ response = f"""๋ฌธ์„œ์—์„œ ๊ฒ€์ƒ‰๋œ ๊ด€๋ จ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ ๋‹ต๋ณ€๋“œ๋ฆฝ๋‹ˆ๋‹ค:
63
+
64
+ ๐Ÿ“‹ ๊ฒ€์ƒ‰๋œ ๋‚ด์šฉ:
65
+ {context}
66
+
67
+ โ“ ์งˆ๋ฌธ: {query}
68
+
69
+ ๐Ÿ’ก ๋‹ต๋ณ€: ์œ„ ๊ฒ€์ƒ‰๋œ ๋‚ด์šฉ์„ ์ฐธ๊ณ ํ•˜์—ฌ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ฐพ์•„๋ณด์‹œ๊ธฐ ๋ฐ”๋ž๋‹ˆ๋‹ค."""
70
+ ```
71
+
72
+ ### 3๋‹จ๊ณ„: ํ…Œ์ŠคํŠธ ์‹œ์Šคํ…œ ๊ตฌ์ถ•
73
+
74
+ #### 3.1 ์„œ๋ฒ„ ์—ฐ๊ฒฐ ๋ฐ ๋ฌธ์„œ ์—…๋กœ๋“œ ํ…Œ์ŠคํŠธ
75
+ **ํŒŒ์ผ**: `test_simple_rag.py`
76
+ ```python
77
+ def test_server_connection():
78
+ response = requests.get("http://localhost:8001/health", timeout=10)
79
+ return response.status_code == 200
80
+
81
+ def test_document_upload():
82
+ response = requests.post(
83
+ "http://localhost:8001/document/upload",
84
+ files=files,
85
+ data=data,
86
+ timeout=120
87
+ )
88
+ ```
89
+
90
+ #### 3.2 ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ RAG ํ…Œ์ŠคํŠธ
91
+ **ํŒŒ์ผ**: `test_text_only_rag.py`
92
+ ```python
93
+ def test_text_only_rag():
94
+ test_queries = [
95
+ "1๋ฒˆ ๋ฌธ์ œ",
96
+ "2๋ฒˆ ๋ฌธ์ œ",
97
+ "3๋ฒˆ ๋ฌธ์ œ",
98
+ "์ˆ˜ํ•™ ๋ฌธ์ œ"
99
+ ]
100
+
101
+ for query in test_queries:
102
+ rag_data = {
103
+ 'user_id': 'test_user',
104
+ 'document_id': document_id,
105
+ 'query': query
106
+ }
107
+ response = requests.post(
108
+ f"{base_url}/rag/generate",
109
+ data=rag_data,
110
+ timeout=60
111
+ )
112
+ ```
113
+
114
+ #### 3.3 ๊ตฌ์ฒด์ ์ธ ์ˆ˜ํ•™ ๋ฌธ์ œ ์งˆ๋ฌธ ํ…Œ์ŠคํŠธ
115
+ **ํŒŒ์ผ**: `test_specific_questions.py`
116
+ ```python
117
+ test_queries = [
118
+ "23๋ฒˆ ๋ฌธ์ œ์˜ ๋‹ต์€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
119
+ "24๋ฒˆ ๋ฌธ์ œ๋ฅผ ํ’€์–ด์ฃผ์„ธ์š”",
120
+ "15๋ฒˆ ๋ฌธ์ œ์˜ ๋‹ต์„ ๊ตฌํ•ด์ฃผ์„ธ์š”",
121
+ "23๋ฒˆ ๋ฌธ์ œ์—์„œ 5๊ฐœ์˜ ๋ฌธ์ž๋ฅผ ์ผ๋ ฌ๋กœ ๋‚˜์—ดํ•˜๋Š” ๊ฒฝ์šฐ์˜ ์ˆ˜๋Š”?",
122
+ "24๋ฒˆ ๋ฌธ์ œ์—์„œ P(B)์˜ ๊ฐ’์€?"
123
+ ]
124
+ ```
125
+
126
+ ### 4๋‹จ๊ณ„: ์„ฑ๊ณผ ํ™•์ธ
127
+
128
+ #### 4.1 ์„ฑ๊ณตํ•œ ๊ธฐ๋Šฅ๋“ค
129
+ - โœ… **PDF ํ…์ŠคํŠธ ์ถ”์ถœ ์™„๋ฒฝ**: ์‹ค์ œ ์ˆ˜ํ•™ ๋ฌธ์ œ ๋‚ด์šฉ ์ •ํ™•ํžˆ ์ถ”์ถœ
130
+ - โœ… **๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ ์™„๋ฒฝ**: ๊ด€๋ จ ๋ฌธ์ œ๋“ค์„ ์ •ํ™•ํžˆ ์ฐพ์•„๋ƒ„
131
+ - โœ… **๋น ๋ฅธ ์ฒ˜๋ฆฌ**: ์ฆ‰์‹œ ์‘๋‹ต (LLM ์—†์ด๋„ ์ž‘๋™)
132
+ - โœ… **๊ตฌ์กฐํ™”๋œ ์‘๋‹ต**: ๋ฌธ์ œ ๋ถ„์„๊ณผ ํ•ด๊ฒฐ ๋ฐฉ๋ฒ• ์ œ์‹œ
133
+ - โœ… **์ •ํ™•ํ•œ ๋ฌธ์ œ ๋งค์นญ**: 23๋ฒˆ, 24๋ฒˆ, 15๋ฒˆ ๋ฌธ์ œ ์ •ํ™•ํžˆ ์ฐพ์Œ
134
+
135
+ #### 4.2 ํ…Œ์ŠคํŠธ ๊ฒฐ๊ณผ
136
+ - **๋ฌธ์„œ ์—…๋กœ๋“œ**: 12๊ฐœ ์ฒญํฌ ์„ฑ๊ณต
137
+ - **๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ**: 5๊ฐœ์”ฉ ์ •ํ™•ํžˆ ๋ฐ˜ํ™˜
138
+ - **์‘๋‹ต ์‹œ๊ฐ„**: ์ฆ‰์‹œ (ํ† ํฐ ID ๋ฌธ์ œ ํ•ด๊ฒฐ๋จ)
139
+ - **๋ฌธ์ œ ์ธ์‹**: ์‹ค์ œ ์ˆ˜ํ•™ ๋ฌธ์ œ ๋‚ด์šฉ ์ •ํ™•ํžˆ ์ถ”์ถœ
140
+
141
+ ## ๐ŸŽฏ ์ตœ์ข… ์„ฑ๊ณผ
142
+
143
+ ### ํ•ด๊ฒฐ๋œ ๋ฌธ์ œ๋“ค
144
+ 1. โœ… **ํ† ํฐ ID ๋ฌธ์ œ**: ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜์œผ๋กœ ํ•ด๊ฒฐ
145
+ 2. โœ… **PDF ํ…๏ฟฝ๏ฟฝํŠธ ์ถ”์ถœ**: ์‹ค์ œ ๋ฌธ์ œ ๋‚ด์šฉ ์ถ”์ถœ ์„ฑ๊ณต
146
+ 3. โœ… **๋น ๋ฅธ ๊ฒ€์ฆ**: ์ž‘์€ ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ๋กœ ์„ฑ๊ณต
147
+ 4. โœ… **๊ตฌ์กฐํ™”๋œ ์‘๋‹ต**: ๋ฌธ์ œ ๋ถ„์„๊ณผ ํ•ด๊ฒฐ ๋ฐฉ๋ฒ• ํฌํ•จ
148
+
149
+ ### ํ˜„์žฌ ์ƒํƒœ
150
+ **"์ž‘์€ ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ๋กœ ๋น ๋ฅธ ๊ฒ€์ฆ"** ๋ชฉํ‘œ ๋‹ฌ์„ฑ!
151
+
152
+ - CPU ํ™˜๊ฒฝ์—์„œ๋„ ๋น ๋ฅด๊ฒŒ ์ž‘๋™
153
+ - ์‹ค์ œ ์ˆ˜ํ•™ ๋ฌธ์ œ ๋‚ด์šฉ ์ •ํ™•ํžˆ ์ถ”์ถœ
154
+ - ๊ฒ€์ƒ‰๊ณผ ์ปจํ…์ŠคํŠธ ๋ฐ˜ํ™˜ ์™„๋ฒฝ ์ž‘๋™
155
+ - ๊ตฌ์กฐํ™”๋œ ์‘๋‹ต ์ƒ์„ฑ
156
+
157
+ **๋‹ค์Œ ๋‹จ๊ณ„**: ์„œ๋ฒ„์— ์˜ฌ๋ ค์„œ GPU๋กœ ์‹ค์‚ฌ์šฉ ์ค€๋น„ ์™„๋ฃŒ
158
+
159
+ ## ๐Ÿ“ ์ฃผ์š” ์ˆ˜์ • ํŒŒ์ผ๋“ค
160
+
161
+ ### Core ํŒŒ์ผ๋“ค
162
+ - `lily_llm_core/document_processor.py`: PDF ํ…์ŠคํŠธ ์ถ”์ถœ ๊ฐœ์„ 
163
+ - `lily_llm_core/rag_processor.py`: RAG ํ”„๋กœ์„ธ์„œ ๋‹จ์ˆœํ™”
164
+ - `lily_llm_api/app_v2.py`: ์—”๋“œํฌ์ธํŠธ ์ˆ˜์ •
165
+
166
+ ### ํ…Œ์ŠคํŠธ ํŒŒ์ผ๋“ค
167
+ - `test_simple_rag.py`: ๊ธฐ๋ณธ ์—ฐ๊ฒฐ ํ…Œ์ŠคํŠธ
168
+ - `test_text_only_rag.py`: ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ RAG ํ…Œ์ŠคํŠธ
169
+ - `test_specific_questions.py`: ๊ตฌ์ฒด์  ์งˆ๋ฌธ ํ…Œ์ŠคํŠธ
170
+ - `test_llm_rag.py`: LLM ํฌํ•จ ํ…Œ์ŠคํŠธ
171
+
172
+ ## ๐Ÿ”ง ๊ธฐ์ˆ ์  ๊ฐœ์„ ์‚ฌํ•ญ
173
+
174
+ ### 1. PDF ์ฒ˜๋ฆฌ ๋ฐฉ์‹ ๋ณ€๊ฒฝ
175
+ - **์ด์ „**: ์ด๋ฏธ์ง€ ๊ธฐ๋ฐ˜ OCR โ†’ ํ† ํฐ ID ๋ฌธ์ œ
176
+ - **ํ˜„์žฌ**: ํ…์ŠคํŠธ ์ง์ ‘ ์ถ”์ถœ โ†’ ์•ˆ์ •์  ์ฒ˜๋ฆฌ
177
+
178
+ ### 2. RAG ํ”„๋กœ์„ธ์„œ ๋‹จ์ˆœํ™”
179
+ - **์ด์ „**: ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ณต์žกํ•œ ์ฒ˜๋ฆฌ
180
+ - **ํ˜„์žฌ**: ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ๋‹จ์ˆœ ์ฒ˜๋ฆฌ
181
+
182
+ ### 3. ์‘๋‹ต ๊ตฌ์กฐ ๊ฐœ์„ 
183
+ - **์ด์ „**: ํ† ํฐ ID ์‘๋‹ต
184
+ - **ํ˜„์žฌ**: ๊ตฌ์กฐํ™”๋œ ํ…์ŠคํŠธ ์‘๋‹ต
185
+
186
+ ## ๐Ÿš€ ๋‹ค์Œ ๋‹จ๊ณ„ ์ œ์•ˆ
187
+
188
+ 1. **GPU ์„œ๋ฒ„ ๋ฐฐํฌ**: ํ˜„์žฌ CPU ํ…Œ์ŠคํŠธ ์™„๋ฃŒ
189
+ 2. **LLM ํ†ตํ•ฉ ๊ฐœ์„ **: ํ† ํฐ ๋””์ฝ”๋”ฉ ๋ฌธ์ œ ํ•ด๊ฒฐ
190
+ 3. **์‹ค์ œ ๋ฌธ์ œ ํ•ด๊ฒฐ**: ์ˆ˜ํ•™ ๋ฌธ์ œ ํ’€์ด ๋Šฅ๋ ฅ ํ–ฅ์ƒ
191
+ 4. **์„ฑ๋Šฅ ์ตœ์ ํ™”**: ๋” ํฐ ๋ฐ์ดํ„ฐ์…‹ ์ฒ˜๋ฆฌ
HUGGINGFACE_CLOUD_GUIDE.md ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # โ˜๏ธ Hugging Face ํด๋ผ์šฐ๋“œ GPU ๋ฐฐํฌ ๊ฐ€์ด๋“œ
2
+
3
+ ## ๐Ÿ“‹ ๊ฐœ์š”
4
+
5
+ ์ด ๊ฐ€์ด๋“œ๋Š” ๋กœ์ปฌ PC์—์„œ Hugging Face ํด๋ผ์šฐ๋“œ GPU ํ™˜๊ฒฝ์„ ์„ค์ •ํ•˜๊ณ , AI ๋ชจ๋ธ์„ ๋ฐฐํฌํ•œ ํ›„ Railway์—์„œ ์‹คํ–‰ ์ค‘์ธ Hearth Chat๊ณผ ์—ฐ๋™ํ•˜๋Š” ๋ฐฉ๋ฒ•์„ ์„ค๋ช…ํ•ฉ๋‹ˆ๋‹ค.
6
+
7
+ ## ๐ŸŽฏ ๋ชฉํ‘œ
8
+
9
+ 1. **Hugging Face ํด๋ผ์šฐ๋“œ GPU ํ™˜๊ฒฝ ์„ค์ •**
10
+ 2. **AI ๋ชจ๋ธ์„ Hugging Face Hub์— ์—…๋กœ๋“œ**
11
+ 3. **Inference Endpoints ์ƒ์„ฑ**
12
+ 4. **Railway Hearth Chat๊ณผ ์—ฐ๋™**
13
+
14
+ ## ๐Ÿš€ 1๋‹จ๊ณ„: Hugging Face ๊ณ„์ • ์„ค์ •
15
+
16
+ ### 1.1 Hugging Face ๊ณ„์ • ์ƒ์„ฑ
17
+ 1. **Hugging Face ์›น์‚ฌ์ดํŠธ ๋ฐฉ๋ฌธ**: https://huggingface.co
18
+ 2. **ํšŒ์›๊ฐ€์ž…**: ์ด๋ฉ”์ผ๋กœ ๊ณ„์ • ์ƒ์„ฑ
19
+ 3. **ํ”„๋กœํ•„ ์„ค์ •**: ์‚ฌ์šฉ์ž๋ช… ์„ค์ • (์˜ˆ: `your-username`)
20
+
21
+ ### 1.2 Access Token ์ƒ์„ฑ
22
+ 1. **Settings > Access Tokens**: https://huggingface.co/settings/tokens
23
+ 2. **New Token ์ƒ์„ฑ**:
24
+ - Name: `lily-math-rag-token`
25
+ - Role: `Write`
26
+ 3. **ํ† ํฐ ๋ณต์‚ฌ**: ์ƒ์„ฑ๋œ ํ† ํฐ์„ ์•ˆ์ „ํ•œ ๊ณณ์— ์ €์žฅ
27
+
28
+ ### 1.3 ๋กœ์ปฌ ํ™˜๊ฒฝ ์„ค์ •
29
+ ```bash
30
+ # Hugging Face CLI ์„ค์น˜
31
+ pip install huggingface_hub
32
+
33
+ # ๋กœ๊ทธ์ธ
34
+ huggingface-cli login
35
+ # ํ† ํฐ ์ž…๋ ฅ ํ”„๋กฌํ”„ํŠธ์—์„œ ์œ„์—์„œ ์ƒ์„ฑํ•œ ํ† ํฐ ์ž…๋ ฅ
36
+ ```
37
+
38
+ ## ๐Ÿ”ง 2๋‹จ๊ณ„: ๋ชจ๋ธ ์ค€๋น„ ๋ฐ ์—…๋กœ๋“œ
39
+
40
+ ### 2.1 ๋กœ์ปฌ ๋ชจ๋ธ ํ™•์ธ
41
+ ```bash
42
+ cd C:\Project\lily_generate_project\lily_generate_package
43
+ ls hearth_llm_model/
44
+ ```
45
+
46
+ ### 2.2 ๋ชจ๋ธ์„ Hugging Face Hub์— ์—…๋กœ๋“œ
47
+ ```bash
48
+ # ๋ชจ๋ธ ์—…๋กœ๋“œ
49
+ huggingface-cli upload your-username/lily-math-model hearth_llm_model/
50
+
51
+ # ๋˜๋Š” Python ์Šคํฌ๋ฆฝํŠธ ์‚ฌ์šฉ
52
+ python huggingface_cloud_setup.py
53
+ ```
54
+
55
+ ### 2.3 ๋ชจ๋ธ ์นด๋“œ ์ƒ์„ฑ
56
+ ```markdown
57
+ # ๋ชจ๋ธ ์นด๋“œ ์˜ˆ์‹œ (README.md)
58
+ ---
59
+ language: ko
60
+ tags:
61
+ - math
62
+ - rag
63
+ - korean
64
+ license: mit
65
+ ---
66
+
67
+ # Lily Math RAG Model
68
+
69
+ ์ˆ˜ํ•™ ๋ฌธ์ œ ํ•ด๊ฒฐ์„ ์œ„ํ•œ ํ•œ๊ตญ์–ด RAG ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค.
70
+
71
+ ## ์‚ฌ์šฉ๋ฒ•
72
+
73
+ ```python
74
+ from transformers import AutoTokenizer, AutoModelForCausalLM
75
+
76
+ tokenizer = AutoTokenizer.from_pretrained("your-username/lily-math-model")
77
+ model = AutoModelForCausalLM.from_pretrained("your-username/lily-math-model")
78
+ ```
79
+ ```
80
+
81
+ ## โ˜๏ธ 3๋‹จ๊ณ„: Hugging Face Inference Endpoints ์„ค์ •
82
+
83
+ ### 3.1 Inference Endpoints ์ƒ์„ฑ
84
+ 1. **Hugging Face ์›น์‚ฌ์ดํŠธ ๋ฐฉ๋ฌธ**: https://huggingface.co/inference-endpoints
85
+ 2. **New Endpoint ํด๋ฆญ**
86
+ 3. **์„ค์ • ์ž…๋ ฅ**:
87
+ - **Repository**: `your-username/lily-math-model`
88
+ - **Framework**: `PyTorch`
89
+ - **Region**: `us-east-1` (๊ฐ€์žฅ ๋น ๋ฆ„)
90
+ - **Instance Type**: `gpu.t4.medium` (์‹œ์ž‘์šฉ)
91
+ - **Accelerator**: `GPU`
92
+
93
+ ### 3.2 ์—”๋“œํฌ์ธํŠธ ์„ค์ •
94
+ ```json
95
+ {
96
+ "repository": "your-username/lily-math-model",
97
+ "framework": "pytorch",
98
+ "accelerator": "gpu",
99
+ "instance_type": "gpu.t4.medium",
100
+ "region": "us-east-1",
101
+ "vendor": "aws"
102
+ }
103
+ ```
104
+
105
+ ### 3.3 ์—”๋“œํฌ์ธํŠธ URL ํ™•์ธ
106
+ - ์ƒ์„ฑ๋œ ์—”๋“œํฌ์ธํŠธ์˜ URL์„ ๋ณต์‚ฌ
107
+ - ์˜ˆ: `https://your-endpoint-id.us-east-1.aws.endpoints.huggingface.cloud`
108
+
109
+ ## ๐Ÿ”— 4๋‹จ๊ณ„: Railway Hearth Chat ์—ฐ๋™
110
+
111
+ ### 4.1 ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
112
+ ```bash
113
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
114
+ export RAILWAY_HEARTH_CHAT_URL="https://hearth-chat-production.up.railway.app"
115
+ export HF_ENDPOINT_URL="https://your-endpoint-id.us-east-1.aws.endpoints.huggingface.cloud"
116
+ export HF_TOKEN="your-huggingface-token"
117
+ ```
118
+
119
+ ### 4.2 ์—ฐ๋™ ํ…Œ์ŠคํŠธ
120
+ ```bash
121
+ # ์—ฐ๋™ ํ…Œ์ŠคํŠธ ์‹คํ–‰
122
+ python railway_hearth_chat_integration.py
123
+ ```
124
+
125
+ ### 4.3 Hearth Chat API ์ˆ˜์ • (ํ•„์š”์‹œ)
126
+ Railway Hearth Chat์—์„œ Hugging Face ์—”๋“œํฌ์ธํŠธ๋ฅผ ํ˜ธ์ถœํ•˜๋„๋ก API๋ฅผ ์ˆ˜์ •:
127
+
128
+ ```javascript
129
+ // Hearth Chat API ์˜ˆ์‹œ
130
+ async function callHuggingFaceAPI(message) {
131
+ const response = await fetch(process.env.HF_ENDPOINT_URL, {
132
+ method: 'POST',
133
+ headers: {
134
+ 'Authorization': `Bearer ${process.env.HF_TOKEN}`,
135
+ 'Content-Type': 'application/json'
136
+ },
137
+ body: JSON.stringify({
138
+ inputs: message,
139
+ parameters: {
140
+ max_length: 200,
141
+ temperature: 0.7
142
+ }
143
+ })
144
+ });
145
+
146
+ const result = await response.json();
147
+ return result.generated_text;
148
+ }
149
+ ```
150
+
151
+ ## ๐Ÿงช 5๋‹จ๊ณ„: ํ…Œ์ŠคํŠธ ๋ฐ ๊ฒ€์ฆ
152
+
153
+ ### 5.1 Hugging Face ์—”๋“œํฌ์ธํŠธ ํ…Œ์ŠคํŠธ
154
+ ```bash
155
+ # ์—”๋“œํฌ์ธํŠธ ํ…Œ์ŠคํŠธ
156
+ curl -X POST https://your-endpoint-id.us-east-1.aws.endpoints.huggingface.cloud \
157
+ -H "Authorization: Bearer your-token" \
158
+ -H "Content-Type: application/json" \
159
+ -d '{
160
+ "inputs": "์•ˆ๋…•ํ•˜์„ธ์š”! ์ˆ˜ํ•™ ๋ฌธ์ œ๋ฅผ ๋„์™€์ฃผ์„ธ์š”.",
161
+ "parameters": {
162
+ "max_length": 100,
163
+ "temperature": 0.7
164
+ }
165
+ }'
166
+ ```
167
+
168
+ ### 5.2 Railway ์—ฐ๋™ ํ…Œ์ŠคํŠธ
169
+ ```bash
170
+ # ์ „์ฒด ์—ฐ๋™ ํ…Œ์ŠคํŠธ
171
+ python test_railway_huggingface_integration.py
172
+ ```
173
+
174
+ ## ๐Ÿ“Š 6๋‹จ๊ณ„: ๋ชจ๋‹ˆํ„ฐ๋ง ๋ฐ ์ตœ์ ํ™”
175
+
176
+ ### 6.1 Hugging Face ๋ชจ๋‹ˆํ„ฐ๋ง
177
+ - **Inference Endpoints ๋Œ€์‹œ๋ณด๋“œ**: https://huggingface.co/inference-endpoints
178
+ - **์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ**: GPU ์‚ฌ์šฉ๋Ÿ‰, ์š”์ฒญ ์ˆ˜, ์‘๋‹ต ์‹œ๊ฐ„
179
+ - **๋น„์šฉ ๋ชจ๋‹ˆํ„ฐ๋ง**: ์›”๋ณ„ ์‚ฌ์šฉ๋Ÿ‰ ๋ฐ ๋น„์šฉ
180
+
181
+ ### 6.2 Railway ๋ชจ๋‹ˆํ„ฐ๋ง
182
+ - **Railway ๋Œ€์‹œ๋ณด๋“œ**: https://railway.app/dashboard
183
+ - **๋กœ๊ทธ ํ™•์ธ**: ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ๋กœ๊ทธ ๋ฐ ์˜ค๋ฅ˜
184
+ - **์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง**: ์‘๋‹ต ์‹œ๊ฐ„, ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰
185
+
186
+ ## ๐Ÿ”ง ๋ฌธ์ œ ํ•ด๊ฒฐ
187
+
188
+ ### Hugging Face ๊ด€๋ จ ๋ฌธ์ œ
189
+ 1. **ํ† ํฐ ์ธ์ฆ ์˜ค๋ฅ˜**: ํ† ํฐ ์žฌ์ƒ์„ฑ ๋ฐ ํ™•์ธ
190
+ 2. **๋ชจ๋ธ ์—…๋กœ๋“œ ์‹คํŒจ**: ํŒŒ์ผ ํฌ๊ธฐ ๋ฐ ํ˜•์‹ ํ™•์ธ
191
+ 3. **์—”๋“œํฌ์ธํŠธ ์ƒ์„ฑ ์‹คํŒจ**: GPU ํ• ๋‹น๋Ÿ‰ ํ™•์ธ
192
+
193
+ ### Railway ์—ฐ๋™ ๋ฌธ์ œ
194
+ 1. **์—ฐ๊ฒฐ ์‹คํŒจ**: URL ๋ฐ ๋„คํŠธ์›Œํฌ ํ™•์ธ
195
+ 2. **API ์˜ค๋ฅ˜**: ์—”๋“œํฌ์ธํŠธ ๋ฐ ํ—ค๋” ํ™•์ธ
196
+ 3. **์‘๋‹ต ์ง€์—ฐ**: ํƒ€์ž„์•„์›ƒ ์„ค์ • ์กฐ์ •
197
+
198
+ ## ๐Ÿ’ฐ ๋น„์šฉ ์ตœ์ ํ™”
199
+
200
+ ### Hugging Face ๋น„์šฉ
201
+ - **gpu.t4.medium**: $0.60/์‹œ๊ฐ„ (์‹œ์ž‘์šฉ)
202
+ - **gpu.t4.large**: $1.20/์‹œ๊ฐ„ (์„ฑ๋Šฅ ํ–ฅ์ƒ)
203
+ - **gpu.a10g**: $2.40/์‹œ๊ฐ„ (๊ณ ์„ฑ๋Šฅ)
204
+
205
+ ### ๋น„์šฉ ์ ˆ์•ฝ ํŒ
206
+ 1. **์ž๋™ ์Šค์ผ€์ผ๋ง**: ์‚ฌ์šฉํ•˜์ง€ ์•Š์„ ๋•Œ ์—”๋“œํฌ์ธํŠธ ์ค‘์ง€
207
+ 2. **์บ์‹ฑ**: ๋™์ผํ•œ ์š”์ฒญ์— ๋Œ€ํ•œ ์‘๋‹ต ์บ์‹ฑ
208
+ 3. **๋ฐฐ์น˜ ์ฒ˜๋ฆฌ**: ์—ฌ๋Ÿฌ ์š”์ฒญ์„ ํ•œ ๋ฒˆ์— ์ฒ˜๋ฆฌ
209
+
210
+ ## ๐Ÿš€ ๋ฐฐํฌ ์ฒดํฌ๋ฆฌ์ŠคํŠธ
211
+
212
+ - [ ] Hugging Face ๊ณ„์ • ์ƒ์„ฑ ๋ฐ ํ† ํฐ ์„ค์ •
213
+ - [ ] ๋กœ์ปฌ ๋ชจ๋ธ ํ™•์ธ ๋ฐ ์—…๋กœ๋“œ
214
+ - [ ] Inference Endpoints ์ƒ์„ฑ
215
+ - [ ] ์—”๋“œํฌ์ธํŠธ URL ๋ฐ ํ† ํฐ ํ™•์ธ
216
+ - [ ] Railway Hearth Chat URL ํ™•์ธ
217
+ - [ ] ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
218
+ - [ ] ์—ฐ๋™ ํ…Œ์ŠคํŠธ ์‹คํ–‰
219
+ - [ ] ๋ชจ๋‹ˆํ„ฐ๋ง ์„ค์ •
220
+ - [ ] ๋น„์šฉ ์ตœ์ ํ™” ์„ค์ •
221
+
222
+ ## ๐Ÿ“ž ์ง€์›
223
+
224
+ ### ์œ ์šฉํ•œ ๋งํฌ
225
+ - **Hugging Face ๋ฌธ์„œ**: https://huggingface.co/docs
226
+ - **Inference Endpoints ๊ฐ€์ด๋“œ**: https://huggingface.co/docs/inference-endpoints
227
+ - **Railway ๋ฌธ์„œ**: https://docs.railway.app
228
+
229
+ ### ๋ฌธ์ œ ํ•ด๊ฒฐ
230
+ 1. **Hugging Face ์ง€์›**: https://huggingface.co/support
231
+ 2. **Railway ์ง€์›**: https://railway.app/support
232
+ 3. **์ปค๋ฎค๋‹ˆํ‹ฐ**: GitHub Issues ๋ฐ Discord
233
+
234
+ ## ๐ŸŽ‰ ์„ฑ๊ณต ํ™•์ธ
235
+
236
+ ๋ชจ๋“  ์„ค์ •์ด ์™„๋ฃŒ๋˜๋ฉด ๋‹ค์Œ์„ ํ™•์ธํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค:
237
+
238
+ - โœ… **Hugging Face ์—”๋“œํฌ์ธํŠธ**: GPU์—์„œ AI ๋ชจ๋ธ ์‹คํ–‰
239
+ - โœ… **Railway Hearth Chat**: ์›น ์ธํ„ฐํŽ˜์ด์Šค์—์„œ ์ฑ„ํŒ… ๊ฐ€๋Šฅ
240
+ - โœ… **์—ฐ๋™**: ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ โ†’ Hugging Face โ†’ AI ์‘๋‹ต โ†’ Hearth Chat
241
+ - โœ… **๋ชจ๋‹ˆํ„ฐ๋ง**: ์‹ค์‹œ๊ฐ„ ์‚ฌ์šฉ๋Ÿ‰ ๋ฐ ์„ฑ๋Šฅ ํ™•์ธ
PROMPT.md ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM RAG ์‹œ์Šคํ…œ ๊ฐœ๋ฐœ - ์ƒˆ ์ฑ„ํŒ…์ฐฝ AI ํ”„๋กฌํ”„ํŠธ
2
+
3
+ ## ๐ŸŽฏ ํ”„๋กœ์ ํŠธ ๋ชฉํ‘œ
4
+ PDF ๋ฌธ์„œ์˜ ์ˆ˜ํ•™ ๋ฌธ์ œ ํ•ด์„ ๋ฐ ํ•ด๊ฒฐ์„ ์œ„ํ•œ RAG ์‹œ์Šคํ…œ์„ ๊ตฌ์ถ•ํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. ํ˜„์žฌ CPU ํ™˜๊ฒฝ์—์„œ ์ž‘์€ ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ๋กœ ๋น ๋ฅธ ๊ฒ€์ฆ์„ ์™„๋ฃŒํ–ˆ์œผ๋ฉฐ, ๋‹ค์Œ ๋‹จ๊ณ„๋กœ GPU ์„œ๋ฒ„ ๋ฐฐํฌ ๋ฐ ์‹ค์ œ ๋ฌธ์ œ ํ•ด๊ฒฐ ๋Šฅ๋ ฅ ํ–ฅ์ƒ์„ ์ง„ํ–‰ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
5
+
6
+ ## ๐Ÿ“‹ ํ˜„์žฌ ์ƒํ™ฉ
7
+
8
+ ### โœ… ์™„๋ฃŒ๋œ ์ž‘์—…
9
+ 1. **ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ RAG ์‹œ์Šคํ…œ ๊ตฌ์ถ•**
10
+ - PDF ํ…์ŠคํŠธ ์ง์ ‘ ์ถ”์ถœ (์ด๋ฏธ์ง€ OCR ๋Œ€์‹ )
11
+ - RAG ํ”„๋กœ์„ธ์„œ ๋‹จ์ˆœํ™” (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์ œ๊ฑฐ)
12
+ - LLM ์—†์ด๋„ ์ž‘๋™ํ•˜๋Š” ๊ตฌ์กฐํ™”๋œ ์‘๋‹ต
13
+
14
+ 2. **ํ…Œ์ŠคํŠธ ์‹œ์Šคํ…œ ๊ตฌ์ถ•**
15
+ - ์„œ๋ฒ„ ์—ฐ๊ฒฐ ๋ฐ ๋ฌธ์„œ ์—…๋กœ๋“œ ํ…Œ์ŠคํŠธ
16
+ - ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ RAG ์ฟผ๋ฆฌ ํ…Œ์ŠคํŠธ
17
+ - ๊ตฌ์ฒด์ ์ธ ์ˆ˜ํ•™ ๋ฌธ์ œ ์งˆ๋ฌธ ํ…Œ์ŠคํŠธ
18
+
19
+ 3. **์„ฑ๊ณผ ํ™•์ธ**
20
+ - PDF ํ…์ŠคํŠธ ์ถ”์ถœ ์™„๋ฒฝ ์ž‘๋™
21
+ - ๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ ์™„๋ฒฝ ์ž‘๋™ (5๊ฐœ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ)
22
+ - ๋น ๋ฅธ ์ฒ˜๋ฆฌ (์ฆ‰์‹œ ์‘๋‹ต)
23
+ - ์‹ค์ œ ์ˆ˜ํ•™ ๋ฌธ์ œ ๋‚ด์šฉ ์ •ํ™•ํžˆ ์ถ”์ถœ
24
+
25
+ ### ๐Ÿ”ง ํ•ด๊ฒฐ๋œ ๋ฌธ์ œ๋“ค
26
+ 1. **ํ† ํฐ ID ๋ฌธ์ œ**: ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜์œผ๋กœ ํ•ด๊ฒฐ
27
+ 2. **PDF ํ…์ŠคํŠธ ์ถ”์ถœ**: ์‹ค์ œ ๋ฌธ์ œ ๋‚ด์šฉ ์ถ”์ถœ ์„ฑ๊ณต
28
+ 3. **๋น ๋ฅธ ๊ฒ€์ฆ**: ์ž‘์€ ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ๋กœ ์„ฑ๊ณต
29
+ 4. **๊ตฌ์กฐํ™”๋œ ์‘๋‹ต**: ๋ฌธ์ œ ๋ถ„์„๊ณผ ํ•ด๊ฒฐ ๋ฐฉ๋ฒ• ํฌํ•จ
30
+
31
+ ## ๐Ÿ“ ์ฃผ์š” ํŒŒ์ผ ๊ตฌ์กฐ
32
+
33
+ ### Core ํŒŒ์ผ๋“ค
34
+ - `lily_llm_core/document_processor.py`: PDF ํ…์ŠคํŠธ ์ถ”์ถœ ๊ฐœ์„ 
35
+ - `lily_llm_core/rag_processor.py`: RAG ํ”„๋กœ์„ธ์„œ ๋‹จ์ˆœํ™”
36
+ - `lily_llm_api/app_v2.py`: ์—”๋“œํฌ์ธํŠธ ์ˆ˜์ •
37
+
38
+ ### ํ…Œ์ŠคํŠธ ํŒŒ์ผ๋“ค
39
+ - `test_simple_rag.py`: ๊ธฐ๋ณธ ์—ฐ๊ฒฐ ํ…Œ์ŠคํŠธ
40
+ - `test_text_only_rag.py`: ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ RAG ํ…Œ์ŠคํŠธ
41
+ - `test_specific_questions.py`: ๊ตฌ์ฒด์  ์งˆ๋ฌธ ํ…Œ์ŠคํŠธ
42
+ - `test_llm_rag.py`: LLM ํฌํ•จ ํ…Œ์ŠคํŠธ
43
+
44
+ ## ๐Ÿš€ ๋‹ค์Œ ๋‹จ๊ณ„ ์š”์ฒญ์‚ฌํ•ญ
45
+
46
+ ### 1. GPU ์„œ๋ฒ„ ๋ฐฐํฌ ์ค€๋น„
47
+ - ํ˜„์žฌ CPU ํ™˜๊ฒฝ์—์„œ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ
48
+ - GPU ํ™˜๊ฒฝ์—์„œ์˜ ์„ฑ๋Šฅ ์ตœ์ ํ™”
49
+ - ์„œ๋ฒ„ ๋ฐฐํฌ ์ž๋™ํ™” ์Šคํฌ๋ฆฝํŠธ ์ž‘์„ฑ
50
+
51
+ ### 2. LLM ํ†ตํ•ฉ ๊ฐœ์„ 
52
+ - Kanana ๋ชจ๋ธ์˜ ํ† ํฐ ๋””์ฝ”๋”ฉ ๋ฌธ์ œ ํ•ด๊ฒฐ
53
+ - ์‹ค์ œ ์ˆ˜ํ•™ ๋ฌธ์ œ ํ’€์ด ๋Šฅ๋ ฅ ํ–ฅ์ƒ
54
+ - ๋” ์ •ํ™•ํ•œ ๋‹ต๋ณ€ ์ƒ์„ฑ
55
+
56
+ ### 3. ์‹ค์ œ ๋ฌธ์ œ ํ•ด๊ฒฐ ๋Šฅ๋ ฅ ํ–ฅ์ƒ
57
+ - ์ˆ˜ํ•™ ๋ฌธ์ œ ํ’€์ด ๋กœ์ง ๊ฐœ์„ 
58
+ - ๋‹จ๊ณ„๋ณ„ ํ•ด๊ฒฐ ๊ณผ์ • ์ œ์‹œ
59
+ - ์ •๋‹ต๊ณผ ํ•ด์„ค ์ œ๊ณต
60
+
61
+ ### 4. ์„ฑ๋Šฅ ์ตœ์ ํ™”
62
+ - ๋” ํฐ ๋ฐ์ดํ„ฐ์…‹ ์ฒ˜๋ฆฌ
63
+ - ์‘๋‹ต ์†๋„ ๊ฐœ์„ 
64
+ - ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ์ตœ์ ํ™”
65
+
66
+ ## ๐Ÿ’ก ๊ธฐ์ˆ ์  ์š”๊ตฌ์‚ฌํ•ญ
67
+
68
+ ### ํ˜„์žฌ ํ™˜๊ฒฝ
69
+ - **OS**: Windows 10
70
+ - **Python**: ๊ฐ€์ƒํ™˜๊ฒฝ (lily_llm_env)
71
+ - **์„œ๋ฒ„**: FastAPI (ํฌํŠธ 8001)
72
+ - **๋ชจ๋ธ**: Kanana-1.5-v-3b-instruct
73
+ - **๋ฒกํ„ฐ ์Šคํ† ์–ด**: FAISS
74
+
75
+ ### ๋ชฉํ‘œ ํ™˜๊ฒฝ
76
+ - **GPU ์„œ๋ฒ„**: ๋” ๋น ๋ฅธ ์ฒ˜๋ฆฌ ์†๋„
77
+ - **ํ™•์žฅ์„ฑ**: ๋” ํฐ ๋ฌธ์„œ ์ฒ˜๋ฆฌ
78
+ - **์ •ํ™•์„ฑ**: ์‹ค์ œ ๋ฌธ์ œ ํ•ด๊ฒฐ ๋Šฅ๋ ฅ
79
+
80
+ ## ๐Ÿ” ํ˜„์žฌ ํ…Œ์ŠคํŠธ ๊ฒฐ๊ณผ
81
+
82
+ ### ์„ฑ๊ณตํ•œ ๊ธฐ๋Šฅ๋“ค
83
+ - โœ… **PDF ํ…์ŠคํŠธ ์ถ”์ถœ ์™„๋ฒฝ**: ์‹ค์ œ ์ˆ˜ํ•™ ๋ฌธ์ œ ๋‚ด์šฉ ์ •ํ™•ํžˆ ์ถ”์ถœ
84
+ - โœ… **๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ ์™„๋ฒฝ**: ๊ด€๋ จ ๋ฌธ์ œ๋“ค์„ ์ •ํ™•ํžˆ ์ฐพ์•„๋ƒ„
85
+ - โœ… **๋น ๋ฅธ ์ฒ˜๋ฆฌ**: ์ฆ‰์‹œ ์‘๋‹ต (LLM ์—†์ด๋„ ์ž‘๋™)
86
+ - โœ… **๊ตฌ์กฐํ™”๋œ ์‘๋‹ต**: ๋ฌธ์ œ ๋ถ„์„๊ณผ ํ•ด๊ฒฐ ๋ฐฉ๋ฒ• ์ œ์‹œ
87
+ - โœ… **์ •ํ™•ํ•œ ๋ฌธ์ œ ๋งค์นญ**: 23๋ฒˆ, 24๋ฒˆ, 15๋ฒˆ ๋ฌธ์ œ ์ •ํ™•ํžˆ ์ฐพ์Œ
88
+
89
+ ### ํ…Œ์ŠคํŠธ ๊ฒฐ๊ณผ
90
+ - **๋ฌธ์„œ ์—…๋กœ๋“œ**: 12๊ฐœ ์ฒญํฌ ์„ฑ๊ณต
91
+ - **๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ**: 5๊ฐœ์”ฉ ์ •ํ™•ํžˆ ๋ฐ˜ํ™˜
92
+ - **์‘๋‹ต ์‹œ๊ฐ„**: ์ฆ‰์‹œ (ํ† ํฐ ID ๋ฌธ์ œ ํ•ด๊ฒฐ๋จ)
93
+ - **๋ฌธ์ œ ์ธ์‹**: ์‹ค์ œ ์ˆ˜ํ•™ ๋ฌธ์ œ ๋‚ด์šฉ ์ •ํ™•ํžˆ ์ถ”์ถœ
94
+
95
+ ## ๐Ÿ“ ์š”์ฒญ์‚ฌํ•ญ
96
+
97
+ ์ƒˆ๋กœ์šด AI ์–ด์‹œ์Šคํ„ดํŠธ์—๊ฒŒ ๋‹ค์Œ ์ž‘์—…์„ ์š”์ฒญํ•ฉ๋‹ˆ๋‹ค:
98
+
99
+ 1. **ํ˜„์žฌ ์ฝ”๋“œ๋ฒ ์ด์Šค ๋ถ„์„**: ์œ„์—์„œ ์–ธ๊ธ‰๋œ ํŒŒ์ผ๋“ค์„ ๊ฒ€ํ† ํ•˜์—ฌ ํ˜„์žฌ ์ƒํƒœ ํŒŒ์•…
100
+ 2. **GPU ์„œ๋ฒ„ ๋ฐฐํฌ ๊ณ„ํš ์ˆ˜๋ฆฝ**: ํ˜„์žฌ CPU ํ…Œ์ŠคํŠธ ์™„๋ฃŒ ์ƒํƒœ์—์„œ GPU ํ™˜๊ฒฝ์œผ๋กœ ์ „ํ™˜
101
+ 3. **LLM ํ†ตํ•ฉ ๊ฐœ์„ **: ํ† ํฐ ๋””์ฝ”๋”ฉ ๋ฌธ์ œ ํ•ด๊ฒฐ ๋ฐ ์‹ค์ œ ๋ฌธ์ œ ํ•ด๊ฒฐ ๋Šฅ๋ ฅ ํ–ฅ์ƒ
102
+ 4. **์„ฑ๋Šฅ ์ตœ์ ํ™”**: ๋” ํฐ ๋ฐ์ดํ„ฐ์…‹ ์ฒ˜๋ฆฌ ๋ฐ ์‘๋‹ต ์†๋„ ๊ฐœ์„ 
103
+ 5. **์‹ค์ œ ๋ฌธ์ œ ํ•ด๊ฒฐ**: ์ˆ˜ํ•™ ๋ฌธ์ œ ํ’€์ด ๋กœ์ง ๊ฐœ์„  ๋ฐ ์ •ํ™•ํ•œ ๋‹ต๋ณ€ ์ƒ์„ฑ
104
+
105
+ ํ˜„์žฌ "์ž‘์€ ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ๋กœ ๋น ๋ฅธ ๊ฒ€์ฆ" ๋ชฉํ‘œ๋Š” ๋‹ฌ์„ฑํ–ˆ์œผ๋ฏ€๋กœ, ๋‹ค์Œ ๋‹จ๊ณ„์ธ "์‹ค์ œ ๋ฌธ์ œ ํ•ด๊ฒฐ ๋Šฅ๋ ฅ ํ–ฅ์ƒ"๊ณผ "GPU ์„œ๋ฒ„ ๋ฐฐํฌ"์— ์ง‘์ค‘ํ•ด์ฃผ์„ธ์š”.
README.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM API - Hugging Face Spaces
2
+
3
+ ## ๐Ÿค– ์†Œ๊ฐœ
4
+
5
+ Lily LLM API๋Š” ๋‹ค์ค‘ ๋ชจ๋ธ ์ง€์›๊ณผ RAG(Retrieval Augmented Generation) ์‹œ์Šคํ…œ์„ ๊ฐ–์ถ˜ ๊ณ ์„ฑ๋Šฅ AI API ์„œ๋ฒ„์ž…๋‹ˆ๋‹ค.
6
+
7
+ ### โœจ ์ฃผ์š” ๊ธฐ๋Šฅ
8
+
9
+ - **๐Ÿง  ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ AI**: Kanana-1.5-v-3b-instruct ๋ชจ๋ธ์„ ํ†ตํ•œ ํ…์ŠคํŠธ ๋ฐ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
10
+ - **๐Ÿ“š RAG ์‹œ์Šคํ…œ**: ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต ๋ฐ ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰
11
+ - **๐Ÿ” ๋ฒกํ„ฐ ๊ฒ€์ƒ‰**: FAISS ๊ธฐ๋ฐ˜ ๊ณ ์† ์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰
12
+ - **๐Ÿ“„ ๋ฌธ์„œ ์ฒ˜๋ฆฌ**: PDF, DOCX, TXT ๋“ฑ ๋‹ค์–‘ํ•œ ๋ฌธ์„œ ํ˜•์‹ ์ง€์›
13
+ - **๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ OCR**: LaTeX-OCR์„ ํ†ตํ•œ ์ˆ˜ํ•™ ๊ณต์‹ ์ธ์‹
14
+ - **โšก ๋น„๋™๊ธฐ ์ฒ˜๋ฆฌ**: Celery ๊ธฐ๋ฐ˜ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—…
15
+ - **๐ŸŒ RESTful API**: FastAPI ๊ธฐ๋ฐ˜ ๊ณ ์„ฑ๋Šฅ ์›น API
16
+
17
+ ### ๐Ÿš€ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
18
+
19
+ #### 1. ํ…์ŠคํŠธ ์ƒ์„ฑ
20
+
21
+ ```python
22
+ import requests
23
+
24
+ response = requests.post(
25
+ "https://huggingface.co/spaces/gbrabbit/lily_fast_api/generate",
26
+ data={"prompt": "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜ ๋‚ ์”จ๊ฐ€ ์–ด๋–ค๊ฐ€์š”?"}
27
+ )
28
+ print(response.json())
29
+ ```
30
+
31
+ #### 2. ์ด๋ฏธ์ง€์™€ ํ•จ๊ป˜ ์งˆ์˜
32
+
33
+ ```python
34
+ import requests
35
+
36
+ with open("image.jpg", "rb") as f:
37
+ response = requests.post(
38
+ "https://huggingface.co/spaces/gbrabbit/lily_fast_api/generate",
39
+ data={"prompt": "์ด๋ฏธ์ง€์—์„œ ๋ฌด์—‡์„ ๋ณผ ์ˆ˜ ์žˆ๋‚˜์š”?"},
40
+ files={"image1": f}
41
+ )
42
+ print(response.json())
43
+ ```
44
+
45
+ #### 3. RAG ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต
46
+
47
+ ```python
48
+ import requests
49
+
50
+ # ๋ฌธ์„œ ์—…๋กœ๋“œ
51
+ with open("document.pdf", "rb") as f:
52
+ upload_response = requests.post(
53
+ "https://huggingface.co/spaces/gbrabbit/lily_fast_api/upload-document",
54
+ files={"file": f},
55
+ data={"user_id": "your_user_id"}
56
+ )
57
+
58
+ document_id = upload_response.json()["document_id"]
59
+
60
+ # RAG ์งˆ์˜
61
+ response = requests.post(
62
+ "https://huggingface.co/spaces/gbrabbit/lily_fast_api/rag-query",
63
+ json={
64
+ "query": "๋ฌธ์„œ์˜ ์ฃผ์š” ๋‚ด์šฉ์€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
65
+ "user_id": "your_user_id",
66
+ "document_id": document_id
67
+ }
68
+ )
69
+ print(response.json())
70
+ ```
71
+
72
+ ### ๐Ÿ“‹ API ์—”๋“œํฌ์ธํŠธ
73
+
74
+ #### ๊ธฐ๋ณธ ์—”๋“œํฌ์ธํŠธ
75
+ - `GET /health` - ์„œ๋ฒ„ ์ƒํƒœ ํ™•์ธ
76
+ - `GET /models` - ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก
77
+ - `POST /load-model` - ๋ชจ๋ธ ๋กœ๋“œ
78
+ - `POST /generate` - ํ…์ŠคํŠธ/์ด๋ฏธ์ง€ ์ƒ์„ฑ
79
+
80
+ #### RAG ์‹œ์Šคํ…œ
81
+ - `POST /upload-document` - ๋ฌธ์„œ ์—…๋กœ๋“œ
82
+ - `POST /rag-query` - RAG ๊ธฐ๋ฐ˜ ์งˆ์˜
83
+ - `GET /documents/{user_id}` - ์‚ฌ์šฉ์ž ๋ฌธ์„œ ๋ชฉ๋ก
84
+ - `DELETE /document/{document_id}` - ๋ฌธ์„œ ์‚ญ์ œ
85
+
86
+ #### ๊ณ ๊ธ‰ ๊ธฐ๋Šฅ
87
+ - `POST /batch-process` - ๋ฐฐ์น˜ ๋ฌธ์„œ ์ฒ˜๋ฆฌ
88
+ - `GET /task-status/{task_id}` - ์ž‘์—… ์ƒํƒœ ํ™•์ธ
89
+ - `POST /cancel-task/{task_id}` - ์ž‘์—… ์ทจ์†Œ
90
+
91
+ ### ๐Ÿ› ๏ธ ๊ธฐ์ˆ  ์Šคํƒ
92
+
93
+ - **Backend**: FastAPI, Python 3.11
94
+ - **AI Models**: Transformers, PyTorch
95
+ - **Vector DB**: FAISS, ChromaDB
96
+ - **Task Queue**: Celery, Redis
97
+ - **OCR**: LaTeX-OCR, EasyOCR
98
+ - **Document Processing**: LangChain
99
+
100
+ ### ๐Ÿ“Š ๋ชจ๋ธ ์ •๋ณด
101
+
102
+ #### Kanana-1.5-v-3b-instruct
103
+ - **ํฌ๊ธฐ**: 3.6B ๋งค๊ฐœ๋ณ€์ˆ˜
104
+ - **์–ธ์–ด**: ํ•œ๊ตญ์–ด ํŠนํ™”
105
+ - **๊ธฐ๋Šฅ**: ํ…์ŠคํŠธ ์ƒ์„ฑ, ์ด๋ฏธ์ง€ ์ดํ•ด
106
+ - **์ปจํ…์ŠคํŠธ**: ์ตœ๋Œ€ 4096 ํ† ํฐ
107
+
108
+ ### ๐Ÿ”ง ์„ค์ •
109
+
110
+ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋ฅผ ํ†ตํ•ด ๋‹ค์Œ ์„ค์ •์„ ์กฐ์ •ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค:
111
+
112
+ ```bash
113
+ # ์„œ๋ฒ„ ์„ค์ •
114
+ HOST=0.0.0.0
115
+ PORT=7860
116
+
117
+ # ๋ชจ๋ธ ์„ค์ •
118
+ DEFAULT_MODEL=kanana-1.5-v-3b-instruct
119
+ MAX_NEW_TOKENS=256
120
+ TEMPERATURE=0.7
121
+
122
+ # ์บ์‹œ ์„ค์ •
123
+ TRANSFORMERS_CACHE=/app/cache/transformers
124
+ HF_HOME=/app/cache/huggingface
125
+ ```
126
+
127
+ ### ๐Ÿ“ ๋ผ์ด์„ ์Šค
128
+
129
+ ์ด ํ”„๋กœ์ ํŠธ๋Š” MIT ๋ผ์ด์„ ์Šค ํ•˜์— ๋ฐฐํฌ๋ฉ๋‹ˆ๋‹ค.
130
+
131
+ ### ๐Ÿค ๊ธฐ์—ฌ
132
+
133
+ ๋ฒ„๊ทธ ๋ฆฌํฌํŠธ, ๊ธฐ๋Šฅ ์ œ์•ˆ, ํ’€ ๋ฆฌํ€˜์ŠคํŠธ๋ฅผ ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค!
134
+
135
+ ### ๐Ÿ“ž ์ง€์›
136
+
137
+ ๋ฌธ์˜์‚ฌํ•ญ์ด ์žˆ์œผ์‹œ๋ฉด GitHub Issues๋ฅผ ํ†ตํ•ด ์—ฐ๋ฝํ•ด ์ฃผ์„ธ์š”.
README_DEPLOYMENT.md ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM API ๋ฐฐํฌ ๊ฐ€์ด๋“œ
2
+
3
+ ## ๐Ÿ“‹ ๊ฐœ์š”
4
+
5
+ ์ด ๋ฌธ์„œ๋Š” Lily LLM API ์„œ๋ฒ„์˜ Docker ๊ธฐ๋ฐ˜ ๋ฐฐํฌ ๋ฐฉ๋ฒ•์„ ์„ค๋ช…ํ•ฉ๋‹ˆ๋‹ค.
6
+
7
+ ## ๐Ÿ—๏ธ ์•„ํ‚คํ…์ฒ˜
8
+
9
+ ```
10
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
11
+ โ”‚ Lily LLM API โ”‚ โ”‚ Redis โ”‚ โ”‚ Celery Worker โ”‚
12
+ โ”‚ (FastAPI) โ”‚โ—„โ”€โ”€โ–บโ”‚ (Message โ”‚โ—„โ”€โ”€โ–บโ”‚ (Background โ”‚
13
+ โ”‚ Port: 8001 โ”‚ โ”‚ Broker) โ”‚ โ”‚ Tasks) โ”‚
14
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
15
+ โ”‚ โ”‚ โ”‚
16
+ โ”‚ โ”‚ โ”‚
17
+ โ–ผ โ–ผ โ–ผ
18
+ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”
19
+ โ”‚ Flower โ”‚ โ”‚ Celery Beat โ”‚ โ”‚ Database โ”‚
20
+ โ”‚ (Monitoring) โ”‚ โ”‚ (Scheduler) โ”‚ โ”‚ (SQLite) โ”‚
21
+ โ”‚ Port: 5555 โ”‚ โ”‚ โ”‚ โ”‚ โ”‚
22
+ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜
23
+ ```
24
+
25
+ ## ๐Ÿš€ ๋น ๋ฅธ ์‹œ์ž‘
26
+
27
+ ### 1. ์‚ฌ์ „ ์š”๊ตฌ์‚ฌํ•ญ
28
+
29
+ - Docker
30
+ - Docker Compose
31
+ - ์ตœ์†Œ 4GB RAM
32
+ - ์ตœ์†Œ 10GB ๋””์Šคํฌ ๊ณต๊ฐ„
33
+
34
+ ### 2. ๋ฐฐํฌ ์‹คํ–‰
35
+
36
+ ```bash
37
+ # ์ €์žฅ์†Œ ํด๋ก 
38
+ git clone <repository-url>
39
+ cd lily_generate_package
40
+
41
+ # ๋ฐฐํฌ ์Šคํฌ๋ฆฝํŠธ ์‹คํ–‰
42
+ chmod +x scripts/deploy.sh
43
+ ./scripts/deploy.sh deploy
44
+ ```
45
+
46
+ ### 3. ์„œ๋น„์Šค ํ™•์ธ
47
+
48
+ ```bash
49
+ # ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ
50
+ ./scripts/deploy.sh status
51
+
52
+ # ๋กœ๊ทธ ํ™•์ธ
53
+ ./scripts/deploy.sh logs
54
+ ```
55
+
56
+ ## ๐Ÿ“ฆ Docker ์ปจํ…Œ์ด๋„ˆ
57
+
58
+ ### ์ฃผ์š” ์„œ๋น„์Šค
59
+
60
+ | ์„œ๋น„์Šค | ํฌํŠธ | ์„ค๋ช… |
61
+ |--------|------|------|
62
+ | lily-llm-api | 8001 | FastAPI ๋ฉ”์ธ ์„œ๋ฒ„ |
63
+ | redis | 6379 | ๋ฉ”์‹œ์ง€ ๋ธŒ๋กœ์ปค ๋ฐ ์บ์‹œ |
64
+ | celery-worker | - | ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—… ์ฒ˜๋ฆฌ |
65
+ | celery-beat | - | ์Šค์ผ€์ค„๋Ÿฌ |
66
+ | flower | 5555 | Celery ๋ชจ๋‹ˆํ„ฐ๋ง |
67
+
68
+ ### ํ™˜๊ฒฝ ๋ณ€์ˆ˜
69
+
70
+ ```yaml
71
+ # API ์„œ๋ฒ„
72
+ REDIS_URL: redis://redis:6379
73
+ DATABASE_URL: sqlite:///app/data/lily_llm.db
74
+ LOG_LEVEL: INFO
75
+ CELERY_BROKER_URL: redis://redis:6379/0
76
+ CELERY_RESULT_BACKEND: redis://redis:6379/0
77
+ ```
78
+
79
+ ## ๐Ÿ”ง ๋ฐฐํฌ ์Šคํฌ๋ฆฝํŠธ
80
+
81
+ ### ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ช…๋ น์–ด
82
+
83
+ ```bash
84
+ # ์ „์ฒด ๋ฐฐํฌ
85
+ ./scripts/deploy.sh deploy
86
+
87
+ # ์„œ๋น„์Šค ์‹œ์ž‘
88
+ ./scripts/deploy.sh start
89
+
90
+ # ์„œ๋น„์Šค ์ค‘์ง€
91
+ ./scripts/deploy.sh stop
92
+
93
+ # ์„œ๋น„์Šค ์žฌ์‹œ์ž‘
94
+ ./scripts/deploy.sh restart
95
+
96
+ # ๋ฐฐํฌ ์—…๋ฐ์ดํŠธ
97
+ ./scripts/deploy.sh update
98
+
99
+ # ๋กœ๊ทธ ํ™•์ธ
100
+ ./scripts/deploy.sh logs
101
+
102
+ # ์ƒํƒœ ํ™•์ธ
103
+ ./scripts/deploy.sh status
104
+
105
+ # ์ •๋ฆฌ
106
+ ./scripts/deploy.sh cleanup
107
+
108
+ # ์ด๋ฏธ์ง€ ๋นŒ๋“œ
109
+ ./scripts/deploy.sh build
110
+ ```
111
+
112
+ ## ๐Ÿงช ํ…Œ์ŠคํŠธ
113
+
114
+ ### Docker ๋ฐฐํฌ ํ…Œ์ŠคํŠธ
115
+
116
+ ```bash
117
+ python scripts/test_docker_deployment.py
118
+ ```
119
+
120
+ ### ์ˆ˜๋™ ํ…Œ์ŠคํŠธ
121
+
122
+ ```bash
123
+ # API ์ƒํƒœ ํ™•์ธ
124
+ curl http://localhost:8001/health
125
+
126
+ # ๋ชจ๋ธ ๋ชฉ๋ก ํ™•์ธ
127
+ curl http://localhost:8001/models
128
+
129
+ # ํ…์ŠคํŠธ ์ƒ์„ฑ ํ…Œ์ŠคํŠธ
130
+ curl -X POST http://localhost:8001/generate \
131
+ -H "Content-Type: application/x-www-form-urlencoded" \
132
+ -d "prompt=์•ˆ๋…•ํ•˜์„ธ์š”&model_id=polyglot-ko-1.3b-chat&max_length=50"
133
+ ```
134
+
135
+ ## ๐Ÿ“Š ๋ชจ๋‹ˆํ„ฐ๋ง
136
+
137
+ ### Flower (Celery ๋ชจ๋‹ˆํ„ฐ๋ง)
138
+
139
+ - URL: http://localhost:5555
140
+ - ์ž‘์—… ์ƒํƒœ, ์›Œ์ปค ์ƒํƒœ, ์„ฑ๋Šฅ ๋ฉ”ํŠธ๋ฆญ ํ™•์ธ
141
+
142
+ ### API ๋ชจ๋‹ˆํ„ฐ๋ง
143
+
144
+ ```bash
145
+ # ์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์ž‘
146
+ curl -X POST http://localhost:8001/monitoring/start
147
+
148
+ # ์„ฑ๋Šฅ ์ƒํƒœ ํ™•์ธ
149
+ curl http://localhost:8001/monitoring/status
150
+
151
+ # ์‹œ์Šคํ…œ ๊ฑด๊ฐ• ์ƒํƒœ
152
+ curl http://localhost:8001/monitoring/health
153
+ ```
154
+
155
+ ## ๐Ÿ”’ ๋ณด์•ˆ
156
+
157
+ ### ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๊ด€๋ฆฌ
158
+
159
+ ```bash
160
+ # .env ํŒŒ์ผ ์ƒ์„ฑ
161
+ cat > .env << EOF
162
+ SECRET_KEY=your-secret-key-here
163
+ JWT_SECRET_KEY=your-jwt-secret-key
164
+ DATABASE_URL=sqlite:///app/data/lily_llm.db
165
+ REDIS_URL=redis://redis:6379
166
+ EOF
167
+ ```
168
+
169
+ ### ๋ฐฉํ™”๋ฒฝ ์„ค์ •
170
+
171
+ ```bash
172
+ # ํ•„์š”ํ•œ ํฌํŠธ๋งŒ ์—ด๊ธฐ
173
+ sudo ufw allow 8001 # API ์„œ๋ฒ„
174
+ sudo ufw allow 5555 # Flower ๋ชจ๋‹ˆํ„ฐ๋ง
175
+ ```
176
+
177
+ ## ๐Ÿ“ˆ ์„ฑ๋Šฅ ์ตœ์ ํ™”
178
+
179
+ ### ๋ฆฌ์†Œ์Šค ์ œํ•œ
180
+
181
+ ```yaml
182
+ # docker-compose.yml์— ์ถ”๊ฐ€
183
+ services:
184
+ lily-llm-api:
185
+ deploy:
186
+ resources:
187
+ limits:
188
+ memory: 4G
189
+ cpus: '2.0'
190
+ reservations:
191
+ memory: 2G
192
+ cpus: '1.0'
193
+ ```
194
+
195
+ ### ์บ์‹œ ์„ค์ •
196
+
197
+ ```python
198
+ # Redis ์บ์‹œ ํ™œ์šฉ
199
+ import redis
200
+ from functools import lru_cache
201
+
202
+ redis_client = redis.Redis(host='redis', port=6379, db=0)
203
+
204
+ @lru_cache(maxsize=128)
205
+ def cached_model_response(prompt, model_id):
206
+ # ์บ์‹œ๋œ ์‘๋‹ต ๋ฐ˜ํ™˜
207
+ pass
208
+ ```
209
+
210
+ ## ๐Ÿšจ ๋ฌธ์ œ ํ•ด๊ฒฐ
211
+
212
+ ### ์ผ๋ฐ˜์ ์ธ ๋ฌธ์ œ
213
+
214
+ #### 1. ๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ
215
+
216
+ ```bash
217
+ # ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ
218
+ docker stats
219
+
220
+ # ๋ถˆํ•„์š”ํ•œ ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ
221
+ docker system prune -f
222
+ ```
223
+
224
+ #### 2. API ์‘๋‹ต ์—†์Œ
225
+
226
+ ```bash
227
+ # ์ปจํ…Œ์ด๋„ˆ ์ƒํƒœ ํ™•์ธ
228
+ docker-compose ps
229
+
230
+ # ๋กœ๊ทธ ํ™•์ธ
231
+ docker-compose logs lily-llm-api
232
+
233
+ # ์ปจํ…Œ์ด๋„ˆ ์žฌ์‹œ์ž‘
234
+ docker-compose restart lily-llm-api
235
+ ```
236
+
237
+ #### 3. Redis ์—ฐ๊ฒฐ ์‹คํŒจ
238
+
239
+ ```bash
240
+ # Redis ์ปจํ…Œ์ด๋„ˆ ์ƒํƒœ ํ™•์ธ
241
+ docker-compose logs redis
242
+
243
+ # Redis ์žฌ์‹œ์ž‘
244
+ docker-compose restart redis
245
+ ```
246
+
247
+ ### ๋กœ๊ทธ ๋ถ„์„
248
+
249
+ ```bash
250
+ # ์‹ค์‹œ๊ฐ„ ๋กœ๊ทธ ํ™•์ธ
251
+ docker-compose logs -f
252
+
253
+ # ํŠน์ • ์„œ๋น„์Šค ๋กœ๊ทธ
254
+ docker-compose logs -f lily-llm-api
255
+
256
+ # ์˜ค๋ฅ˜ ๋กœ๊ทธ๋งŒ ํ™•์ธ
257
+ docker-compose logs | grep ERROR
258
+ ```
259
+
260
+ ## ๐Ÿ”„ ์—…๋ฐ์ดํŠธ
261
+
262
+ ### ์ฝ”๋“œ ์—…๋ฐ์ดํŠธ
263
+
264
+ ```bash
265
+ # ์ตœ์‹  ์ฝ”๋“œ ๊ฐ€์ ธ์˜ค๊ธฐ
266
+ git pull origin main
267
+
268
+ # ๋ฐฐํฌ ์—…๋ฐ์ดํŠธ
269
+ ./scripts/deploy.sh update
270
+ ```
271
+
272
+ ### ๋ชจ๋ธ ์—…๋ฐ์ดํŠธ
273
+
274
+ ```bash
275
+ # ๋ชจ๋ธ ํŒŒ์ผ ๊ต์ฒด
276
+ cp new_model.safetensors ./models/
277
+
278
+ # ์„œ๋น„์Šค ์žฌ์‹œ์ž‘
279
+ docker-compose restart lily-llm-api
280
+ ```
281
+
282
+ ## ๐Ÿ“š ์ถ”๊ฐ€ ๋ฌธ์„œ
283
+
284
+ - [API ๋ฌธ์„œ](http://localhost:8001/docs)
285
+ - [์„ฑ๋Šฅ ๊ฐ€์ด๋“œ](./PERFORMANCE.md)
286
+ - [๋ณด์•ˆ ๊ฐ€์ด๋“œ](./SECURITY.md)
287
+ - [๋ชจ๋‹ˆํ„ฐ๋ง ๊ฐ€์ด๋“œ](./MONITORING.md)
288
+
289
+ ## ๐Ÿค ์ง€์›
290
+
291
+ ๋ฌธ์ œ๊ฐ€ ๋ฐœ์ƒํ•˜๋ฉด ๋‹ค์Œ์„ ํ™•์ธํ•˜์„ธ์š”:
292
+
293
+ 1. ๋กœ๊ทธ ํŒŒ์ผ ํ™•์ธ
294
+ 2. ์‹œ์Šคํ…œ ๋ฆฌ์†Œ์Šค ํ™•์ธ
295
+ 3. ๋„คํŠธ์›Œํฌ ์—ฐ๊ฒฐ ํ™•์ธ
296
+ 4. ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ • ํ™•์ธ
297
+
298
+ ## ๐Ÿ“ ๋ณ€๊ฒฝ ๋กœ๊ทธ
299
+
300
+ ### v1.0.0 (2025-08-04)
301
+ - ์ดˆ๊ธฐ Docker ๋ฐฐํฌ ์„ค์ •
302
+ - CI/CD ํŒŒ์ดํ”„๋ผ์ธ ๊ตฌ์ถ•
303
+ - ๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์Šคํ…œ ์ถ”๊ฐ€
304
+ - ๋ณด์•ˆ ์„ค์ • ๊ฐ•ํ™”
README_LILY.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM API - Hugging Face Spaces
2
+
3
+ ## ๐Ÿค– ์†Œ๊ฐœ
4
+
5
+ Lily LLM API๋Š” ๋‹ค์ค‘ ๋ชจ๋ธ ์ง€์›๊ณผ RAG(Retrieval Augmented Generation) ์‹œ์Šคํ…œ์„ ๊ฐ–์ถ˜ ๊ณ ์„ฑ๋Šฅ AI API ์„œ๋ฒ„์ž…๋‹ˆ๋‹ค.
6
+
7
+ ### โœจ ์ฃผ์š” ๊ธฐ๋Šฅ
8
+
9
+ - **๐Ÿง  ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ AI**: Kanana-1.5-v-3b-instruct ๋ชจ๋ธ์„ ํ†ตํ•œ ํ…์ŠคํŠธ ๋ฐ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
10
+ - **๐Ÿ“š RAG ์‹œ์Šคํ…œ**: ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต ๋ฐ ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰
11
+ - **๐Ÿ” ๋ฒกํ„ฐ ๊ฒ€์ƒ‰**: FAISS ๊ธฐ๋ฐ˜ ๊ณ ์† ์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰
12
+ - **๐Ÿ“„ ๋ฌธ์„œ ์ฒ˜๋ฆฌ**: PDF, DOCX, TXT ๋“ฑ ๋‹ค์–‘ํ•œ ๋ฌธ์„œ ํ˜•์‹ ์ง€์›
13
+ - **๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ OCR**: LaTeX-OCR์„ ํ†ตํ•œ ์ˆ˜ํ•™ ๊ณต์‹ ์ธ์‹
14
+ - **โšก ๋น„๋™๊ธฐ ์ฒ˜๋ฆฌ**: Celery ๊ธฐ๋ฐ˜ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—…
15
+ - **๐ŸŒ RESTful API**: FastAPI ๊ธฐ๋ฐ˜ ๊ณ ์„ฑ๋Šฅ ์›น API
16
+
17
+ ### ๐Ÿš€ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
18
+
19
+ #### 1. ํ…์ŠคํŠธ ์ƒ์„ฑ
20
+
21
+ ```python
22
+ import requests
23
+
24
+ response = requests.post(
25
+ "https://huggingface.co/spaces/gbrabbit/lily_fast_api/generate",
26
+ data={"prompt": "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜ ๋‚ ์”จ๊ฐ€ ์–ด๋–ค๊ฐ€์š”?"}
27
+ )
28
+ print(response.json())
29
+ ```
30
+
31
+ #### 2. ์ด๋ฏธ์ง€์™€ ํ•จ๊ป˜ ์งˆ์˜
32
+
33
+ ```python
34
+ import requests
35
+
36
+ with open("image.jpg", "rb") as f:
37
+ response = requests.post(
38
+ "https://huggingface.co/spaces/gbrabbit/lily_fast_api/generate",
39
+ data={"prompt": "์ด๋ฏธ์ง€์—์„œ ๋ฌด์—‡์„ ๋ณผ ์ˆ˜ ์žˆ๋‚˜์š”?"},
40
+ files={"image1": f}
41
+ )
42
+ print(response.json())
43
+ ```
44
+
45
+ #### 3. RAG ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต
46
+
47
+ ```python
48
+ import requests
49
+
50
+ # ๋ฌธ์„œ ์—…๋กœ๋“œ
51
+ with open("document.pdf", "rb") as f:
52
+ upload_response = requests.post(
53
+ "https://huggingface.co/spaces/gbrabbit/lily_fast_api/upload-document",
54
+ files={"file": f},
55
+ data={"user_id": "your_user_id"}
56
+ )
57
+
58
+ document_id = upload_response.json()["document_id"]
59
+
60
+ # RAG ์งˆ์˜
61
+ response = requests.post(
62
+ "https://huggingface.co/spaces/gbrabbit/lily_fast_api/rag-query",
63
+ json={
64
+ "query": "๋ฌธ์„œ์˜ ์ฃผ์š” ๋‚ด์šฉ์€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
65
+ "user_id": "your_user_id",
66
+ "document_id": document_id
67
+ }
68
+ )
69
+ print(response.json())
70
+ ```
71
+
72
+ ### ๐Ÿ“‹ API ์—”๋“œํฌ์ธํŠธ
73
+
74
+ #### ๊ธฐ๋ณธ ์—”๋“œํฌ์ธํŠธ
75
+ - `GET /health` - ์„œ๋ฒ„ ์ƒํƒœ ํ™•์ธ
76
+ - `GET /models` - ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก
77
+ - `POST /load-model` - ๋ชจ๋ธ ๋กœ๋“œ
78
+ - `POST /generate` - ํ…์ŠคํŠธ/์ด๋ฏธ์ง€ ์ƒ์„ฑ
79
+
80
+ #### RAG ์‹œ์Šคํ…œ
81
+ - `POST /upload-document` - ๋ฌธ์„œ ์—…๋กœ๋“œ
82
+ - `POST /rag-query` - RAG ๊ธฐ๋ฐ˜ ์งˆ์˜
83
+ - `GET /documents/{user_id}` - ์‚ฌ์šฉ์ž ๋ฌธ์„œ ๋ชฉ๋ก
84
+ - `DELETE /document/{document_id}` - ๋ฌธ์„œ ์‚ญ์ œ
85
+
86
+ #### ๊ณ ๊ธ‰ ๊ธฐ๋Šฅ
87
+ - `POST /batch-process` - ๋ฐฐ์น˜ ๋ฌธ์„œ ์ฒ˜๋ฆฌ
88
+ - `GET /task-status/{task_id}` - ์ž‘์—… ์ƒํƒœ ํ™•์ธ
89
+ - `POST /cancel-task/{task_id}` - ์ž‘์—… ์ทจ์†Œ
90
+
91
+ ### ๐Ÿ› ๏ธ ๊ธฐ์ˆ  ์Šคํƒ
92
+
93
+ - **Backend**: FastAPI, Python 3.11
94
+ - **AI Models**: Transformers, PyTorch
95
+ - **Vector DB**: FAISS, ChromaDB
96
+ - **Task Queue**: Celery, Redis
97
+ - **OCR**: LaTeX-OCR, EasyOCR
98
+ - **Document Processing**: LangChain
99
+
100
+ ### ๐Ÿ“Š ๋ชจ๋ธ ์ •๋ณด
101
+
102
+ #### Kanana-1.5-v-3b-instruct
103
+ - **ํฌ๊ธฐ**: 3.6B ๋งค๊ฐœ๋ณ€์ˆ˜
104
+ - **์–ธ์–ด**: ํ•œ๊ตญ์–ด ํŠนํ™”
105
+ - **๊ธฐ๋Šฅ**: ํ…์ŠคํŠธ ์ƒ์„ฑ, ์ด๋ฏธ์ง€ ์ดํ•ด
106
+ - **์ปจํ…์ŠคํŠธ**: ์ตœ๋Œ€ 4096 ํ† ํฐ
107
+
108
+ ### ๐Ÿ”ง ์„ค์ •
109
+
110
+ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋ฅผ ํ†ตํ•ด ๋‹ค์Œ ์„ค์ •์„ ์กฐ์ •ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค:
111
+
112
+ ```bash
113
+ # ์„œ๋ฒ„ ์„ค์ •
114
+ HOST=0.0.0.0
115
+ PORT=7860
116
+
117
+ # ๋ชจ๋ธ ์„ค์ •
118
+ DEFAULT_MODEL=kanana-1.5-v-3b-instruct
119
+ MAX_NEW_TOKENS=256
120
+ TEMPERATURE=0.7
121
+
122
+ # ์บ์‹œ ์„ค์ •
123
+ TRANSFORMERS_CACHE=/app/cache/transformers
124
+ HF_HOME=/app/cache/huggingface
125
+ ```
126
+
127
+ ### ๐Ÿ“ ๋ผ์ด์„ ์Šค
128
+
129
+ ์ด ํ”„๋กœ์ ํŠธ๋Š” MIT ๋ผ์ด์„ ์Šค ํ•˜์— ๋ฐฐํฌ๋ฉ๋‹ˆ๋‹ค.
130
+
131
+ ### ๐Ÿค ๊ธฐ์—ฌ
132
+
133
+ ๋ฒ„๊ทธ ๋ฆฌํฌํŠธ, ๊ธฐ๋Šฅ ์ œ์•ˆ, ํ’€ ๋ฆฌํ€˜์ŠคํŠธ๋ฅผ ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค!
134
+
135
+ ### ๐Ÿ“ž ์ง€์›
136
+
137
+ ๋ฌธ์˜์‚ฌํ•ญ์ด ์žˆ์œผ์‹œ๋ฉด GitHub Issues๋ฅผ ํ†ตํ•ด ์—ฐ๋ฝํ•ด ์ฃผ์„ธ์š”.
README_gradio.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Lily Math RAG System
3
+ emoji: ๐Ÿงฎ
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ test
14
+
15
+ # ๐Ÿงฎ Lily Math RAG System
16
+
17
+ ์ˆ˜ํ•™ ๋ฌธ์ œ ํ•ด๊ฒฐ์„ ์œ„ํ•œ ํ•œ๊ตญ์–ด AI ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค.
18
+
19
+ ## ๊ธฐ๋Šฅ
20
+
21
+ - ๐Ÿ’ฌ **์ฑ„ํŒ…**: ์ผ๋ฐ˜์ ์ธ ๋Œ€ํ™” ๋ฐ ์ˆ˜ํ•™ ์งˆ๋ฌธ
22
+ - ๐Ÿงฎ **์ˆ˜ํ•™ ๋ฌธ์ œ ํ•ด๊ฒฐ**: ๊ตฌ์ฒด์ ์ธ ์ˆ˜ํ•™ ๋ฌธ์ œ ํ’€์ด
23
+ - ๐Ÿ” **RAG ๊ธฐ์ˆ **: ๊ฒ€์ƒ‰ ๊ธฐ๋ฐ˜ ์ƒ์„ฑ์œผ๋กœ ์ •ํ™•ํ•œ ๋‹ต๋ณ€
24
+
25
+ ## ์‚ฌ์šฉ๋ฒ•
26
+
27
+ 1. **์ฑ„ํŒ… ํƒญ**: ์ผ๋ฐ˜์ ์ธ ๋Œ€ํ™”๋‚˜ ์ˆ˜ํ•™ ์งˆ๋ฌธ
28
+ 2. **์ˆ˜ํ•™ ๋ฌธ์ œ ํ•ด๊ฒฐ ํƒญ**: ๊ตฌ์ฒด์ ์ธ ์ˆ˜ํ•™ ๋ฌธ์ œ ํ’€์ด
29
+ 3. **์„ค์ • ํƒญ**: ์‹œ์Šคํ…œ ์ •๋ณด ํ™•์ธ
30
+
31
+ ## ๊ธฐ์ˆ  ์Šคํƒ
32
+
33
+ - **๋ชจ๋ธ**: Polyglot-Ko 5.8B Chat
34
+ - **ํ”„๋ ˆ์ž„์›Œํฌ**: Gradio
35
+ - **์–ธ์–ด**: ํ•œ๊ตญ์–ด
36
+ - **์šฉ๋„**: ์ˆ˜ํ•™ ๊ต์œก ๋ฐ ๋ฌธ์ œ ํ•ด๊ฒฐ
37
+
38
+ ## ๋ผ์ด์„ ์Šค
39
+
40
+ MIT License
README_huggingface.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM API - Hugging Face Spaces
2
+
3
+ ## ๐Ÿค– ์†Œ๊ฐœ
4
+
5
+ Lily LLM API๋Š” ๋‹ค์ค‘ ๋ชจ๋ธ ์ง€์›๊ณผ RAG(Retrieval Augmented Generation) ์‹œ์Šคํ…œ์„ ๊ฐ–์ถ˜ ๊ณ ์„ฑ๋Šฅ AI API ์„œ๋ฒ„์ž…๋‹ˆ๋‹ค.
6
+
7
+ ### โœจ ์ฃผ์š” ๊ธฐ๋Šฅ
8
+
9
+ - **๐Ÿง  ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ AI**: Kanana-1.5-v-3b-instruct ๋ชจ๋ธ์„ ํ†ตํ•œ ํ…์ŠคํŠธ ๋ฐ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
10
+ - **๐Ÿ“š RAG ์‹œ์Šคํ…œ**: ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต ๋ฐ ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰
11
+ - **๐Ÿ” ๋ฒกํ„ฐ ๊ฒ€์ƒ‰**: FAISS ๊ธฐ๋ฐ˜ ๊ณ ์† ์œ ์‚ฌ๋„ ๊ฒ€์ƒ‰
12
+ - **๐Ÿ“„ ๋ฌธ์„œ ์ฒ˜๋ฆฌ**: PDF, DOCX, TXT ๋“ฑ ๋‹ค์–‘ํ•œ ๋ฌธ์„œ ํ˜•์‹ ์ง€์›
13
+ - **๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ OCR**: LaTeX-OCR์„ ํ†ตํ•œ ์ˆ˜ํ•™ ๊ณต์‹ ์ธ์‹
14
+ - **โšก ๋น„๋™๊ธฐ ์ฒ˜๋ฆฌ**: Celery ๊ธฐ๋ฐ˜ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—…
15
+ - **๐ŸŒ RESTful API**: FastAPI ๊ธฐ๋ฐ˜ ๊ณ ์„ฑ๋Šฅ ์›น API
16
+
17
+ ### ๐Ÿš€ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
18
+
19
+ #### 1. ํ…์ŠคํŠธ ์ƒ์„ฑ
20
+
21
+ ```python
22
+ import requests
23
+
24
+ response = requests.post(
25
+ "https://your-space-url/generate",
26
+ data={"prompt": "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜ ๋‚ ์”จ๊ฐ€ ์–ด๋–ค๊ฐ€์š”?"}
27
+ )
28
+ print(response.json())
29
+ ```
30
+
31
+ #### 2. ์ด๋ฏธ์ง€์™€ ํ•จ๊ป˜ ์งˆ์˜
32
+
33
+ ```python
34
+ import requests
35
+
36
+ with open("image.jpg", "rb") as f:
37
+ response = requests.post(
38
+ "https://your-space-url/generate",
39
+ data={"prompt": "์ด๋ฏธ์ง€์—์„œ ๋ฌด์—‡์„ ๋ณผ ์ˆ˜ ์žˆ๋‚˜์š”?"},
40
+ files={"image1": f}
41
+ )
42
+ print(response.json())
43
+ ```
44
+
45
+ #### 3. RAG ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต
46
+
47
+ ```python
48
+ import requests
49
+
50
+ # ๋ฌธ์„œ ์—…๋กœ๋“œ
51
+ with open("document.pdf", "rb") as f:
52
+ upload_response = requests.post(
53
+ "https://your-space-url/upload-document",
54
+ files={"file": f},
55
+ data={"user_id": "your_user_id"}
56
+ )
57
+
58
+ document_id = upload_response.json()["document_id"]
59
+
60
+ # RAG ์งˆ์˜
61
+ response = requests.post(
62
+ "https://your-space-url/rag-query",
63
+ json={
64
+ "query": "๋ฌธ์„œ์˜ ์ฃผ์š” ๋‚ด์šฉ์€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
65
+ "user_id": "your_user_id",
66
+ "document_id": document_id
67
+ }
68
+ )
69
+ print(response.json())
70
+ ```
71
+
72
+ ### ๐Ÿ“‹ API ์—”๋“œํฌ์ธํŠธ
73
+
74
+ #### ๊ธฐ๋ณธ ์—”๋“œํฌ์ธํŠธ
75
+ - `GET /health` - ์„œ๋ฒ„ ์ƒํƒœ ํ™•์ธ
76
+ - `GET /models` - ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก
77
+ - `POST /load-model` - ๋ชจ๋ธ ๋กœ๋“œ
78
+ - `POST /generate` - ํ…์ŠคํŠธ/์ด๋ฏธ์ง€ ์ƒ์„ฑ
79
+
80
+ #### RAG ์‹œ์Šคํ…œ
81
+ - `POST /upload-document` - ๋ฌธ์„œ ์—…๋กœ๋“œ
82
+ - `POST /rag-query` - RAG ๊ธฐ๋ฐ˜ ์งˆ์˜
83
+ - `GET /documents/{user_id}` - ์‚ฌ์šฉ์ž ๋ฌธ์„œ ๋ชฉ๋ก
84
+ - `DELETE /document/{document_id}` - ๋ฌธ์„œ ์‚ญ์ œ
85
+
86
+ #### ๊ณ ๊ธ‰ ๊ธฐ๋Šฅ
87
+ - `POST /batch-process` - ๋ฐฐ์น˜ ๋ฌธ์„œ ์ฒ˜๋ฆฌ
88
+ - `GET /task-status/{task_id}` - ์ž‘์—… ์ƒํƒœ ํ™•์ธ
89
+ - `POST /cancel-task/{task_id}` - ์ž‘์—… ์ทจ์†Œ
90
+
91
+ ### ๐Ÿ› ๏ธ ๊ธฐ์ˆ  ์Šคํƒ
92
+
93
+ - **Backend**: FastAPI, Python 3.11
94
+ - **AI Models**: Transformers, PyTorch
95
+ - **Vector DB**: FAISS, ChromaDB
96
+ - **Task Queue**: Celery, Redis
97
+ - **OCR**: LaTeX-OCR, EasyOCR
98
+ - **Document Processing**: LangChain
99
+
100
+ ### ๐Ÿ“Š ๋ชจ๋ธ ์ •๋ณด
101
+
102
+ #### Kanana-1.5-v-3b-instruct
103
+ - **ํฌ๊ธฐ**: 3.6B ๋งค๊ฐœ๋ณ€์ˆ˜
104
+ - **์–ธ์–ด**: ํ•œ๊ตญ์–ด ํŠนํ™”
105
+ - **๊ธฐ๋Šฅ**: ํ…์ŠคํŠธ ์ƒ์„ฑ, ์ด๋ฏธ์ง€ ์ดํ•ด
106
+ - **์ปจํ…์ŠคํŠธ**: ์ตœ๋Œ€ 4096 ํ† ํฐ
107
+
108
+ ### ๐Ÿ”ง ์„ค์ •
109
+
110
+ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋ฅผ ํ†ตํ•ด ๋‹ค์Œ ์„ค์ •์„ ์กฐ์ •ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค:
111
+
112
+ ```bash
113
+ # ์„œ๋ฒ„ ์„ค์ •
114
+ HOST=0.0.0.0
115
+ PORT=7860
116
+
117
+ # ๋ชจ๋ธ ์„ค์ •
118
+ DEFAULT_MODEL=kanana-1.5-v-3b-instruct
119
+ MAX_NEW_TOKENS=256
120
+ TEMPERATURE=0.7
121
+
122
+ # ์บ์‹œ ์„ค์ •
123
+ TRANSFORMERS_CACHE=/app/cache/transformers
124
+ HF_HOME=/app/cache/huggingface
125
+ ```
126
+
127
+ ### ๐Ÿ“ ๋ผ์ด์„ ์Šค
128
+
129
+ ์ด ํ”„๋กœ์ ํŠธ๋Š” MIT ๋ผ์ด์„ ์Šค ํ•˜์— ๋ฐฐํฌ๋ฉ๋‹ˆ๋‹ค.
130
+
131
+ ### ๐Ÿค ๊ธฐ์—ฌ
132
+
133
+ ๋ฒ„๊ทธ ๋ฆฌํฌํŠธ, ๊ธฐ๋Šฅ ์ œ์•ˆ, ํ’€ ๋ฆฌํ€˜์ŠคํŠธ๋ฅผ ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค!
134
+
135
+ ### ๐Ÿ“ž ์ง€์›
136
+
137
+ ๋ฌธ์˜์‚ฌํ•ญ์ด ์žˆ์œผ์‹œ๋ฉด GitHub Issues๋ฅผ ํ†ตํ•ด ์—ฐ๋ฝํ•ด ์ฃผ์„ธ์š”.
WINDOWS_GPU_DEPLOYMENT_GUIDE.md ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ๐Ÿ–ฅ๏ธ Windows GPU ํ™˜๊ฒฝ ๋ฐฐํฌ ๊ฐ€์ด๋“œ
2
+
3
+ ## ๐Ÿ“‹ ์‚ฌ์ „ ์š”๊ตฌ์‚ฌํ•ญ
4
+
5
+ ### 1. ํ•˜๋“œ์›จ์–ด ์š”๊ตฌ์‚ฌํ•ญ
6
+ - **GPU**: NVIDIA GPU (RTX 3060 ์ด์ƒ ๊ถŒ์žฅ)
7
+ - **๋ฉ”๋ชจ๋ฆฌ**: ์ตœ์†Œ 16GB RAM, ๊ถŒ์žฅ 32GB RAM
8
+ - **์ €์žฅ๊ณต๊ฐ„**: ์ตœ์†Œ 50GB ์—ฌ์œ  ๊ณต๊ฐ„
9
+
10
+ ### 2. ์†Œํ”„ํŠธ์›จ์–ด ์š”๊ตฌ์‚ฌํ•ญ
11
+
12
+ #### NVIDIA ๋“œ๋ผ์ด๋ฒ„ ์„ค์น˜
13
+ 1. **NVIDIA ์›น์‚ฌ์ดํŠธ ๋ฐฉ๋ฌธ**: https://www.nvidia.com/Download/index.aspx
14
+ 2. **GPU ๋ชจ๋ธ ์„ ํƒ**: ์‚ฌ์šฉ ์ค‘์ธ GPU ๋ชจ๋ธ ์„ ํƒ
15
+ 3. **๋“œ๋ผ์ด๋ฒ„ ๋‹ค์šด๋กœ๋“œ**: ์ตœ์‹  ๋“œ๋ผ์ด๋ฒ„ ๋‹ค์šด๋กœ๋“œ ๋ฐ ์„ค์น˜
16
+ 4. **์žฌ๋ถ€ํŒ…**: ์„ค์น˜ ์™„๋ฃŒ ํ›„ ์‹œ์Šคํ…œ ์žฌ๋ถ€ํŒ…
17
+
18
+ #### Docker Desktop ์„ค์น˜
19
+ 1. **Docker Desktop ๋‹ค์šด๋กœ๋“œ**: https://www.docker.com/products/docker-desktop
20
+ 2. **์„ค์น˜ ์‹คํ–‰**: ๋‹ค์šด๋กœ๋“œํ•œ ํŒŒ์ผ ์‹คํ–‰
21
+ 3. **WSL 2 ์„ค์ •**: Windows Subsystem for Linux 2 ํ™œ์„ฑํ™”
22
+ 4. **์žฌ๋ถ€ํŒ…**: ์„ค์น˜ ์™„๋ฃŒ ํ›„ ์‹œ์Šคํ…œ ์žฌ๋ถ€ํŒ…
23
+
24
+ #### Python GPU ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์„ค์น˜
25
+ ```cmd
26
+ # ๊ฐ€์ƒํ™˜๊ฒฝ ํ™œ์„ฑํ™”
27
+ lily_llm_env\Scripts\activate
28
+
29
+ # PyTorch GPU ๋ฒ„์ „ ์„ค์น˜
30
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
31
+
32
+ # Hugging Face ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์„ค์น˜
33
+ pip install transformers huggingface_hub
34
+
35
+ # ์ถ”๊ฐ€ GPU ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์„ค์น˜
36
+ pip install accelerate bitsandbytes
37
+ ```
38
+
39
+ ## ๐Ÿ”ง ํ™˜๊ฒฝ ์„ค์ •
40
+
41
+ ### 1. GPU ํ™˜๊ฒฝ ํ™•์ธ
42
+ ```cmd
43
+ cd C:\Project\lily_generate_project\lily_generate_package
44
+ python check_gpu_environment.py
45
+ ```
46
+
47
+ ### 2. Windows GPU ์„ค์ •
48
+ ```cmd
49
+ # Windows GPU ํ™˜๊ฒฝ ์„ค์ • ์Šคํฌ๋ฆฝํŠธ ์‹คํ–‰
50
+ windows_gpu_setup.bat
51
+ ```
52
+
53
+ ### 3. Hugging Face ์„ค์ •
54
+ ```cmd
55
+ # Hugging Face ํ† ํฐ ์„ค์ •
56
+ huggingface-cli login
57
+
58
+ # ๋˜๋Š” Python ์Šคํฌ๋ฆฝํŠธ๋กœ ์„ค์ •
59
+ python huggingface_gpu_setup.py
60
+ ```
61
+
62
+ ## ๐Ÿš€ ๋ฐฐํฌ ์‹คํ–‰
63
+
64
+ ### 1. ์ž๋™ ๋ฐฐํฌ (๊ถŒ์žฅ)
65
+ ```cmd
66
+ # Windows GPU ๋ฐฐํฌ ์Šคํฌ๋ฆฝํŠธ ์‹คํ–‰
67
+ deploy_gpu_windows.bat
68
+ ```
69
+
70
+ ### 2. ์ˆ˜๋™ ๋ฐฐํฌ
71
+ ```cmd
72
+ # 1. ๊ธฐ์กด ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ
73
+ docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans
74
+
75
+ # 2. ์ด๋ฏธ์ง€ ๋นŒ๋“œ
76
+ docker-compose -f docker-compose.gpu.yml build --no-cache
77
+
78
+ # 3. ์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘
79
+ docker-compose -f docker-compose.gpu.yml up -d
80
+
81
+ # 4. ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ
82
+ docker-compose -f docker-compose.gpu.yml logs -f
83
+ ```
84
+
85
+ ## ๐Ÿงช ํ…Œ์ŠคํŠธ
86
+
87
+ ### 1. GPU ๋ฐฐํฌ ํ…Œ์ŠคํŠธ
88
+ ```cmd
89
+ python test_gpu_deployment.py
90
+ ```
91
+
92
+ ### 2. Hugging Face ๋ชจ๋ธ ํ…Œ์ŠคํŠธ
93
+ ```cmd
94
+ python huggingface_gpu_setup.py
95
+ ```
96
+
97
+ ### 3. API ํ…Œ์ŠคํŠธ
98
+ ```cmd
99
+ curl http://localhost:8001/health
100
+ ```
101
+
102
+ ## ๐Ÿ“Š ๋ชจ๋‹ˆํ„ฐ๋ง
103
+
104
+ ### 1. GPU ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ
105
+ ```cmd
106
+ # GPU ์ •๋ณด ํ™•์ธ
107
+ nvidia-smi
108
+
109
+ # ์‹ค์‹œ๊ฐ„ ๋ชจ๋‹ˆํ„ฐ๋ง
110
+ nvidia-smi -l 1
111
+ ```
112
+
113
+ ### 2. ์ปจํ…Œ์ด๋„ˆ ์ƒํƒœ ํ™•์ธ
114
+ ```cmd
115
+ # ์‹คํ–‰ ์ค‘์ธ ์ปจํ…Œ์ด๋„ˆ ํ™•์ธ
116
+ docker ps
117
+
118
+ # ์ปจํ…Œ์ด๋„ˆ ๋ฆฌ์†Œ์Šค ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ
119
+ docker stats
120
+ ```
121
+
122
+ ### 3. ๋กœ๊ทธ ํ™•์ธ
123
+ ```cmd
124
+ # ์ „์ฒด ๋กœ๊ทธ
125
+ docker-compose -f docker-compose.gpu.yml logs -f
126
+
127
+ # ํŠน์ • ์„œ๋น„์Šค ๋กœ๊ทธ
128
+ docker-compose -f docker-compose.gpu.yml logs -f lily-llm-api-gpu
129
+ ```
130
+
131
+ ## ๐Ÿ”ง ๋ฌธ์ œ ํ•ด๊ฒฐ
132
+
133
+ ### 1. NVIDIA ๋“œ๋ผ์ด๋ฒ„ ๋ฌธ์ œ
134
+ ```cmd
135
+ # ๋“œ๋ผ์ด๋ฒ„ ๋ฒ„์ „ ํ™•์ธ
136
+ nvidia-smi
137
+
138
+ # ๋ฌธ์ œ ๋ฐœ์ƒ ์‹œ ๋“œ๋ผ์ด๋ฒ„ ์žฌ์„ค์น˜
139
+ # 1. ๊ธฐ์กด ๋“œ๋ผ์ด๋ฒ„ ์ œ๊ฑฐ
140
+ # 2. ์ตœ์‹  ๋“œ๋ผ์ด๋ฒ„ ๋‹ค์šด๋กœ๋“œ ๋ฐ ์„ค์น˜
141
+ # 3. ์‹œ์Šคํ…œ ์žฌ๋ถ€ํŒ…
142
+ ```
143
+
144
+ ### 2. Docker ๋ฌธ์ œ
145
+ ```cmd
146
+ # Docker Desktop ์žฌ์‹œ์ž‘
147
+ # Docker Desktop > Settings > General > Restart
148
+
149
+ # WSL 2 ํ™•์ธ
150
+ wsl --list --verbose
151
+
152
+ # Docker ๊ถŒํ•œ ๋ฌธ์ œ ํ•ด๊ฒฐ
153
+ # Docker Desktop > Settings > Resources > WSL Integration
154
+ ```
155
+
156
+ ### 3. CUDA ๋ฒ„์ „ ์ถฉ๋Œ
157
+ ```cmd
158
+ # PyTorch CUDA ๋ฒ„์ „ ํ™•์ธ
159
+ python -c "import torch; print(torch.version.cuda)"
160
+
161
+ # CUDA ๋ฒ„์ „์— ๋งž๋Š” PyTorch ์žฌ์„ค์น˜
162
+ pip uninstall torch torchvision torchaudio
163
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
164
+ ```
165
+
166
+ ### 4. ๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ
167
+ ```cmd
168
+ # GPU ๋ฉ”๋ชจ๋ฆฌ ํ™•์ธ
169
+ nvidia-smi
170
+
171
+ # ์ปจํ…Œ์ด๋„ˆ ์žฌ์‹œ์ž‘
172
+ docker-compose -f docker-compose.gpu.yml restart
173
+
174
+ # ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™” ์ ์šฉ
175
+ python performance_optimization.py
176
+ ```
177
+
178
+ ## ๐Ÿ“ˆ ์„ฑ๋Šฅ ์ตœ์ ํ™”
179
+
180
+ ### 1. Windows ์ „์šฉ ์ตœ์ ํ™”
181
+ ```cmd
182
+ # ๊ฐ€์ƒ ๋ฉ”๋ชจ๋ฆฌ ์ฆ๊ฐ€
183
+ # ์ œ์–ดํŒ > ์‹œ์Šคํ…œ > ๊ณ ๊ธ‰ ์‹œ์Šคํ…œ ์„ค์ • > ์„ฑ๋Šฅ > ์„ค์ • > ๊ณ ๊ธ‰ > ๊ฐ€์ƒ ๋ฉ”๋ชจ๋ฆฌ
184
+
185
+ # ์ „์› ์„ค์ • ์ตœ์ ํ™”
186
+ # ์ œ์–ดํŒ > ์ „์› ์˜ต์…˜ > ๊ณ ์„ฑ๋Šฅ ์„ ํƒ
187
+ ```
188
+
189
+ ### 2. Docker ์ตœ์ ํ™”
190
+ ```cmd
191
+ # Docker Desktop ์„ค์ • ์ตœ์ ํ™”
192
+ # Docker Desktop > Settings > Resources
193
+ # - Memory: 8GB ์ด์ƒ ํ• ๋‹น
194
+ # - CPUs: 4๊ฐœ ์ด์ƒ ํ• ๋‹น
195
+ # - Disk image size: 64GB ์ด์ƒ
196
+ ```
197
+
198
+ ### 3. GPU ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™”
199
+ ```python
200
+ # 4-bit ์–‘์žํ™” ์ ์šฉ
201
+ python huggingface_gpu_setup.py
202
+
203
+ # ๋ฐฐ์น˜ ํฌ๊ธฐ ์กฐ์ •
204
+ # config.yaml์—์„œ batch_size ์กฐ์ •
205
+ ```
206
+
207
+ ## ๐Ÿ”„ ์—…๋ฐ์ดํŠธ
208
+
209
+ ### 1. ๋ชจ๋ธ ์—…๋ฐ์ดํŠธ
210
+ ```cmd
211
+ # ์ตœ์‹  ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ
212
+ python huggingface_gpu_setup.py
213
+
214
+ # ์ปจํ…Œ์ด๋„ˆ ์žฌ์‹œ์ž‘
215
+ docker-compose -f docker-compose.gpu.yml restart
216
+ ```
217
+
218
+ ### 2. ์ฝ”๋“œ ์—…๋ฐ์ดํŠธ
219
+ ```cmd
220
+ # ์ฝ”๋“œ ๋ณ€๊ฒฝ ํ›„ ์žฌ๋นŒ๋“œ
221
+ docker-compose -f docker-compose.gpu.yml build --no-cache
222
+ docker-compose -f docker-compose.gpu.yml up -d
223
+ ```
224
+
225
+ ## ๐Ÿ“ž ์ง€์›
226
+
227
+ ### ๋ฌธ์ œ ๋ฐœ์ƒ ์‹œ ํ™•์ธ์‚ฌํ•ญ
228
+ 1. **GPU ๋“œ๋ผ์ด๋ฒ„**: `nvidia-smi` ๋ช…๋ น์–ด ์‹คํ–‰ ๊ฐ€๋Šฅ ์—ฌ๋ถ€
229
+ 2. **Docker Desktop**: WSL 2 ํ†ตํ•ฉ ํ™œ์„ฑํ™” ์—ฌ๋ถ€
230
+ 3. **CUDA ๋ฒ„์ „**: PyTorch์™€ CUDA ๋ฒ„์ „ ํ˜ธํ™˜์„ฑ
231
+ 4. **์‹œ์Šคํ…œ ๋ฉ”๋ชจ๋ฆฌ**: 16GB ์ด์ƒ ์—ฌ์œ  ๋ฉ”๋ชจ๋ฆฌ
232
+ 5. **GPU ๋ฉ”๋ชจ๋ฆฌ**: 8GB ์ด์ƒ GPU ๋ฉ”๋ชจ๋ฆฌ
233
+
234
+ ### ๋กœ๊ทธ ํŒŒ์ผ ์œ„์น˜
235
+ - **Docker ๋กœ๊ทธ**: `docker-compose -f docker-compose.gpu.yml logs`
236
+ - **์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ๋กœ๊ทธ**: `logs/` ๋””๋ ‰ํ† ๋ฆฌ
237
+ - **GPU ๋กœ๊ทธ**: `nvidia-smi`
238
+
239
+ ## ๐ŸŽฏ Windows ์ „์šฉ ํŒ
240
+
241
+ ### 1. WSL 2 ์ตœ์ ํ™”
242
+ ```cmd
243
+ # WSL 2 ๋ฉ”๋ชจ๋ฆฌ ์ œํ•œ ์„ค์ •
244
+ # %UserProfile%\.wslconfig ํŒŒ์ผ ์ƒ์„ฑ
245
+ [wsl2]
246
+ memory=8GB
247
+ processors=4
248
+ ```
249
+
250
+ ### 2. Windows Defender ์˜ˆ์™ธ ์„ค์ •
251
+ ```cmd
252
+ # ํ”„๋กœ์ ํŠธ ํด๋”๋ฅผ Windows Defender ์˜ˆ์™ธ์— ์ถ”๊ฐ€
253
+ # Windows ๋ณด์•ˆ > ๋ฐ”์ด๋Ÿฌ์Šค ๋ฐ ์œ„ํ˜‘ ๋ฐฉ์ง€ > ์„ค์ • > ์˜ˆ์™ธ ์ถ”๊ฐ€
254
+ ```
255
+
256
+ ### 3. ์ „์› ๊ด€๋ฆฌ ์ตœ์ ํ™”
257
+ ```cmd
258
+ # ๊ณ ์„ฑ๋Šฅ ์ „์› ๊ณ„ํš ์„ ํƒ
259
+ powercfg /setactive 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c
260
+ ```
261
+
262
+ ## ๐Ÿš€ ๋น ๋ฅธ ์‹œ์ž‘
263
+
264
+ ### 1๋‹จ๊ณ„: ํ™˜๊ฒฝ ํ™•์ธ
265
+ ```cmd
266
+ windows_gpu_setup.bat
267
+ ```
268
+
269
+ ### 2๋‹จ๊ณ„: Hugging Face ์„ค์ •
270
+ ```cmd
271
+ python huggingface_gpu_setup.py
272
+ ```
273
+
274
+ ### 3๋‹จ๊ณ„: GPU ๋ฐฐํฌ
275
+ ```cmd
276
+ deploy_gpu_windows.bat
277
+ ```
278
+
279
+ ### 4๋‹จ๊ณ„: ํ…Œ์ŠคํŠธ
280
+ ```cmd
281
+ python test_gpu_deployment.py
282
+ ```
283
+
284
+ ## ๐ŸŽ‰ ์„ฑ๊ณต ํ™•์ธ
285
+
286
+ ๋ฐฐํฌ๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์™„๋ฃŒ๋˜๋ฉด ๋‹ค์Œ ์„œ๋น„์Šค๋“ค์ด ์‹คํ–‰๋ฉ๋‹ˆ๋‹ค:
287
+
288
+ - โœ… **Lily LLM API**: http://localhost:8001
289
+ - โœ… **Hearth Chat**: http://localhost:8000
290
+ - โœ… **LaTeX-OCR Service**: ๋ณ„๋„ ์ปจํ…Œ์ด๋„ˆ๋กœ ์‹คํ–‰
291
+ - โœ… **GPU ๊ฐ€์†**: NVIDIA GPU ํ™œ์šฉ
292
+ - โœ… **Hugging Face ๋ชจ๋ธ**: ์ตœ์ ํ™”๋œ ๋ชจ๋ธ ๋กœ๋“œ
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Lily Generate Package
app_huggingface.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Hugging Face Spaces์šฉ Lily LLM API ์„œ๋ฒ„ ์ง„์ž…์ 
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import logging
9
+ import asyncio
10
+ import uvicorn
11
+ from pathlib import Path
12
+
13
+ # ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ๋ฅผ Python path์— ์ถ”๊ฐ€
14
+ project_root = Path(__file__).parent
15
+ sys.path.insert(0, str(project_root))
16
+
17
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
18
+ os.environ.setdefault("PYTHONPATH", str(project_root))
19
+ os.environ.setdefault("HOST", "0.0.0.0")
20
+ os.environ.setdefault("PORT", "7860")
21
+
22
+ # ๋กœ๊น… ์„ค์ •
23
+ logging.basicConfig(
24
+ level=logging.INFO,
25
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
26
+ )
27
+ logger = logging.getLogger(__name__)
28
+
29
def setup_huggingface_environment():
    """Prepare the runtime environment for Hugging Face Spaces.

    Creates the working directories the API expects and points the
    model-cache environment variables at paths inside the project, so
    downloads land on the Space's writable volume.

    Note: values are applied with ``setdefault`` so that configuration
    already provided by the Space (or the shell) is respected — this is
    consistent with the module-level ``os.environ.setdefault`` calls.
    """
    # Working directories used by the API (uploads, caches, vector stores, ...)
    directories = [
        "data", "logs", "models", "uploads",
        "vector_stores", "temp", "cache"
    ]

    for dir_name in directories:
        dir_path = project_root / dir_name
        # parents=True keeps this robust if a nested path is ever added
        dir_path.mkdir(parents=True, exist_ok=True)
        logger.info(f"📁 디렉토리 생성: {dir_path}")

    # Cache locations plus thread limits for the constrained Spaces CPU tier
    env_vars = {
        "TRANSFORMERS_CACHE": str(project_root / "cache" / "transformers"),
        "HF_HOME": str(project_root / "cache" / "huggingface"),
        "TORCH_HOME": str(project_root / "cache" / "torch"),
        "TOKENIZERS_PARALLELISM": "false",
        "OMP_NUM_THREADS": "1",
        "MKL_NUM_THREADS": "1"
    }

    for key, value in env_vars.items():
        # setdefault (not assignment): do not clobber user-provided values
        os.environ.setdefault(key, value)
        logger.info(f"🔧 환경변수 설정: {key}={value}")
56
+
57
async def main():
    """Entry point: configure the Spaces environment, then serve the app."""

    logger.info("🚀 Hugging Face Spaces용 Lily LLM API 서버 시작")

    # Must run before the app import so cache paths take effect
    setup_huggingface_environment()

    try:
        # Imported lazily: the FastAPI app reads env vars at import time
        from lily_llm_api.app_v2 import app

        bind_host = os.getenv("HOST", "0.0.0.0")
        bind_port = int(os.getenv("PORT", "7860"))
        logger.info(f"🌐 서버 시작: {bind_host}:{bind_port}")

        # Build and run the uvicorn server on the current event loop
        server = uvicorn.Server(
            uvicorn.Config(
                app=app,
                host=bind_host,
                port=bind_port,
                log_level="info",
                access_log=True,
                loop="asyncio",
            )
        )
        await server.serve()

    except Exception as e:
        logger.error(f"❌ 서버 시작 오류: {e}")
        import traceback
        logger.error(f"🔍 상세 오류:\n{traceback.format_exc()}")
        sys.exit(1)
93
+
94
if __name__ == "__main__":
    # asyncio.run is available on Python 3.7+
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("👋 서버 종료")
    except Exception as exc:
        logger.error(f"❌ 실행 오류: {exc}")
        sys.exit(1)
app_local.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ๋กœ์ปฌ ๊ฐœ๋ฐœ์šฉ Lily LLM API Server ์‹œ์ž‘์ 
2
+
3
+ import uvicorn
4
+ from lily_llm_api.app_v2 import app
5
+ import os
6
+ import logging
7
+
8
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
9
+ logger = logging.getLogger(__name__)
10
+
11
+ if __name__ == "__main__":
12
+ # ๋กœ์ปฌ ๊ฐœ๋ฐœ ํ™˜๊ฒฝ ์„ค์ •
13
+ host = os.getenv("HOST", "0.0.0.0")
14
+ port = int(os.getenv("PORT", 8001))
15
+
16
+ logger.info(f"๐Ÿ  ๋กœ์ปฌ ๊ฐœ๋ฐœ ์„œ๋ฒ„ ์‹œ์ž‘: {host}:{port}")
17
+ logger.info("๐Ÿ“ .env ํŒŒ์ผ์—์„œ ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค")
18
+
19
+ uvicorn.run(app, host=host, port=port, reload=True)
config.yaml ADDED
@@ -0,0 +1 @@
 
 
1
+
deploy_gpu.sh ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # GPU ํ™˜๊ฒฝ ๋ฐฐํฌ ์Šคํฌ๋ฆฝํŠธ
4
+ echo "๐Ÿš€ GPU ํ™˜๊ฒฝ ๋ฐฐํฌ ์‹œ์ž‘"
5
+
6
+ # NVIDIA Docker ์ง€์› ํ™•์ธ
7
+ if ! command -v nvidia-docker &> /dev/null; then
8
+ echo "โŒ NVIDIA Docker๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
9
+ echo "NVIDIA Docker ์„ค์น˜๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
10
+ exit 1
11
+ fi
12
+
13
+ # GPU ์‚ฌ์šฉ ๊ฐ€๋Šฅ ์—ฌ๋ถ€ ํ™•์ธ
14
+ if ! nvidia-smi &> /dev/null; then
15
+ echo "โŒ GPU๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
16
+ echo "GPU ๋“œ๋ผ์ด๋ฒ„๊ฐ€ ์„ค์น˜๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•ด์ฃผ์„ธ์š”."
17
+ exit 1
18
+ fi
19
+
20
+ echo "โœ… GPU ํ™˜๊ฒฝ ํ™•์ธ ์™„๋ฃŒ"
21
+
22
+ # ๊ธฐ์กด ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ
23
+ echo "๐Ÿงน ๊ธฐ์กด ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ ์ค‘..."
24
+ docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans
25
+
26
+ # ์ด๋ฏธ์ง€ ๋นŒ๋“œ
27
+ echo "๐Ÿ”จ Docker ์ด๋ฏธ์ง€ ๋นŒ๋“œ ์ค‘..."
28
+ docker-compose -f docker-compose.gpu.yml build --no-cache
29
+
30
+ # ์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘
31
+ echo "๐Ÿš€ ์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘ ์ค‘..."
32
+ docker-compose -f docker-compose.gpu.yml up -d
33
+
34
+ # ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ
35
+ echo "๐Ÿ“Š ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ ์ค‘..."
36
+ sleep 10
37
+
38
+ # ํ—ฌ์Šค์ฒดํฌ
39
+ echo "๐Ÿฅ ํ—ฌ์Šค์ฒดํฌ ์ค‘..."
40
+ for i in {1..30}; do
41
+ if curl -f http://localhost:8001/health &> /dev/null; then
42
+ echo "โœ… Lily LLM API ์„œ๋น„์Šค ์ •์ƒ"
43
+ break
44
+ fi
45
+
46
+ if [ $i -eq 30 ]; then
47
+ echo "โŒ ์„œ๋น„์Šค ์‹œ์ž‘ ์‹คํŒจ"
48
+ docker-compose -f docker-compose.gpu.yml logs
49
+ exit 1
50
+ fi
51
+
52
+ echo "โณ ์„œ๋น„์Šค ์‹œ์ž‘ ๋Œ€๊ธฐ ์ค‘... ($i/30)"
53
+ sleep 2
54
+ done
55
+
56
+ # GPU ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ
57
+ echo "๐ŸŽฎ GPU ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ:"
58
+ nvidia-smi
59
+
60
+ # ์„œ๋น„์Šค ์ •๋ณด ์ถœ๋ ฅ
61
+ echo ""
62
+ echo "๐ŸŽ‰ GPU ํ™˜๊ฒฝ ๋ฐฐํฌ ์™„๋ฃŒ!"
63
+ echo ""
64
+ echo "๐Ÿ“‹ ์„œ๋น„์Šค ์ •๋ณด:"
65
+ echo " - Lily LLM API: http://localhost:8001"
66
+ echo " - Hearth Chat: http://localhost:8000"
67
+ echo " - LaTeX-OCR Service: ๋ณ„๋„ ์ปจํ…Œ์ด๋„ˆ๋กœ ์‹คํ–‰ ์ค‘"
68
+ echo ""
69
+ echo "๐Ÿ”ง ์œ ์šฉํ•œ ๋ช…๋ น์–ด:"
70
+ echo " - ๋กœ๊ทธ ํ™•์ธ: docker-compose -f docker-compose.gpu.yml logs -f"
71
+ echo " - ์„œ๋น„์Šค ์ค‘์ง€: docker-compose -f docker-compose.gpu.yml down"
72
+ echo " - ์„œ๋น„์Šค ์žฌ์‹œ์ž‘: docker-compose -f docker-compose.gpu.yml restart"
73
+ echo ""
74
+ echo "๐Ÿงช ํ…Œ์ŠคํŠธ ๋ช…๋ น์–ด:"
75
+ echo " - API ํ…Œ์ŠคํŠธ: curl http://localhost:8001/health"
76
+ echo " - GPU ํ…Œ์ŠคํŠธ: python test_gpu_deployment.py"
deploy_gpu_huggingface.sh ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Hugging Face GPU ๋ฐฐํฌ ์Šคํฌ๋ฆฝํŠธ
4
+ echo "๐Ÿš€ Hugging Face GPU ํ™˜๊ฒฝ ๋ฐฐํฌ ์‹œ์ž‘"
5
+
6
+ # GPU ํ™˜๊ฒฝ ํ™•์ธ
7
+ echo "๐Ÿ” GPU ํ™˜๊ฒฝ ํ™•์ธ ์ค‘..."
8
+ python check_gpu_environment.py
9
+
10
+ if [ $? -ne 0 ]; then
11
+ echo "โŒ GPU ํ™˜๊ฒฝ ํ™•์ธ ์‹คํŒจ"
12
+ exit 1
13
+ fi
14
+
15
+ # Hugging Face ์„ค์ •
16
+ echo "๐Ÿ”ง Hugging Face ํ™˜๊ฒฝ ์„ค์ • ์ค‘..."
17
+ python huggingface_gpu_setup.py
18
+
19
+ # ๊ธฐ์กด ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ
20
+ echo "๐Ÿงน ๊ธฐ์กด ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ ์ค‘..."
21
+ docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans
22
+
23
+ # GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
24
+ echo "๐Ÿ’พ GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ ์ค‘..."
25
+ nvidia-smi --gpu-reset
26
+
27
+ # ์ด๋ฏธ์ง€ ๋นŒ๋“œ
28
+ echo "๐Ÿ”จ Docker ์ด๋ฏธ์ง€ ๋นŒ๋“œ ์ค‘..."
29
+ docker-compose -f docker-compose.gpu.yml build --no-cache
30
+
31
+ # ์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘
32
+ echo "๐Ÿš€ ์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘ ์ค‘..."
33
+ docker-compose -f docker-compose.gpu.yml up -d
34
+
35
+ # ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ
36
+ echo "๐Ÿ“Š ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ ์ค‘..."
37
+ sleep 15
38
+
39
+ # ํ—ฌ์Šค์ฒดํฌ
40
+ echo "๐Ÿฅ ํ—ฌ์Šค์ฒดํฌ ์ค‘..."
41
+ for i in {1..30}; do
42
+ if curl -f http://localhost:8001/health &> /dev/null; then
43
+ echo "โœ… Lily LLM API ์„œ๋น„์Šค ์ •์ƒ"
44
+ break
45
+ fi
46
+
47
+ if [ $i -eq 30 ]; then
48
+ echo "โŒ ์„œ๋น„์Šค ์‹œ์ž‘ ์‹คํŒจ"
49
+ docker-compose -f docker-compose.gpu.yml logs
50
+ exit 1
51
+ fi
52
+
53
+ echo "โณ ์„œ๋น„์Šค ์‹œ์ž‘ ๋Œ€๊ธฐ ์ค‘... ($i/30)"
54
+ sleep 2
55
+ done
56
+
57
+ # GPU ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ
58
+ echo "๐ŸŽฎ GPU ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ:"
59
+ nvidia-smi
60
+
61
+ # Hugging Face ๋ชจ๋ธ ํ…Œ์ŠคํŠธ
62
+ echo "๐Ÿงช Hugging Face ๋ชจ๋ธ ํ…Œ์ŠคํŠธ ์ค‘..."
63
+ python test_gpu_deployment.py
64
+
65
+ # ์„ฑ๋Šฅ ์ตœ์ ํ™” ์ ์šฉ
66
+ echo "โšก ์„ฑ๋Šฅ ์ตœ์ ํ™” ์ ์šฉ ์ค‘..."
67
+ python performance_optimization.py
68
+
69
+ # ์„œ๋น„์Šค ์ •๋ณด ์ถœ๋ ฅ
70
+ echo ""
71
+ echo "๐ŸŽ‰ Hugging Face GPU ํ™˜๊ฒฝ ๋ฐฐํฌ ์™„๋ฃŒ!"
72
+ echo ""
73
+ echo "๐Ÿ“‹ ์„œ๋น„์Šค ์ •๋ณด:"
74
+ echo " - Lily LLM API: http://localhost:8001"
75
+ echo " - Hearth Chat: http://localhost:8000"
76
+ echo " - LaTeX-OCR Service: ๋ณ„๋„ ์ปจํ…Œ์ด๋„ˆ๋กœ ์‹คํ–‰ ์ค‘"
77
+ echo ""
78
+ echo "๐Ÿ”ง ์œ ์šฉํ•œ ๋ช…๋ น์–ด:"
79
+ echo " - ๋กœ๊ทธ ํ™•์ธ: docker-compose -f docker-compose.gpu.yml logs -f"
80
+ echo " - ์„œ๋น„์Šค ์ค‘์ง€: docker-compose -f docker-compose.gpu.yml down"
81
+ echo " - ์„œ๋น„์Šค ์žฌ์‹œ์ž‘: docker-compose -f docker-compose.gpu.yml restart"
82
+ echo ""
83
+ echo "๐Ÿงช ํ…Œ์ŠคํŠธ ๋ช…๋ น์–ด:"
84
+ echo " - API ํ…Œ์ŠคํŠธ: curl http://localhost:8001/health"
85
+ echo " - GPU ํ…Œ์ŠคํŠธ: python test_gpu_deployment.py"
86
+ echo " - Hugging Face ํ…Œ์ŠคํŠธ: python huggingface_gpu_setup.py"
87
+ echo ""
88
+ echo "๐Ÿ’ก Hugging Face ๋ชจ๋ธ ์‚ฌ์šฉ:"
89
+ echo " - ๋ชจ๋ธ ๋กœ๋“œ: python huggingface_gpu_setup.py"
90
+ echo " - ํ† ํฐ ์„ค์ •: huggingface-cli login"
deploy_gpu_windows.bat ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @echo off
2
+ echo ๐Ÿš€ Windows GPU ํ™˜๊ฒฝ ๋ฐฐํฌ ์‹œ์ž‘
3
+ echo ========================================
4
+
5
+ REM GPU ํ™˜๊ฒฝ ํ™•์ธ
6
+ echo ๐Ÿ” GPU ํ™˜๊ฒฝ ํ™•์ธ ์ค‘...
7
+ python check_gpu_environment.py
8
+ if %errorlevel% neq 0 (
9
+ echo โŒ GPU ํ™˜๊ฒฝ ํ™•์ธ ์‹คํŒจ
10
+ echo ๐Ÿ’ก GPU ํ™˜๊ฒฝ์„ ๋จผ์ € ์„ค์ •ํ•ด์ฃผ์„ธ์š”
11
+ pause
12
+ exit /b 1
13
+ )
14
+
15
+ REM Hugging Face ์„ค์ •
16
+ echo ๐Ÿ”ง Hugging Face ํ™˜๊ฒฝ ์„ค์ • ์ค‘...
17
+ python huggingface_gpu_setup.py
18
+
19
+ REM ๊ธฐ์กด ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ
20
+ echo ๐Ÿงน ๊ธฐ์กด ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ ์ค‘...
21
+ docker-compose -f docker-compose.gpu.yml down --volumes --remove-orphans
22
+
23
+ REM GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ (Windows์—์„œ๋Š” ์ œํ•œ์ )
24
+ echo ๐Ÿ’พ GPU ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ ์ค‘...
25
+ REM Windows์—์„œ๋Š” nvidia-smi --gpu-reset๊ฐ€ ์ œํ•œ์ ์ด๋ฏ€๋กœ ๊ฑด๋„ˆ๋œ€
26
+
27
+ REM ์ด๋ฏธ์ง€ ๋นŒ๋“œ
28
+ echo ๐Ÿ”จ Docker ์ด๋ฏธ์ง€ ๋นŒ๋“œ ์ค‘...
29
+ docker-compose -f docker-compose.gpu.yml build --no-cache
30
+
31
+ REM ์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘
32
+ echo ๐Ÿš€ ์ปจํ…Œ์ด๋„ˆ ์‹œ์ž‘ ์ค‘...
33
+ docker-compose -f docker-compose.gpu.yml up -d
34
+
35
+ REM ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ
36
+ echo ๐Ÿ“Š ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ ์ค‘...
37
+ timeout /t 15 /nobreak >nul
38
+
39
+ REM ํ—ฌ์Šค์ฒดํฌ
40
+ echo ๐Ÿฅ ํ—ฌ์Šค์ฒดํฌ ์ค‘...
41
+ for /l %%i in (1,1,30) do (
42
+ curl -f http://localhost:8001/health >nul 2>&1
43
+ if !errorlevel! equ 0 (
44
+ echo โœ… Lily LLM API ์„œ๋น„์Šค ์ •์ƒ
45
+ goto :health_check_passed
46
+ )
47
+ echo โณ ์„œ๋น„์Šค ์‹œ์ž‘ ๋Œ€๊ธฐ ์ค‘... (%%i/30)
48
+ timeout /t 2 /nobreak >nul
49
+ )
50
+ echo โŒ ์„œ๋น„์Šค ์‹œ์ž‘ ์‹คํŒจ
51
+ docker-compose -f docker-compose.gpu.yml logs
52
+ pause
53
+ exit /b 1
54
+
55
+ :health_check_passed
56
+ REM GPU ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ
57
+ echo ๐ŸŽฎ GPU ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ:
58
+ nvidia-smi 2>nul || echo โš ๏ธ nvidia-smi๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค
59
+
60
+ REM Hugging Face ๋ชจ๋ธ ํ…Œ์ŠคํŠธ
61
+ echo ๐Ÿงช Hugging Face ๋ชจ๋ธ ํ…Œ์ŠคํŠธ ์ค‘...
62
+ python test_gpu_deployment.py
63
+
64
+ REM ์„ฑ๋Šฅ ์ตœ์ ํ™” ์ ์šฉ
65
+ echo โšก ์„ฑ๋Šฅ ์ตœ์ ํ™” ์ ์šฉ ์ค‘...
66
+ python performance_optimization.py
67
+
68
+ REM ์„œ๋น„์Šค ์ •๋ณด ์ถœ๋ ฅ
69
+ echo.
70
+ echo ๐ŸŽ‰ Windows GPU ํ™˜๊ฒฝ ๋ฐฐํฌ ์™„๋ฃŒ!
71
+ echo.
72
+ echo ๐Ÿ“‹ ์„œ๋น„์Šค ์ •๋ณด:
73
+ echo - Lily LLM API: http://localhost:8001
74
+ echo - Hearth Chat: http://localhost:8000
75
+ echo - LaTeX-OCR Service: ๋ณ„๋„ ์ปจํ…Œ์ด๋„ˆ๋กœ ์‹คํ–‰ ์ค‘
76
+ echo.
77
+ echo ๐Ÿ”ง ์œ ์šฉํ•œ ๋ช…๋ น์–ด:
78
+ echo - ๋กœ๊ทธ ํ™•์ธ: docker-compose -f docker-compose.gpu.yml logs -f
79
+ echo - ์„œ๋น„์Šค ์ค‘์ง€: docker-compose -f docker-compose.gpu.yml down
80
+ echo - ์„œ๋น„์Šค ์žฌ์‹œ์ž‘: docker-compose -f docker-compose.gpu.yml restart
81
+ echo.
82
+ echo ๐Ÿงช ํ…Œ์ŠคํŠธ ๋ช…๋ น์–ด:
83
+ echo - API ํ…Œ์ŠคํŠธ: curl http://localhost:8001/health
84
+ echo - GPU ํ…Œ์ŠคํŠธ: python test_gpu_deployment.py
85
+ echo - Hugging Face ํ…Œ์ŠคํŠธ: python huggingface_gpu_setup.py
86
+ echo.
87
+ echo ๐Ÿ’ก Hugging Face ๋ชจ๋ธ ์‚ฌ์šฉ:
88
+ echo - ๋ชจ๋ธ ๋กœ๋“œ: python huggingface_gpu_setup.py
89
+ echo - ํ† ํฐ ์„ค์ •: huggingface-cli login
90
+ echo.
91
+ pause
docker-compose.gpu.yml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ lily-llm-api-gpu:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile.gpu
8
+ container_name: lily-llm-api-gpu
9
+ ports:
10
+ - "8001:8001"
11
+ volumes:
12
+ - ./uploads:/app/uploads
13
+ - ./vector_stores:/app/vector_stores
14
+ - ./latex_ocr_faiss_stores:/app/latex_ocr_faiss_stores
15
+ - ./lily_llm_media:/app/lily_llm_media
16
+ - ./hearth_llm_model:/app/hearth_llm_model
17
+ environment:
18
+ - CUDA_VISIBLE_DEVICES=0
19
+ - PYTHONPATH=/app
20
+ - LILY_LLM_ENV=production
21
+ deploy:
22
+ resources:
23
+ reservations:
24
+ devices:
25
+ - driver: nvidia
26
+ count: 1
27
+ capabilities: [ gpu ]
28
+ restart: unless-stopped
29
+ networks:
30
+ - lily-network
31
+
32
+ # LaTeX-OCR ์ „์šฉ ์ปจํ…Œ์ด๋„ˆ (CPU ๊ธฐ๋ฐ˜)
33
+ latex-ocr-service:
34
+ build:
35
+ context: .
36
+ dockerfile: Dockerfile.latex-ocr
37
+ container_name: latex-ocr-service
38
+ volumes:
39
+ - ./uploads:/app/uploads
40
+ - ./latex_ocr_faiss_stores:/app/latex_ocr_faiss_stores
41
+ environment:
42
+ - PYTHONPATH=/app
43
+ restart: unless-stopped
44
+ networks:
45
+ - lily-network
46
+
47
+ # Hearth Chat ์„œ๋น„์Šค (๋ณ„๋„ ์ปจํ…Œ์ด๋„ˆ)
48
+ hearth-chat:
49
+ image: node:18-alpine
50
+ container_name: hearth-chat
51
+ working_dir: /app
52
+ volumes:
53
+ - ../hearth_chat_package:/app
54
+ ports:
55
+ - "8000:8000"
56
+ command: [ "npm", "start" ]
57
+ restart: unless-stopped
58
+ networks:
59
+ - lily-network
60
+
61
+ networks:
62
+ lily-network:
63
+ driver: bridge
64
+
65
+ volumes:
66
+ lily-data:
docker-compose.yml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ # Lily LLM API Server
5
+ lily-llm-api:
6
+ build: .
7
+ container_name: lily-llm-api
8
+ ports:
9
+ - "8001:8001"
10
+ environment:
11
+ - REDIS_URL=redis://redis:6379
12
+ - DATABASE_URL=sqlite:///app/data/lily_llm.db
13
+ - LOG_LEVEL=INFO
14
+ - CELERY_BROKER_URL=redis://redis:6379/0
15
+ - CELERY_RESULT_BACKEND=redis://redis:6379/0
16
+ volumes:
17
+ - ./data:/app/data
18
+ - ./logs:/app/logs
19
+ - ./models:/app/models
20
+ - ./uploads:/app/uploads
21
+ depends_on:
22
+ - redis
23
+ restart: unless-stopped
24
+ networks:
25
+ - lily-network
26
+
27
+ # Redis for Celery and caching
28
+ redis:
29
+ image: redis:7-alpine
30
+ container_name: lily-redis
31
+ ports:
32
+ - "6379:6379"
33
+ volumes:
34
+ - redis_data:/data
35
+ restart: unless-stopped
36
+ networks:
37
+ - lily-network
38
+
39
+ # Celery Worker
40
+ celery-worker:
41
+ build: .
42
+ container_name: lily-celery-worker
43
+ command: celery -A lily_llm_core.celery_app worker --loglevel=info
44
+ environment:
45
+ - REDIS_URL=redis://redis:6379
46
+ - DATABASE_URL=sqlite:///app/data/lily_llm.db
47
+ - CELERY_BROKER_URL=redis://redis:6379/0
48
+ - CELERY_RESULT_BACKEND=redis://redis:6379/0
49
+ volumes:
50
+ - ./data:/app/data
51
+ - ./logs:/app/logs
52
+ - ./models:/app/models
53
+ - ./uploads:/app/uploads
54
+ depends_on:
55
+ - redis
56
+ restart: unless-stopped
57
+ networks:
58
+ - lily-network
59
+
60
+ # Celery Beat (Scheduler)
61
+ celery-beat:
62
+ build: .
63
+ container_name: lily-celery-beat
64
+ command: celery -A lily_llm_core.celery_app beat --loglevel=info
65
+ environment:
66
+ - REDIS_URL=redis://redis:6379
67
+ - DATABASE_URL=sqlite:///app/data/lily_llm.db
68
+ - CELERY_BROKER_URL=redis://redis:6379/0
69
+ - CELERY_RESULT_BACKEND=redis://redis:6379/0
70
+ volumes:
71
+ - ./data:/app/data
72
+ - ./logs:/app/logs
73
+ depends_on:
74
+ - redis
75
+ restart: unless-stopped
76
+ networks:
77
+ - lily-network
78
+
79
+ # Flower (Celery Monitoring)
80
+ flower:
81
+ build: .
82
+ container_name: lily-flower
83
+ command: celery -A lily_llm_core.celery_app flower --port=5555
84
+ ports:
85
+ - "5555:5555"
86
+ environment:
87
+ - CELERY_BROKER_URL=redis://redis:6379/0
88
+ - CELERY_RESULT_BACKEND=redis://redis:6379/0
89
+ depends_on:
90
+ - redis
91
+ restart: unless-stopped
92
+ networks:
93
+ - lily-network
94
+
95
+ volumes:
96
+ redis_data:
97
+
98
+
99
+ networks:
100
+ lily-network:
101
+ driver: bridge
docs/API_REFERENCE.md ADDED
@@ -0,0 +1,507 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM API ์ฐธ์กฐ ๋ฌธ์„œ
2
+
3
+ ## ๐Ÿ“‹ ๊ฐœ์š”
4
+
5
+ Lily LLM API๋Š” ๋‹ค์–‘ํ•œ ์–ธ์–ด ๋ชจ๋ธ์„ ์ง€์›ํ•˜๋Š” RESTful API ์„œ๋ฒ„์ž…๋‹ˆ๋‹ค. ํ…์ŠคํŠธ ์ƒ์„ฑ, ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ, RAG(Retrieval-Augmented Generation) ๊ธฐ๋Šฅ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
6
+
7
+ ## ๐Ÿ”— ๊ธฐ๋ณธ ์ •๋ณด
8
+
9
+ - **Base URL**: `http://localhost:8001`
10
+ - **API ๋ฌธ์„œ**: `http://localhost:8001/docs`
11
+ - **ReDoc ๋ฌธ์„œ**: `http://localhost:8001/redoc`
12
+
13
+ ## ๐Ÿ” ์ธ์ฆ
14
+
15
+ ### JWT ํ† ํฐ ์ธ์ฆ
16
+
17
+ ```bash
18
+ # ๋กœ๊ทธ์ธ
19
+ curl -X POST "http://localhost:8001/auth/login" \
20
+ -H "Content-Type: application/x-www-form-urlencoded" \
21
+ -d "username=your_username&password=your_password"
22
+
23
+ # ์‘๋‹ต
24
+ {
25
+ "access_token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9...",
26
+ "refresh_token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9...",
27
+ "token_type": "bearer"
28
+ }
29
+ ```
30
+
31
+ ### ๋ณดํ˜ธ๋œ ์—”๋“œํฌ์ธํŠธ ์‚ฌ์šฉ
32
+
33
+ ```bash
34
+ curl -X GET "http://localhost:8001/auth/me" \
35
+ -H "Authorization: Bearer YOUR_ACCESS_TOKEN"
36
+ ```
37
+
38
+ ## ๐Ÿค– AI ๋ชจ๋ธ ๊ด€๋ จ
39
+
40
+ ### 1. ๋ชจ๋ธ ๋ชฉ๋ก ์กฐํšŒ
41
+
42
+ ```http
43
+ GET /models
44
+ ```
45
+
46
+ **์‘๋‹ต ์˜ˆ์‹œ:**
47
+ ```json
48
+ {
49
+ "available_models": [
50
+ {
51
+ "model_id": "polyglot-ko-1.3b-chat",
52
+ "display_name": "Polyglot Korean 1.3B Chat",
53
+ "model_type": "text",
54
+ "description": "ํ•œ๊ตญ์–ด ํŠนํ™” ํ…์ŠคํŠธ ์ƒ์„ฑ ๋ชจ๋ธ"
55
+ },
56
+ {
57
+ "model_id": "kanana-1.5-v-3b-instruct",
58
+ "display_name": "Kanana 1.5 v3B Instruct",
59
+ "model_type": "multimodal",
60
+ "description": "๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ด๋ฏธ์ง€+ํ…์ŠคํŠธ ์ฒ˜๋ฆฌ ๋ชจ๋ธ"
61
+ }
62
+ ],
63
+ "current_model": "polyglot-ko-1.3b-chat"
64
+ }
65
+ ```
66
+
67
+ ### 2. ํ…์ŠคํŠธ ์ƒ์„ฑ
68
+
69
+ ```http
70
+ POST /generate
71
+ ```
72
+
73
+ **์š”์ฒญ ํŒŒ๋ผ๋ฏธํ„ฐ:**
74
+ ```json
75
+ {
76
+ "prompt": "์•ˆ๋…•ํ•˜์„ธ์š”, AI์— ๋Œ€ํ•ด ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”.",
77
+ "model_id": "polyglot-ko-1.3b-chat",
78
+ "max_length": 200,
79
+ "temperature": 0.7,
80
+ "top_p": 0.9,
81
+ "do_sample": true
82
+ }
83
+ ```
84
+
85
+ **์‘๋‹ต ์˜ˆ์‹œ:**
86
+ ```json
87
+ {
88
+ "generated_text": "์•ˆ๋…•ํ•˜์„ธ์š”! AI(์ธ๊ณต์ง€๋Šฅ)๋Š” ์ธ๊ฐ„์˜ ํ•™์Šต๋Šฅ๋ ฅ๊ณผ ์ถ”๋ก ๋Šฅ๋ ฅ์„ ์ธ๊ณต์ ์œผ๋กœ ๊ตฌํ˜„ํ•œ ์ปดํ“จํ„ฐ ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค...",
89
+ "model_name": "polyglot-ko-1.3b-chat",
90
+ "processing_time": 2.34,
91
+ "tokens_generated": 45
92
+ }
93
+ ```
94
+
95
+ ### 3. ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ (์ด๋ฏธ์ง€ + ํ…์ŠคํŠธ)
96
+
97
+ ```http
98
+ POST /generate-multimodal
99
+ ```
100
+
101
+ **์š”์ฒญ (multipart/form-data):**
102
+ ```
103
+ prompt: "์ด ์ด๋ฏธ์ง€์— ๋Œ€ํ•ด ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”"
104
+ model_id: "kanana-1.5-v-3b-instruct"
105
+ max_length: 200
106
+ temperature: 0.7
107
+ image_files: [ํŒŒ์ผ1, ํŒŒ์ผ2, ...]
108
+ ```
109
+
110
+ **์‘๋‹ต ์˜ˆ์‹œ:**
111
+ ```json
112
+ {
113
+ "generated_text": "์ด ์ด๋ฏธ์ง€๋Š” ์•„๋ฆ„๋‹ค์šด ์ž์—ฐ ํ’๊ฒฝ์„ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค...",
114
+ "model_name": "kanana-1.5-v-3b-instruct",
115
+ "processing_time": 15.67,
116
+ "images_processed": 2
117
+ }
118
+ ```
119
+
120
+ ## ๐Ÿ“„ ๋ฌธ์„œ ์ฒ˜๋ฆฌ (RAG)
121
+
122
+ ### 1. ๋ฌธ์„œ ์—…๋กœ๋“œ
123
+
124
+ ```http
125
+ POST /document/upload
126
+ ```
127
+
128
+ **์š”์ฒญ (multipart/form-data):**
129
+ ```
130
+ file: [PDF/DOC/DOCX/PPTX ํŒŒ์ผ]
131
+ user_id: "user123"
132
+ ```
133
+
134
+ **์‘๋‹ต ์˜ˆ์‹œ:**
135
+ ```json
136
+ {
137
+ "document_id": "doc_123456",
138
+ "filename": "sample.pdf",
139
+ "file_type": "pdf",
140
+ "file_size": 1024000,
141
+ "pages": 15,
142
+ "chunks": 45,
143
+ "upload_time": "2025-08-04T10:30:00Z"
144
+ }
145
+ ```
146
+
147
+ ### 2. RAG ์ฟผ๋ฆฌ
148
+
149
+ ```http
150
+ POST /rag/generate
151
+ ```
152
+
153
+ **์š”์ฒญ ํŒŒ๋ผ๋ฏธํ„ฐ:**
154
+ ```json
155
+ {
156
+ "query": "์ธ๊ณต์ง€๋Šฅ์˜ ๋ฏธ๋ž˜์— ๋Œ€ํ•ด ์•Œ๋ ค์ฃผ์„ธ์š”",
157
+ "user_id": "user123",
158
+ "max_length": 300,
159
+ "temperature": 0.7
160
+ }
161
+ ```
162
+
163
+ **์‘๋‹ต ์˜ˆ์‹œ:**
164
+ ```json
165
+ {
166
+ "response": "์ธ๊ณต์ง€๋Šฅ์˜ ๋ฏธ๋ž˜๋Š” ๋งค์šฐ ๋ฐ์Šต๋‹ˆ๋‹ค. ํ˜„์žฌ ๋ฌธ์„œ์— ๋”ฐ๋ฅด๋ฉด...",
167
+ "sources": [
168
+ {
169
+ "document_id": "doc_123456",
170
+ "page": 5,
171
+ "chunk": "AI ๊ธฐ์ˆ ์˜ ๋ฐœ์ „ ๋ฐฉํ–ฅ..."
172
+ }
173
+ ],
174
+ "confidence": 0.85,
175
+ "processing_time": 3.45
176
+ }
177
+ ```
178
+
179
+ ### 3. ํ•˜์ด๋ธŒ๋ฆฌ๋“œ RAG (์ด๋ฏธ์ง€ + ๋ฌธ์„œ)
180
+
181
+ ```http
182
+ POST /rag/generate-hybrid
183
+ ```
184
+
185
+ **์š”์ฒญ (multipart/form-data):**
186
+ ```
187
+ query: "์ด ์ด๋ฏธ์ง€์™€ ๊ด€๋ จ๋œ ๋ฌธ์„œ ๋‚ด์šฉ์„ ์ฐพ์•„์ฃผ์„ธ์š”"
188
+ user_id: "user123"
189
+ image_files: [์ด๋ฏธ์ง€ ํŒŒ์ผ๋“ค]
190
+ max_length: 300
191
+ temperature: 0.7
192
+ ```
193
+
194
+ ## ๐Ÿ’ฌ ์ฑ„ํŒ… ๋ฐ ์„ธ์…˜ ๊ด€๋ฆฌ
195
+
196
+ ### 1. ์‚ฌ์šฉ์ž ์ƒ์„ฑ
197
+
198
+ ```http
199
+ POST /user/create
200
+ ```
201
+
202
+ **์š”์ฒญ ํŒŒ๋ผ๋ฏธํ„ฐ:**
203
+ ```json
204
+ {
205
+ "user_id": "user123",
206
+ "username": "ํ…Œ์ŠคํŠธ์‚ฌ์šฉ์ž",
207
+ "email": "test@example.com"
208
+ }
209
+ ```
210
+
211
+ ### 2. ์ฑ„ํŒ… ์„ธ์…˜ ์ƒ์„ฑ
212
+
213
+ ```http
214
+ POST /session/create
215
+ ```
216
+
217
+ **์š”์ฒญ ํŒŒ๋ผ๋ฏธํ„ฐ:**
218
+ ```json
219
+ {
220
+ "user_id": "user123",
221
+ "session_name": "AI ์ƒ๋‹ด ์„ธ์…˜"
222
+ }
223
+ ```
224
+
225
+ ### 3. ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
226
+
227
+ ```http
228
+ POST /chat/message
229
+ ```
230
+
231
+ **์š”์ฒญ ํŒŒ๋ผ๋ฏธํ„ฐ:**
232
+ ```json
233
+ {
234
+ "session_id": "session_123",
235
+ "user_id": "user123",
236
+ "message_type": "text",
237
+ "content": "์•ˆ๋…•ํ•˜์„ธ์š”!"
238
+ }
239
+ ```
240
+
241
+ ### 4. ์ฑ„ํŒ… ๊ธฐ๋ก ์กฐํšŒ
242
+
243
+ ```http
244
+ GET /chat/history/{session_id}
245
+ ```
246
+
247
+ ## ๐Ÿ”„ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—…
248
+
249
+ ### 1. ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ž‘์—…
250
+
251
+ ```http
252
+ POST /tasks/document/process
253
+ ```
254
+
255
+ **์š”์ฒญ ํŒŒ๋ผ๋ฏธํ„ฐ:**
256
+ ```json
257
+ {
258
+ "file_path": "/uploads/document.pdf",
259
+ "user_id": "user123"
260
+ }
261
+ ```
262
+
263
+ **์‘๋‹ต ์˜ˆ์‹œ:**
264
+ ```json
265
+ {
266
+ "task_id": "task_123456",
267
+ "status": "PENDING",
268
+ "message": "๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ž‘์—…์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
269
+ }
270
+ ```
271
+
272
+ ### 2. ์ž‘์—… ์ƒํƒœ ํ™•์ธ
273
+
274
+ ```http
275
+ GET /tasks/{task_id}
276
+ ```
277
+
278
+ **์‘๋‹ต ์˜ˆ์‹œ:**
279
+ ```json
280
+ {
281
+ "task_id": "task_123456",
282
+ "status": "SUCCESS",
283
+ "result": {
284
+ "document_id": "doc_123456",
285
+ "chunks": 45
286
+ },
287
+ "progress": 100
288
+ }
289
+ ```
290
+
291
+ ## ๐Ÿ“Š ๋ชจ๋‹ˆํ„ฐ๋ง
292
+
293
+ ### 1. ์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์ž‘
294
+
295
+ ```http
296
+ POST /monitoring/start
297
+ ```
298
+
299
+ ### 2. ์„ฑ๋Šฅ ์ƒํƒœ ์กฐํšŒ
300
+
301
+ ```http
302
+ GET /monitoring/status
303
+ ```
304
+
305
+ **์‘๋‹ต ์˜ˆ์‹œ:**
306
+ ```json
307
+ {
308
+ "current_metrics": {
309
+ "cpu_percent": 25.5,
310
+ "memory_percent": 68.2,
311
+ "memory_used_mb": 8192.0,
312
+ "disk_usage_percent": 45.0
313
+ },
314
+ "performance_stats": {
315
+ "avg_response_time": 1.23,
316
+ "avg_inference_time": 2.45,
317
+ "total_requests": 1250,
318
+ "success_rate": 98.5
319
+ },
320
+ "system_health": {
321
+ "status": "healthy",
322
+ "recommendations": []
323
+ }
324
+ }
325
+ ```
326
+
327
+ ### 3. ์‹œ์Šคํ…œ ๊ฑด๊ฐ• ์ƒํƒœ
328
+
329
+ ```http
330
+ GET /monitoring/health
331
+ ```
332
+
333
+ ## ๐Ÿ”Œ WebSocket
334
+
335
+ ### ์—ฐ๊ฒฐ
336
+
337
+ ```javascript
338
+ const ws = new WebSocket('ws://localhost:8001/ws/user123');
339
+
340
+ ws.onopen = function() {
341
+ console.log('WebSocket ์—ฐ๊ฒฐ๋จ');
342
+ };
343
+
344
+ ws.onmessage = function(event) {
345
+ const data = JSON.parse(event.data);
346
+ console.log('๋ฉ”์‹œ์ง€ ์ˆ˜์‹ :', data);
347
+ };
348
+ ```
349
+
350
+ ### ๋ฉ”์‹œ์ง€ ์ „์†ก
351
+
352
+ ```javascript
353
+ ws.send(JSON.stringify({
354
+ type: 'chat',
355
+ message: '์•ˆ๋…•ํ•˜์„ธ์š”!',
356
+ session_id: 'session_123'
357
+ }));
358
+ ```
359
+
360
+ ## ๐Ÿšจ ์˜ค๋ฅ˜ ์ฝ”๋“œ
361
+
362
+ | ์ฝ”๋“œ | ์˜๋ฏธ | ํ•ด๊ฒฐ ๋ฐฉ๋ฒ• |
363
+ |------|------|-----------|
364
+ | 400 | ์ž˜๋ชป๋œ ์š”์ฒญ | ์š”์ฒญ ํŒŒ๋ผ๋ฏธํ„ฐ ํ™•์ธ |
365
+ | 401 | ์ธ์ฆ ์‹คํŒจ | ํ† ํฐ ํ™•์ธ |
366
+ | 403 | ๊ถŒํ•œ ์—†์Œ | ๊ถŒํ•œ ํ™•์ธ |
367
+ | 404 | ๋ฆฌ์†Œ์Šค ์—†์Œ | URL ํ™•์ธ |
368
+ | 422 | ๊ฒ€์ฆ ์‹คํŒจ | ์š”์ฒญ ๋ฐ์ดํ„ฐ ํ˜•์‹ ํ™•์ธ |
369
+ | 500 | ์„œ๋ฒ„ ์˜ค๋ฅ˜ | ์„œ๋ฒ„ ๋กœ๊ทธ ํ™•์ธ |
370
+ | 503 | ์„œ๋น„์Šค ๋ถˆ๊ฐ€ | ์„œ๋น„์Šค ์ƒํƒœ ํ™•์ธ |
371
+
372
+ ## ๐Ÿ“ ์˜ˆ์ œ ์ฝ”๋“œ
373
+
374
+ ### Python ํด๋ผ์ด์–ธํŠธ
375
+
376
+ ```python
377
+ import requests
378
+ import json
379
+
380
+ class LilyLLMClient:
381
+ def __init__(self, base_url="http://localhost:8001"):
382
+ self.base_url = base_url
383
+ self.token = None
384
+
385
+ def login(self, username, password):
386
+ response = requests.post(f"{self.base_url}/auth/login",
387
+ data={"username": username, "password": password})
388
+ if response.status_code == 200:
389
+ self.token = response.json()["access_token"]
390
+ return True
391
+ return False
392
+
393
+ def generate_text(self, prompt, model_id="polyglot-ko-1.3b-chat"):
394
+ headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
395
+ data = {
396
+ "prompt": prompt,
397
+ "model_id": model_id,
398
+ "max_length": 200,
399
+ "temperature": 0.7
400
+ }
401
+ response = requests.post(f"{self.base_url}/generate",
402
+ data=data, headers=headers)
403
+ return response.json()
404
+
405
+ def upload_document(self, file_path, user_id):
406
+ headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
407
+ with open(file_path, 'rb') as f:
408
+ files = {'file': f}
409
+ data = {'user_id': user_id}
410
+ response = requests.post(f"{self.base_url}/document/upload",
411
+ files=files, data=data, headers=headers)
412
+ return response.json()
413
+
414
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
415
+ client = LilyLLMClient()
416
+ if client.login("username", "password"):
417
+ result = client.generate_text("์•ˆ๋…•ํ•˜์„ธ์š”!")
418
+ print(result["generated_text"])
419
+ ```
420
+
421
+ ### JavaScript ํด๋ผ์ด์–ธํŠธ
422
+
423
+ ```javascript
424
+ class LilyLLMClient {
425
+ constructor(baseUrl = 'http://localhost:8001') {
426
+ this.baseUrl = baseUrl;
427
+ this.token = null;
428
+ }
429
+
430
+ async login(username, password) {
431
+ const response = await fetch(`${this.baseUrl}/auth/login`, {
432
+ method: 'POST',
433
+ headers: {
434
+ 'Content-Type': 'application/x-www-form-urlencoded',
435
+ },
436
+ body: `username=${username}&password=${password}`
437
+ });
438
+
439
+ if (response.ok) {
440
+ const data = await response.json();
441
+ this.token = data.access_token;
442
+ return true;
443
+ }
444
+ return false;
445
+ }
446
+
447
+ async generateText(prompt, modelId = 'polyglot-ko-1.3b-chat') {
448
+ const headers = this.token ?
449
+ {'Authorization': `Bearer ${this.token}`} : {};
450
+
451
+ const formData = new FormData();
452
+ formData.append('prompt', prompt);
453
+ formData.append('model_id', modelId);
454
+ formData.append('max_length', '200');
455
+ formData.append('temperature', '0.7');
456
+
457
+ const response = await fetch(`${this.baseUrl}/generate`, {
458
+ method: 'POST',
459
+ headers,
460
+ body: formData
461
+ });
462
+
463
+ return await response.json();
464
+ }
465
+ }
466
+
467
+ // ์‚ฌ์šฉ ์˜ˆ์ œ
468
+ const client = new LilyLLMClient();
469
+ client.login('username', 'password').then(async (success) => {
470
+ if (success) {
471
+ const result = await client.generateText('์•ˆ๋…•ํ•˜์„ธ์š”!');
472
+ console.log(result.generated_text);
473
+ }
474
+ });
475
+ ```
476
+
477
+ ## ๐Ÿ”ง ์„ค์ •
478
+
479
+ ### ํ™˜๊ฒฝ ๋ณ€์ˆ˜
480
+
481
+ ```bash
482
+ # ์„œ๋ฒ„ ์„ค์ •
483
+ HOST=0.0.0.0
484
+ PORT=8001
485
+ LOG_LEVEL=INFO
486
+
487
+ # ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค
488
+ DATABASE_URL=sqlite:///app/data/lily_llm.db
489
+
490
+ # Redis
491
+ REDIS_URL=redis://localhost:6379
492
+
493
+ # Celery
494
+ CELERY_BROKER_URL=redis://localhost:6379/0
495
+ CELERY_RESULT_BACKEND=redis://localhost:6379/0
496
+
497
+ # ๋ณด์•ˆ
498
+ SECRET_KEY=your-secret-key
499
+ JWT_SECRET_KEY=your-jwt-secret-key
500
+ ```
501
+
502
+ ## ๐Ÿ“š ์ถ”๊ฐ€ ๋ฆฌ์†Œ์Šค
503
+
504
+ - [FastAPI ๋ฌธ์„œ](https://fastapi.tiangolo.com/)
505
+ - [Celery ๋ฌธ์„œ](https://docs.celeryproject.org/)
506
+ - [Redis ๋ฌธ์„œ](https://redis.io/documentation)
507
+ - [LangChain ๋ฌธ์„œ](https://python.langchain.com/)
docs/USER_GUIDE.md ADDED
@@ -0,0 +1,719 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM API ์‚ฌ์šฉ์ž ๊ฐ€์ด๋“œ
2
+
3
+ ## ๐Ÿ“‹ ๋ชฉ์ฐจ
4
+
5
+ 1. [์‹œ์ž‘ํ•˜๊ธฐ](#์‹œ์ž‘ํ•˜๊ธฐ)
6
+ 2. [๊ธฐ๋ณธ ๊ธฐ๋Šฅ](#๊ธฐ๋ณธ-๊ธฐ๋Šฅ)
7
+ 3. [๊ณ ๊ธ‰ ๊ธฐ๋Šฅ](#๊ณ ๊ธ‰-๊ธฐ๋Šฅ)
8
+ 4. [๋ฌธ์ œ ํ•ด๊ฒฐ](#๋ฌธ์ œ-ํ•ด๊ฒฐ)
9
+ 5. [๋ชจ๋ฒ” ์‚ฌ๋ก€](#๋ชจ๋ฒ”-์‚ฌ๋ก€)
10
+
11
+ ## ๐Ÿš€ ์‹œ์ž‘ํ•˜๊ธฐ
12
+
13
+ ### ์‹œ์Šคํ…œ ์š”๊ตฌ์‚ฌํ•ญ
14
+
15
+ - **์ตœ์†Œ ์‚ฌ์–‘**:
16
+ - CPU: 4์ฝ”์–ด ์ด์ƒ
17
+ - RAM: 8GB ์ด์ƒ
18
+ - ์ €์žฅ๊ณต๊ฐ„: 20GB ์ด์ƒ
19
+ - GPU: ์„ ํƒ์‚ฌํ•ญ (CUDA ์ง€์› ์‹œ ์„ฑ๋Šฅ ํ–ฅ์ƒ)
20
+
21
+ - **๊ถŒ์žฅ ์‚ฌ์–‘**:
22
+ - CPU: 8์ฝ”์–ด ์ด์ƒ
23
+ - RAM: 16GB ์ด์ƒ
24
+ - ์ €์žฅ๊ณต๊ฐ„: 50GB ์ด์ƒ
25
+ - GPU: NVIDIA RTX 3060 ์ด์ƒ (CUDA ์ง€์›)
26
+
27
+ ### ์„ค์น˜ ๋ฐ ์‹คํ–‰
28
+
29
+ #### 1. Docker๋ฅผ ์‚ฌ์šฉํ•œ ๋ฐฐํฌ (๊ถŒ์žฅ)
30
+
31
+ ```bash
32
+ # ์ €์žฅ์†Œ ํด๋ก 
33
+ git clone <repository-url>
34
+ cd lily_generate_package
35
+
36
+ # ๋ฐฐํฌ ์‹คํ–‰
37
+ chmod +x scripts/deploy.sh
38
+ ./scripts/deploy.sh deploy
39
+
40
+ # ์ƒํƒœ ํ™•์ธ
41
+ ./scripts/deploy.sh status
42
+ ```
43
+
44
+ #### 2. ๋กœ์ปฌ ๊ฐœ๋ฐœ ํ™˜๊ฒฝ
45
+
46
+ ```bash
47
+ # ๊ฐ€์ƒํ™˜๊ฒฝ ์ƒ์„ฑ
48
+ python -m venv venv
49
+ source venv/bin/activate # Windows: venv\Scripts\activate
50
+
51
+ # ์˜์กด์„ฑ ์„ค์น˜
52
+ pip install -r requirements.txt
53
+
54
+ # NLTK ๋ฐ์ดํ„ฐ ๋‹ค์šด๋กœ๋“œ
55
+ python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab')"
56
+
57
+ # ์„œ๋ฒ„ ์‹คํ–‰
58
+ python run_server_v2.py
59
+ ```
60
+
61
+ ### ์ฒซ ๋ฒˆ์งธ ์š”์ฒญ
62
+
63
+ ```bash
64
+ # ์„œ๋ฒ„ ์ƒํƒœ ํ™•์ธ
65
+ curl http://localhost:8001/health
66
+
67
+ # ๋ชจ๋ธ ๋ชฉ๋ก ์กฐํšŒ
68
+ curl http://localhost:8001/models
69
+
70
+ # ๊ฐ„๋‹จํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ
71
+ curl -X POST http://localhost:8001/generate \
72
+ -H "Content-Type: application/x-www-form-urlencoded" \
73
+ -d "prompt=์•ˆ๋…•ํ•˜์„ธ์š”!&model_id=polyglot-ko-1.3b-chat&max_length=100"
74
+ ```
75
+
76
+ ## ๐Ÿค– ๊ธฐ๋ณธ ๊ธฐ๋Šฅ
77
+
78
+ ### 1. ํ…์ŠคํŠธ ์ƒ์„ฑ
79
+
80
+ #### ๋‹จ์ˆœ ํ…์ŠคํŠธ ์ƒ์„ฑ
81
+
82
+ ```python
83
+ import requests
84
+
85
+ def generate_text(prompt, model_id="polyglot-ko-1.3b-chat"):
86
+ url = "http://localhost:8001/generate"
87
+ data = {
88
+ "prompt": prompt,
89
+ "model_id": model_id,
90
+ "max_length": 200,
91
+ "temperature": 0.7,
92
+ "top_p": 0.9,
93
+ "do_sample": True
94
+ }
95
+
96
+ response = requests.post(url, data=data)
97
+ return response.json()
98
+
99
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
100
+ result = generate_text("์ธ๊ณต์ง€๋Šฅ์˜ ๋ฏธ๋ž˜์— ๋Œ€ํ•ด ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”.")
101
+ print(result["generated_text"])
102
+ ```
103
+
104
+ #### ํŒŒ๋ผ๋ฏธํ„ฐ ์„ค๋ช…
105
+
106
+ | ํŒŒ๋ผ๋ฏธํ„ฐ | ์„ค๋ช… | ๊ธฐ๋ณธ๊ฐ’ | ๋ฒ”์œ„ |
107
+ |----------|------|--------|------|
108
+ | `prompt` | ์ž…๋ ฅ ํ…์ŠคํŠธ | ํ•„์ˆ˜ | - |
109
+ | `model_id` | ์‚ฌ์šฉํ•  ๋ชจ๋ธ | polyglot-ko-1.3b-chat | ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก |
110
+ | `max_length` | ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜ | 200 | 1-4000 |
111
+ | `temperature` | ์ฐฝ์˜์„ฑ ์กฐ์ ˆ | 0.7 | 0.0-2.0 |
112
+ | `top_p` | ๋ˆ„์  ํ™•๋ฅ  ์ž„๊ณ„๊ฐ’ | 0.9 | 0.0-1.0 |
113
+ | `do_sample` | ์ƒ˜ํ”Œ๋ง ์‚ฌ์šฉ ์—ฌ๋ถ€ | True | True/False |
114
+
115
+ ### 2. ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ
116
+
117
+ #### ์ด๋ฏธ์ง€์™€ ํ…์ŠคํŠธ ํ•จ๊ป˜ ์ฒ˜๋ฆฌ
118
+
119
+ ```python
120
+ def generate_multimodal(prompt, image_files, model_id="kanana-1.5-v-3b-instruct"):
121
+ url = "http://localhost:8001/generate-multimodal"
122
+
123
+ files = []
124
+ for i, image_file in enumerate(image_files):
125
+ files.append(('image_files', (f'image_{i}.jpg', open(image_file, 'rb'), 'image/jpeg')))
126
+
127
+ data = {
128
+ "prompt": prompt,
129
+ "model_id": model_id,
130
+ "max_length": 200,
131
+ "temperature": 0.7
132
+ }
133
+
134
+ response = requests.post(url, files=files, data=data)
135
+ return response.json()
136
+
137
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
138
+ result = generate_multimodal(
139
+ "์ด ์ด๋ฏธ์ง€์— ๋Œ€ํ•ด ์„ค๋ช…ํ•ด์ฃผ์„ธ์š”.",
140
+ ["image1.jpg", "image2.jpg"]
141
+ )
142
+ print(result["generated_text"])
143
+ ```
144
+
145
+ ### 3. ์‚ฌ์šฉ์ž ๊ด€๋ฆฌ
146
+
147
+ #### ์‚ฌ์šฉ์ž ๋“ฑ๋ก ๋ฐ ๋กœ๊ทธ์ธ
148
+
149
+ ```python
150
+ def register_user(username, email, password):
151
+ url = "http://localhost:8001/auth/register"
152
+ data = {
153
+ "username": username,
154
+ "email": email,
155
+ "password": password
156
+ }
157
+
158
+ response = requests.post(url, data=data)
159
+ return response.json()
160
+
161
+ def login_user(username, password):
162
+ url = "http://localhost:8001/auth/login"
163
+ data = {
164
+ "username": username,
165
+ "password": password
166
+ }
167
+
168
+ response = requests.post(url, data=data)
169
+ return response.json()
170
+
171
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
172
+ # 1. ์‚ฌ์šฉ์ž ๋“ฑ๋ก
173
+ register_result = register_user("testuser", "test@example.com", "password123")
174
+ access_token = register_result["access_token"]
175
+
176
+ # 2. ๋กœ๊ทธ์ธ
177
+ login_result = login_user("testuser", "password123")
178
+ access_token = login_result["access_token"]
179
+ ```
180
+
181
+ #### ์ธ์ฆ์ด ํ•„์š”ํ•œ ์š”์ฒญ
182
+
183
+ ```python
184
+ def authenticated_request(url, data, token):
185
+ headers = {"Authorization": f"Bearer {token}"}
186
+ response = requests.post(url, data=data, headers=headers)
187
+ return response.json()
188
+
189
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
190
+ result = authenticated_request(
191
+ "http://localhost:8001/generate",
192
+ {"prompt": "์•ˆ๋…•ํ•˜์„ธ์š”!", "model_id": "polyglot-ko-1.3b-chat"},
193
+ access_token
194
+ )
195
+ ```
196
+
197
+ ## ๐Ÿ“„ ๊ณ ๊ธ‰ ๊ธฐ๋Šฅ
198
+
199
+ ### 1. ๋ฌธ์„œ ์ฒ˜๋ฆฌ (RAG)
200
+
201
+ #### ๋ฌธ์„œ ์—…๋กœ๋“œ
202
+
203
+ ```python
204
+ def upload_document(file_path, user_id, token=None):
205
+ url = "http://localhost:8001/document/upload"
206
+
207
+ with open(file_path, 'rb') as f:
208
+ files = {'file': f}
209
+ data = {'user_id': user_id}
210
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
211
+
212
+ response = requests.post(url, files=files, data=data, headers=headers)
213
+ return response.json()
214
+
215
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
216
+ result = upload_document("document.pdf", "user123", access_token)
217
+ document_id = result["document_id"]
218
+ ```
219
+
220
+ #### RAG ์ฟผ๋ฆฌ
221
+
222
+ ```python
223
+ def rag_query(query, user_id, token=None):
224
+ url = "http://localhost:8001/rag/generate"
225
+
226
+ data = {
227
+ "query": query,
228
+ "user_id": user_id,
229
+ "max_length": 300,
230
+ "temperature": 0.7
231
+ }
232
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
233
+
234
+ response = requests.post(url, data=data, headers=headers)
235
+ return response.json()
236
+
237
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
238
+ result = rag_query("์ธ๊ณต์ง€๋Šฅ์˜ ๋ฏธ๋ž˜์— ๋Œ€ํ•ด ์•Œ๋ ค์ฃผ์„ธ์š”.", "user123", access_token)
239
+ print(result["response"])
240
+ print("์ถœ์ฒ˜:", result["sources"])
241
+ ```
242
+
243
+ #### ํ•˜์ด๋ธŒ๋ฆฌ๋“œ RAG (์ด๋ฏธ์ง€ + ๋ฌธ์„œ)
244
+
245
+ ```python
246
+ def hybrid_rag_query(query, image_files, user_id, token=None):
247
+ url = "http://localhost:8001/rag/generate-hybrid"
248
+
249
+ files = []
250
+ for i, image_file in enumerate(image_files):
251
+ files.append(('image_files', (f'image_{i}.jpg', open(image_file, 'rb'), 'image/jpeg')))
252
+
253
+ data = {
254
+ "query": query,
255
+ "user_id": user_id,
256
+ "max_length": 300,
257
+ "temperature": 0.7
258
+ }
259
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
260
+
261
+ response = requests.post(url, files=files, data=data, headers=headers)
262
+ return response.json()
263
+ ```
264
+
265
+ ### 2. ์ฑ„ํŒ… ์„ธ์…˜ ๊ด€๋ฆฌ
266
+
267
+ #### ์„ธ์…˜ ์ƒ์„ฑ ๋ฐ ๋ฉ”์‹œ์ง€ ๊ด€๋ฆฌ
268
+
269
+ ```python
270
+ def create_chat_session(user_id, session_name, token=None):
271
+ url = "http://localhost:8001/session/create"
272
+
273
+ data = {
274
+ "user_id": user_id,
275
+ "session_name": session_name
276
+ }
277
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
278
+
279
+ response = requests.post(url, data=data, headers=headers)
280
+ return response.json()
281
+
282
+ def add_chat_message(session_id, user_id, content, token=None):
283
+ url = "http://localhost:8001/chat/message"
284
+
285
+ data = {
286
+ "session_id": session_id,
287
+ "user_id": user_id,
288
+ "message_type": "text",
289
+ "content": content
290
+ }
291
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
292
+
293
+ response = requests.post(url, data=data, headers=headers)
294
+ return response.json()
295
+
296
+ def get_chat_history(session_id, token=None):
297
+ url = f"http://localhost:8001/chat/history/{session_id}"
298
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
299
+
300
+ response = requests.get(url, headers=headers)
301
+ return response.json()
302
+
303
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
304
+ # 1. ์„ธ์…˜ ์ƒ์„ฑ
305
+ session_result = create_chat_session("user123", "AI ์ƒ๋‹ด", access_token)
306
+ session_id = session_result["session_id"]
307
+
308
+ # 2. ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
309
+ add_chat_message(session_id, "user123", "์•ˆ๋…•ํ•˜์„ธ์š”!", access_token)
310
+
311
+ # 3. ์ฑ„ํŒ… ๊ธฐ๋ก ์กฐํšŒ
312
+ history = get_chat_history(session_id, access_token)
313
+ for message in history:
314
+ print(f"{message['timestamp']}: {message['content']}")
315
+ ```
316
+
317
+ ### 3. ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—…
318
+
319
+ #### ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ž‘์—…
320
+
321
+ ```python
322
+ def start_document_processing(file_path, user_id, token=None):
323
+ url = "http://localhost:8001/tasks/document/process"
324
+
325
+ data = {
326
+ "file_path": file_path,
327
+ "user_id": user_id
328
+ }
329
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
330
+
331
+ response = requests.post(url, data=data, headers=headers)
332
+ return response.json()
333
+
334
+ def check_task_status(task_id, token=None):
335
+ url = f"http://localhost:8001/tasks/{task_id}"
336
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
337
+
338
+ response = requests.get(url, headers=headers)
339
+ return response.json()
340
+
341
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
342
+ # 1. ์ž‘์—… ์‹œ์ž‘
343
+ task_result = start_document_processing("/path/to/document.pdf", "user123", access_token)
344
+ task_id = task_result["task_id"]
345
+
346
+ # 2. ์ž‘์—… ์ƒํƒœ ํ™•์ธ
347
+ import time
348
+ while True:
349
+ status = check_task_status(task_id, access_token)
350
+ print(f"์ƒํƒœ: {status['status']}, ์ง„ํ–‰๋ฅ : {status.get('progress', 0)}%")
351
+
352
+ if status['status'] in ['SUCCESS', 'FAILURE']:
353
+ break
354
+
355
+ time.sleep(5)
356
+ ```
357
+
358
+ ### 4. ๋ชจ๋‹ˆํ„ฐ๋ง
359
+
360
+ #### ์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง
361
+
362
+ ```python
363
+ def start_monitoring():
364
+ url = "http://localhost:8001/monitoring/start"
365
+ response = requests.post(url)
366
+ return response.json()
367
+
368
+ def get_monitoring_status():
369
+ url = "http://localhost:8001/monitoring/status"
370
+ response = requests.get(url)
371
+ return response.json()
372
+
373
+ def get_system_health():
374
+ url = "http://localhost:8001/monitoring/health"
375
+ response = requests.get(url)
376
+ return response.json()
377
+
378
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
379
+ # 1. ๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์ž‘
380
+ start_monitoring()
381
+
382
+ # 2. ์ƒํƒœ ํ™•์ธ
383
+ status = get_monitoring_status()
384
+ print(f"CPU ์‚ฌ์šฉ๋ฅ : {status['current_metrics']['cpu_percent']}%")
385
+ print(f"๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋ฅ : {status['current_metrics']['memory_percent']}%")
386
+
387
+ # 3. ์‹œ์Šคํ…œ ๊ฑด๊ฐ• ์ƒํƒœ
388
+ health = get_system_health()
389
+ print(f"์‹œ์Šคํ…œ ์ƒํƒœ: {health['status']}")
390
+ for recommendation in health['recommendations']:
391
+ print(f"๊ถŒ์žฅ์‚ฌํ•ญ: {recommendation}")
392
+ ```
393
+
394
+ ## ๐Ÿ”Œ WebSocket ์‹ค์‹œ๊ฐ„ ์ฑ„ํŒ…
395
+
396
+ ### WebSocket ํด๋ผ์ด์–ธํŠธ
397
+
398
+ ```javascript
399
+ class LilyLLMWebSocket {
400
+ constructor(userId) {
401
+ this.userId = userId;
402
+ this.ws = null;
403
+ this.messageHandlers = [];
404
+ }
405
+
406
+ connect() {
407
+ this.ws = new WebSocket(`ws://localhost:8001/ws/${this.userId}`);
408
+
409
+ this.ws.onopen = () => {
410
+ console.log('WebSocket ์—ฐ๊ฒฐ๋จ');
411
+ };
412
+
413
+ this.ws.onmessage = (event) => {
414
+ const data = JSON.parse(event.data);
415
+ this.handleMessage(data);
416
+ };
417
+
418
+ this.ws.onclose = () => {
419
+ console.log('WebSocket ์—ฐ๊ฒฐ ์ข…๋ฃŒ');
420
+ };
421
+
422
+ this.ws.onerror = (error) => {
423
+ console.error('WebSocket ์˜ค๋ฅ˜:', error);
424
+ };
425
+ }
426
+
427
+ sendMessage(message, sessionId) {
428
+ if (this.ws && this.ws.readyState === WebSocket.OPEN) {
429
+ this.ws.send(JSON.stringify({
430
+ type: 'chat',
431
+ message: message,
432
+ session_id: sessionId
433
+ }));
434
+ }
435
+ }
436
+
437
+ addMessageHandler(handler) {
438
+ this.messageHandlers.push(handler);
439
+ }
440
+
441
+ handleMessage(data) {
442
+ this.messageHandlers.forEach(handler => handler(data));
443
+ }
444
+
445
+ disconnect() {
446
+ if (this.ws) {
447
+ this.ws.close();
448
+ }
449
+ }
450
+ }
451
+
452
+ // ์‚ฌ์šฉ ์˜ˆ์ œ
453
+ const wsClient = new LilyLLMWebSocket('user123');
454
+ wsClient.connect();
455
+
456
+ wsClient.addMessageHandler((data) => {
457
+ console.log('๋ฉ”์‹œ์ง€ ์ˆ˜์‹ :', data);
458
+ });
459
+
460
+ wsClient.sendMessage('์•ˆ๋…•ํ•˜์„ธ์š”!', 'session123');
461
+ ```
462
+
463
+ ## ๐Ÿšจ ๋ฌธ์ œ ํ•ด๊ฒฐ
464
+
465
+ ### ์ผ๋ฐ˜์ ์ธ ๋ฌธ์ œ๋“ค
466
+
467
+ #### 1. ์„œ๋ฒ„ ์—ฐ๊ฒฐ ์‹คํŒจ
468
+
469
+ **์ฆ์ƒ**: `Connection refused` ๋˜๋Š” `Failed to establish a new connection`
470
+
471
+ **ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•**:
472
+ ```bash
473
+ # ์„œ๋ฒ„ ์ƒํƒœ ํ™•์ธ
474
+ curl http://localhost:8001/health
475
+
476
+ # ์„œ๋ฒ„ ์žฌ์‹œ์ž‘
477
+ ./scripts/deploy.sh restart
478
+
479
+ # ๋กœ๊ทธ ํ™•์ธ
480
+ ./scripts/deploy.sh logs
481
+ ```
482
+
483
+ #### 2. ๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ
484
+
485
+ **์ฆ์ƒ**: `Out of memory` ๋˜๋Š” ์‘๋‹ต ์†๋„ ์ €ํ•˜
486
+
487
+ **ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•**:
488
+ ```bash
489
+ # ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ํ™•์ธ
490
+ docker stats
491
+
492
+ # ๋ถˆํ•„์š”ํ•œ ์ปจํ…Œ์ด๋„ˆ ์ •๋ฆฌ
493
+ docker system prune -f
494
+
495
+ # ๋ฆฌ์†Œ์Šค ์ œํ•œ ์„ค์ • (docker-compose.yml)
496
+ services:
497
+ lily-llm-api:
498
+ deploy:
499
+ resources:
500
+ limits:
501
+ memory: 4G
502
+ ```
503
+
504
+ #### 3. ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ
505
+
506
+ **์ฆ์ƒ**: `Model not found` ๋˜๋Š” ๋ชจ๋ธ ๊ด€๋ จ ์˜ค๋ฅ˜
507
+
508
+ **ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•**:
509
+ ```bash
510
+ # ๋ชจ๋ธ ๋ชฉ๋ก ํ™•์ธ
511
+ curl http://localhost:8001/models
512
+
513
+ # ๋ชจ๋ธ ํŒŒ์ผ ํ™•์ธ
514
+ ls -la models/
515
+
516
+ # ์„œ๋ฒ„ ์žฌ์‹œ์ž‘
517
+ ./scripts/deploy.sh restart
518
+ ```
519
+
520
+ #### 4. ์ธ์ฆ ์˜ค๋ฅ˜
521
+
522
+ **์ฆ์ƒ**: `401 Unauthorized` ๋˜๋Š” `403 Forbidden`
523
+
524
+ **ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•**:
525
+ ```python
526
+ # ํ† ํฐ ๊ฐฑ์‹ 
527
+ def refresh_token(refresh_token):
528
+ url = "http://localhost:8001/auth/refresh"
529
+ data = {"refresh_token": refresh_token}
530
+ response = requests.post(url, data=data)
531
+ return response.json()
532
+
533
+ # ์ƒˆ๋กœ์šด ํ† ํฐ์œผ๋กœ ์š”์ฒญ
534
+ new_tokens = refresh_token(old_refresh_token)
535
+ access_token = new_tokens["access_token"]
536
+ ```
537
+
538
+ ### ์„ฑ๋Šฅ ์ตœ์ ํ™”
539
+
540
+ #### 1. ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ
541
+
542
+ ```python
543
+ def batch_generate_texts(prompts, model_id="polyglot-ko-1.3b-chat"):
544
+ results = []
545
+ for prompt in prompts:
546
+ result = generate_text(prompt, model_id)
547
+ results.append(result)
548
+ return results
549
+
550
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
551
+ prompts = [
552
+ "์ฒซ ๋ฒˆ์งธ ์งˆ๋ฌธ์ž…๋‹ˆ๋‹ค.",
553
+ "๋‘ ๋ฒˆ์งธ ์งˆ๋ฌธ์ž…๋‹ˆ๋‹ค.",
554
+ "์„ธ ๋ฒˆ์งธ ์งˆ๋ฌธ์ž…๋‹ˆ๋‹ค."
555
+ ]
556
+ results = batch_generate_texts(prompts)
557
+ ```
558
+
559
+ #### 2. ์บ์‹ฑ ํ™œ์šฉ
560
+
561
+ ```python
562
+ import redis
563
+ import json
564
+
565
+ class CachedLilyLLMClient:
566
+ def __init__(self, base_url="http://localhost:8001"):
567
+ self.base_url = base_url
568
+ self.redis_client = redis.Redis(host='localhost', port=6379, db=0)
569
+
570
+ def generate_text_with_cache(self, prompt, model_id="polyglot-ko-1.3b-chat"):
571
+ # ์บ์‹œ ํ‚ค ์ƒ์„ฑ
572
+ cache_key = f"text_gen:{hash(prompt + model_id)}"
573
+
574
+ # ์บ์‹œ์—์„œ ํ™•์ธ
575
+ cached_result = self.redis_client.get(cache_key)
576
+ if cached_result:
577
+ return json.loads(cached_result)
578
+
579
+ # API ํ˜ธ์ถœ
580
+ result = generate_text(prompt, model_id)
581
+
582
+ # ์บ์‹œ์— ์ €์žฅ (1์‹œ๊ฐ„)
583
+ self.redis_client.setex(cache_key, 3600, json.dumps(result))
584
+
585
+ return result
586
+ ```
587
+
588
+ ## ๐Ÿ“š ๋ชจ๋ฒ” ์‚ฌ๋ก€
589
+
590
+ ### 1. ์—๋Ÿฌ ์ฒ˜๋ฆฌ
591
+
592
+ ```python
593
+ import requests
594
+ from requests.exceptions import RequestException
595
+
596
+ def safe_api_call(func, *args, **kwargs):
597
+ try:
598
+ return func(*args, **kwargs)
599
+ except RequestException as e:
600
+ print(f"๋„คํŠธ์›Œํฌ ์˜ค๋ฅ˜: {e}")
601
+ return None
602
+ except Exception as e:
603
+ print(f"์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜: {e}")
604
+ return None
605
+
606
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
607
+ result = safe_api_call(generate_text, "์•ˆ๋…•ํ•˜์„ธ์š”!")
608
+ if result:
609
+ print(result["generated_text"])
610
+ ```
611
+
612
+ ### 2. ์žฌ์‹œ๋„ ๋กœ์ง
613
+
614
+ ```python
615
+ import time
616
+ from functools import wraps
617
+
618
+ def retry_on_failure(max_retries=3, delay=1):
619
+ def decorator(func):
620
+ @wraps(func)
621
+ def wrapper(*args, **kwargs):
622
+ for attempt in range(max_retries):
623
+ try:
624
+ return func(*args, **kwargs)
625
+ except Exception as e:
626
+ if attempt == max_retries - 1:
627
+ raise e
628
+ print(f"์‹œ๋„ {attempt + 1} ์‹คํŒจ, {delay}์ดˆ ํ›„ ์žฌ์‹œ๋„...")
629
+ time.sleep(delay)
630
+ return None
631
+ return wrapper
632
+ return decorator
633
+
634
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
635
+ @retry_on_failure(max_retries=3, delay=2)
636
+ def robust_generate_text(prompt):
637
+ return generate_text(prompt)
638
+ ```
639
+
640
+ ### 3. ๋น„๋™๊ธฐ ์ฒ˜๋ฆฌ
641
+
642
+ ```python
643
+ import asyncio
644
+ import aiohttp
645
+
646
+ async def async_generate_text(session, prompt, model_id="polyglot-ko-1.3b-chat"):
647
+ url = "http://localhost:8001/generate"
648
+ data = {
649
+ "prompt": prompt,
650
+ "model_id": model_id,
651
+ "max_length": 200,
652
+ "temperature": 0.7
653
+ }
654
+
655
+ async with session.post(url, data=data) as response:
656
+ return await response.json()
657
+
658
+ async def batch_generate_async(prompts):
659
+ async with aiohttp.ClientSession() as session:
660
+ tasks = [async_generate_text(session, prompt) for prompt in prompts]
661
+ results = await asyncio.gather(*tasks)
662
+ return results
663
+
664
+ # ์‚ฌ์šฉ ์˜ˆ์ œ
665
+ prompts = ["์งˆ๋ฌธ1", "์งˆ๋ฌธ2", "์งˆ๋ฌธ3"]
666
+ results = asyncio.run(batch_generate_async(prompts))
667
+ ```
668
+
669
+ ### 4. ๋กœ๊น…
670
+
671
+ ```python
672
+ import logging
673
+
674
+ # ๋กœ๊น… ์„ค์ •
675
+ logging.basicConfig(
676
+ level=logging.INFO,
677
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
678
+ handlers=[
679
+ logging.FileHandler('lily_llm_client.log'),
680
+ logging.StreamHandler()
681
+ ]
682
+ )
683
+
684
+ logger = logging.getLogger(__name__)
685
+
686
+ def generate_text_with_logging(prompt, model_id="polyglot-ko-1.3b-chat"):
687
+ logger.info(f"ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹œ์ž‘: {prompt[:50]}...")
688
+
689
+ try:
690
+ result = generate_text(prompt, model_id)
691
+ logger.info(f"ํ…์ŠคํŠธ ์ƒ์„ฑ ์„ฑ๊ณต: {len(result['generated_text'])} ๋ฌธ์ž")
692
+ return result
693
+ except Exception as e:
694
+ logger.error(f"ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹คํŒจ: {e}")
695
+ raise
696
+ ```
697
+
698
+ ## ๐Ÿ“ž ์ง€์›
699
+
700
+ ### ๋„์›€๋ง ๋ฆฌ์†Œ์Šค
701
+
702
+ - **API ๋ฌธ์„œ**: `http://localhost:8001/docs`
703
+ - **ReDoc ๋ฌธ์„œ**: `http://localhost:8001/redoc`
704
+ - **GitHub Issues**: ํ”„๋กœ์ ํŠธ ์ €์žฅ์†Œ์˜ Issues ์„น์…˜
705
+ - **๋กœ๊ทธ ํŒŒ์ผ**: `./logs/` ๋””๋ ‰ํ† ๋ฆฌ
706
+
707
+ ### ๋””๋ฒ„๊น… ํŒ
708
+
709
+ 1. **๋กœ๊ทธ ํ™•์ธ**: ํ•ญ์ƒ ๋กœ๊ทธ๋ฅผ ๋จผ์ € ํ™•์ธํ•˜์„ธ์š”
710
+ 2. **๋‹จ๊ณ„๋ณ„ ํ…Œ์ŠคํŠธ**: ๋ณต์žกํ•œ ์š”์ฒญ์„ ์ž‘์€ ๋‹จ์œ„๋กœ ๋‚˜๋ˆ„์–ด ํ…Œ์ŠคํŠธํ•˜์„ธ์š”
711
+ 3. **๋„คํŠธ์›Œํฌ ํ™•์ธ**: ๋ฐฉํ™”๋ฒฝ์ด๋‚˜ ํ”„๋ก์‹œ ์„ค์ •์„ ํ™•์ธํ•˜์„ธ์š”
712
+ 4. **๋ฆฌ์†Œ์Šค ๋ชจ๋‹ˆํ„ฐ๋ง**: CPU, ๋ฉ”๋ชจ๋ฆฌ, ๋””์Šคํฌ ์‚ฌ์šฉ๋Ÿ‰์„ ์ฃผ๊ธฐ์ ์œผ๋กœ ํ™•์ธํ•˜์„ธ์š”
713
+
714
+ ### ์„ฑ๋Šฅ ํŒ
715
+
716
+ 1. **์ ์ ˆํ•œ ๋ชจ๋ธ ์„ ํƒ**: ์ž‘์—…์— ๋งž๋Š” ๋ชจ๋ธ์„ ์„ ํƒํ•˜์„ธ์š”
717
+ 2. **๋ฐฐ์น˜ ์ฒ˜๋ฆฌ**: ์—ฌ๋Ÿฌ ์š”์ฒญ์„ ํ•œ ๋ฒˆ์— ์ฒ˜๋ฆฌํ•˜์„ธ์š”
718
+ 3. **์บ์‹ฑ ํ™œ์šฉ**: ๋ฐ˜๋ณต๋˜๋Š” ์š”์ฒญ์€ ์บ์‹œ๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”
719
+ 4. **๋น„๋™๊ธฐ ์ฒ˜๋ฆฌ**: ๋Œ€๋Ÿ‰์˜ ์š”์ฒญ์€ ๋น„๋™๊ธฐ๋กœ ์ฒ˜๋ฆฌํ•˜์„ธ์š”
download_model.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Docker ๋นŒ๋“œ ์‹œ Hugging Face ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์Šคํฌ๋ฆฝํŠธ
4
+ """
5
+
6
+ import os
7
+ import logging
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM
9
+ from pathlib import Path
10
+
11
+ logging.basicConfig(level=logging.INFO)
12
+ logger = logging.getLogger(__name__)
13
+
14
def download_model(model_name=None, cache_dir=None):
    """Download the Hugging Face model (tokenizer + weights) into the local cache.

    Run during the Docker build so the runtime image starts without a network
    download. Arguments default to the original hard-coded values, optionally
    overridden via environment variables, so existing callers are unaffected.

    Args:
        model_name: Hub repo id. Defaults to $HF_MODEL_NAME or
            "gbrabbit/lily-math-model".
        cache_dir: transformers cache directory. Defaults to $HF_CACHE_DIR or
            "/app/cache/transformers".

    Raises:
        Exception: re-raises any download failure so the Docker build aborts
            instead of silently shipping an image without cached weights.
    """
    # Backward-compatible defaults; env vars let the Dockerfile override them.
    model_name = model_name or os.getenv("HF_MODEL_NAME", "gbrabbit/lily-math-model")
    cache_dir = cache_dir or os.getenv("HF_CACHE_DIR", "/app/cache/transformers")

    logger.info(f"📥 모델 다운로드 시작: {model_name}")

    try:
        # Ensure the cache directory exists before transformers writes to it.
        Path(cache_dir).mkdir(parents=True, exist_ok=True)

        logger.info("🔤 토크나이저 다운로드 중...")
        tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            cache_dir=cache_dir
        )
        logger.info("✅ 토크나이저 다운로드 완료")

        logger.info("🧠 모델 다운로드 중...")
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            cache_dir=cache_dir,
            torch_dtype="auto",  # use the checkpoint's native dtype (saves memory)
            low_cpu_mem_usage=True
        )
        logger.info("✅ 모델 다운로드 완료")

        # Free the in-memory objects immediately; only the on-disk cache matters.
        del model
        del tokenizer

        logger.info("🎉 모델 다운로드 및 캐시 완료")

    except Exception as e:
        logger.error(f"❌ 모델 다운로드 실패: {e}")
        raise


if __name__ == "__main__":
    download_model()
fix_huggingface_hub.bat ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
@chcp 65001 >nul
@echo off
REM Repair the huggingface_hub / sentence-transformers version mismatch by
REM pinning huggingface_hub to 0.16.4 and sentence-transformers to 2.2.2
REM inside the latex_ocr_env virtual environment.
echo HuggingFace Hub 버전 문제 해결 시작...

cd /d C:\Project\lily_generate_project\lily_generate_package

echo.
echo 1. latex_ocr_env 활성화...
call latex_ocr_env\Scripts\activate

echo.
echo 2. 현재 huggingface_hub 버전 확인...
python -c "import huggingface_hub; print('현재 버전:', huggingface_hub.__version__)"

echo.
echo 3. huggingface_hub 다운그레이드 (호환 버전으로)...
python -m pip install huggingface_hub==0.16.4

echo.
echo 4. sentence-transformers 재설치...
REM Reinstall so it resolves against the downgraded hub version.
python -m pip uninstall sentence-transformers -y
python -m pip install sentence-transformers==2.2.2

echo.
echo 5. 설치 확인...
python -c "import huggingface_hub; print('HuggingFace Hub:', huggingface_hub.__version__)"
python -c "import sentence_transformers; print('SentenceTransformers')"

echo.
echo HuggingFace Hub 버전 문제 해결 완료!
pause
huggingface_cloud_setup.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Hugging Face ํด๋ผ์šฐ๋“œ GPU ํ™˜๊ฒฝ ์„ค์ •
4
+ """
5
+
6
+ import os
7
+ import requests
8
+ import json
9
+ import logging
10
+ from huggingface_hub import login, HfApi
11
+ from transformers import AutoTokenizer, AutoModelForCausalLM
12
+
13
+ # ๋กœ๊น… ์„ค์ •
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
class HuggingFaceCloudSetup:
    """Interactive helper for preparing a Hugging Face cloud GPU deployment.

    NOTE(review): create_inference_endpoint() and create_huggingface_space()
    only build and log configuration dicts — no Hugging Face API call is made
    here; the actual provisioning steps are printed for the operator by main().
    """

    def __init__(self):
        """Set default model / Space names; the API client is created on login."""
        self.api = None  # HfApi instance, populated after a successful login
        self.model_name = "heegyu/polyglot-ko-5.8b-chat"
        self.space_name = "lily-math-rag"

    def setup_huggingface_login(self):
        """Prompt for a Hugging Face token and log in.

        Returns:
            bool: True on successful login, False on empty token or error.
        """
        logger.info("🔐 Hugging Face 로그인 설정")

        try:
            # Token is read interactively — this script is meant for manual runs.
            token = input("Hugging Face 토큰을 입력하세요: ").strip()

            if token:
                login(token)
                self.api = HfApi(token=token)
                logger.info("✅ Hugging Face 로그인 성공")
                return True
            else:
                logger.error("❌ 토큰이 필요합니다")
                return False

        except Exception as e:
            logger.error(f"❌ Hugging Face 로그인 실패: {e}")
            return False

    def create_inference_endpoint(self):
        """Build (and log) the Inference Endpoint configuration dict.

        Returns:
            dict | None: the endpoint config, or None on error.
        """
        logger.info("🚀 추론 엔드포인트 생성 중...")

        try:
            # Endpoint settings; "account" is a placeholder to be replaced
            # with the real Hugging Face username before use.
            endpoint_config = {
                "account": "your-username",  # Hugging Face username placeholder
                "name": "lily-math-rag-endpoint",
                "repository": self.model_name,
                "framework": "pytorch",
                "accelerator": "gpu",
                "instance_type": "gpu.t4.medium",  # GPU instance type
                "region": "us-east-1",
                "vendor": "aws"
            }

            logger.info("✅ 엔드포인트 설정 완료")
            logger.info(f"   모델: {self.model_name}")
            logger.info(f"   GPU: {endpoint_config['instance_type']}")
            logger.info(f"   지역: {endpoint_config['region']}")

            return endpoint_config

        except Exception as e:
            logger.error(f"❌ 엔드포인트 생성 실패: {e}")
            return None

    def create_huggingface_space(self):
        """Build (and log) the Hugging Face Space configuration dict.

        Returns:
            dict | None: the Space config, or None on error.
        """
        logger.info("🌐 Hugging Face Space 생성 중...")

        try:
            space_config = {
                "name": self.space_name,
                "type": "gradio",
                "sdk": "gradio",
                "title": "Lily Math RAG System",
                "description": "수학 문제 해결을 위한 RAG 시스템",
                "license": "mit",
                "python_version": "3.9"
            }

            logger.info("✅ Space 설정 완료")
            logger.info(f"   Space 이름: {space_config['name']}")
            logger.info(f"   타입: {space_config['type']}")

            return space_config

        except Exception as e:
            logger.error(f"❌ Space 생성 실패: {e}")
            return None

    def upload_model_to_hub(self):
        """Check for a local checkpoint and print the CLI upload command.

        The actual upload is left to the operator via huggingface-cli.

        Returns:
            bool: True if the local model directory exists, False otherwise.
        """
        logger.info("📤 모델 업로드 중...")

        try:
            # Local fine-tuned checkpoint directory (relative to the CWD).
            local_model_path = "hearth_llm_model"

            if os.path.exists(local_model_path):
                logger.info(f"✅ 로컬 모델 발견: {local_model_path}")

                logger.info("💡 다음 명령어로 모델을 업로드하세요:")
                logger.info(f"   huggingface-cli upload your-username/lily-math-model {local_model_path}")

                return True
            else:
                logger.warning(f"⚠️ 로컬 모델을 찾을 수 없습니다: {local_model_path}")
                return False

        except Exception as e:
            logger.error(f"❌ 모델 업로드 실패: {e}")
            return False

    def test_cloud_inference(self, endpoint_url):
        """Send one test generation request to a deployed endpoint.

        Args:
            endpoint_url: base URL of the Inference Endpoint.

        Returns:
            bool: True when the endpoint answered with HTTP 200.
        """
        logger.info("🧪 클라우드 추론 테스트")

        try:
            # Minimal generation payload.
            test_data = {
                "inputs": "안녕하세요! 수학 문제를 도와주세요.",
                "parameters": {
                    "max_length": 100,
                    "temperature": 0.7
                }
            }

            # NOTE(review): assumes the HF_TOKEN env var is set; if missing,
            # this sends "Bearer None" and will fail auth — confirm before use.
            response = requests.post(
                f"{endpoint_url}/predict",
                json=test_data,
                headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}
            )

            if response.status_code == 200:
                result = response.json()
                logger.info(f"✅ 추론 테스트 성공: {result}")
                return True
            else:
                logger.error(f"❌ 추론 테스트 실패: {response.status_code}")
                return False

        except Exception as e:
            logger.error(f"❌ 추론 테스트 실패: {e}")
            return False
156
+
157
def main():
    """Entry point: walk the operator through the Hugging Face cloud GPU setup."""
    print("🚀 Hugging Face 클라우드 GPU 환경 설정")
    print("=" * 50)

    # One helper object drives every step.
    cloud = HuggingFaceCloudSetup()

    # Abort early if the interactive login does not succeed.
    if not cloud.setup_huggingface_login():
        print("❌ 로그인 실패")
        return

    # These steps log their own details; return values are informational only.
    endpoint_config = cloud.create_inference_endpoint()
    space_config = cloud.create_huggingface_space()
    cloud.upload_model_to_hub()

    for line in (
        "\n🎉 Hugging Face 클라우드 설정 완료!",
        "✅ 다음 단계:",
        "1. Hugging Face Inference Endpoints에서 엔드포인트 생성",
        "2. 모델을 Hugging Face Hub에 업로드",
        "3. Railway Hearth Chat과 연동 설정",
    ):
        print(line)


if __name__ == "__main__":
    main()
huggingface_gpu_setup.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Hugging Face GPU ํ™˜๊ฒฝ ์„ค์ •
4
+ """
5
+
6
+ import os
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
10
+ from huggingface_hub import login
11
+
12
+ # ๋กœ๊น… ์„ค์ •
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
class HuggingFaceGPUSetup:
    """Helper for configuring a GPU environment and loading a HF causal-LM."""

    def __init__(self):
        """Pick the default model and the compute device (CUDA if available)."""
        self.model_name = "heegyu/polyglot-ko-5.8b-chat"
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def setup_environment(self):
        """Log GPU availability and set process-wide environment variables.

        Returns:
            bool: always True (missing GPU is only logged as a warning).
        """
        logger.info("🚀 Hugging Face GPU 환경 설정 시작")

        # CUDA availability check (informational logging only).
        if torch.cuda.is_available():
            logger.info(f"✅ GPU 사용 가능: {torch.cuda.get_device_name(0)}")
            logger.info(f"   CUDA 버전: {torch.version.cuda}")
            logger.info(f"   GPU 메모리: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f}GB")
        else:
            logger.warning("⚠️ GPU를 사용할 수 없습니다. CPU 모드로 실행됩니다.")

        # Pin to the first GPU and silence the tokenizers fork warning.
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        os.environ["TOKENIZERS_PARALLELISM"] = "false"

        return True

    def setup_quantization(self):
        """Build a 4-bit (NF4) quantization config to reduce model memory.

        Returns:
            BitsAndBytesConfig | None: None when the config cannot be built
            (e.g. bitsandbytes unavailable); the caller then falls back to a
            plain fp16 load.
        """
        logger.info("🔧 양자화 설정")

        try:
            # 4-bit NF4 with double quantization; compute in fp16.
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
            )

            logger.info("✅ 4-bit 양자화 설정 완료")
            return quantization_config

        except Exception as e:
            logger.warning(f"⚠️ 양자화 설정 실패: {e}")
            return None

    def load_model_optimized(self, model_name=None):
        """Load tokenizer + model, 4-bit quantized when possible.

        Args:
            model_name: optional Hub repo id overriding the instance default.

        Returns:
            tuple: (model, tokenizer), or (None, None) on failure.
        """
        if model_name:
            self.model_name = model_name

        logger.info(f"📥 모델 로드 중: {self.model_name}")

        try:
            # Slow tokenizer: use_fast=False — presumably because this model
            # lacks a reliable fast-tokenizer build; confirm before changing.
            tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=True,
                use_fast=False
            )

            # Quantized path when available, plain fp16 otherwise.
            quantization_config = self.setup_quantization()

            # device_map="auto" lets accelerate place layers across devices.
            if quantization_config:
                model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    quantization_config=quantization_config,
                    device_map="auto",
                    trust_remote_code=True,
                    torch_dtype=torch.float16
                )
            else:
                model = AutoModelForCausalLM.from_pretrained(
                    self.model_name,
                    device_map="auto",
                    trust_remote_code=True,
                    torch_dtype=torch.float16
                )

            logger.info("✅ 모델 로드 완료")
            return model, tokenizer

        except Exception as e:
            logger.error(f"❌ 모델 로드 실패: {e}")
            return None, None

    def optimize_memory(self):
        """Clear the CUDA cache, enable cuDNN autotuning, and log memory stats.

        Returns:
            bool: always True (no-op on CPU-only machines).
        """
        logger.info("💾 메모리 최적화")

        if torch.cuda.is_available():
            # Drop cached allocations before the big model load.
            torch.cuda.empty_cache()

            # benchmark=True trades determinism for faster conv kernel selection.
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False

            # Current usage, in GiB, for device 0.
            memory_allocated = torch.cuda.memory_allocated(0) / 1024**3
            memory_reserved = torch.cuda.memory_reserved(0) / 1024**3

            logger.info(f"   할당된 메모리: {memory_allocated:.2f}GB")
            logger.info(f"   예약된 메모리: {memory_reserved:.2f}GB")

        return True

    def test_model_inference(self, model, tokenizer):
        """Run one short sampled generation to confirm the model works.

        Args:
            model: loaded causal-LM.
            tokenizer: matching tokenizer.

        Returns:
            bool: True when generation succeeded.
        """
        logger.info("🧪 모델 추론 테스트")

        try:
            test_input = "안녕하세요! 수학 문제를 도와주세요."

            # Tokenize and move tensors to the GPU when one is present.
            inputs = tokenizer(test_input, return_tensors="pt")
            if torch.cuda.is_available():
                inputs = {k: v.cuda() for k, v in inputs.items()}

            # Short sampled generation; no gradients needed for inference.
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_length=100,
                    num_return_sequences=1,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id
                )

            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            logger.info(f"✅ 추론 테스트 성공: {response[:50]}...")

            return True

        except Exception as e:
            logger.error(f"❌ 추론 테스트 실패: {e}")
            return False
158
+
159
def setup_huggingface_login():
    """Prompt for a Hugging Face token and log in; token is optional.

    Returns True after a successful login, False when the prompt is skipped
    or the login raises.
    """
    logger.info("🔐 Hugging Face 로그인 설정")

    try:
        raw = input("Hugging Face 토큰을 입력하세요 (Enter로 건너뛰기): ")
        hf_token = raw.strip()

        # Skipping the prompt is allowed — public models need no token.
        if not hf_token:
            logger.info("⚠️ 토큰을 입력하지 않았습니다. 공개 모델만 사용 가능합니다.")
            return False

        login(hf_token)
        logger.info("✅ Hugging Face 로그인 성공")
        return True

    except Exception as e:
        logger.error(f"❌ Hugging Face 로그인 실패: {e}")
        return False
178
+
179
def main():
    """Entry point: configure the GPU environment and optionally smoke-test a model."""
    print("🚀 Hugging Face GPU 환경 설정")
    print("=" * 50)

    # Step 1: optional Hub login (public models work without one).
    setup_huggingface_login()

    # Steps 2–4: environment variables and memory tuning.
    gpu_setup = HuggingFaceGPUSetup()
    gpu_setup.setup_environment()
    gpu_setup.optimize_memory()

    # Step 5: model loading is opt-in — it is slow and memory-hungry.
    wants_model = input("모델을 로드하시겠습니까? (y/N): ").strip().lower()

    if wants_model == 'y':
        loaded_model, loaded_tokenizer = gpu_setup.load_model_optimized()

        # Step 6: only run the inference smoke test when the load succeeded.
        if loaded_model and loaded_tokenizer:
            gpu_setup.test_model_inference(loaded_model, loaded_tokenizer)

    print("\n🎉 Hugging Face GPU 환경 설정 완료!")
    print("✅ 이제 GPU 환경에서 모델을 사용할 수 있습니다.")


if __name__ == "__main__":
    main()
lily-math-rag ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit e3f9de3c36d6444de4d64dbc3752d3082e7a4b0f
lily_llm.db ADDED
Binary file (41 kB). View file
 
lily_llm_api/app.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Lily LLM API ์„œ๋ฒ„
4
+ ํŒŒ์ธํŠœ๋‹๋œ Mistral-7B ๋ชจ๋ธ์„ RESTful API๋กœ ์„œ๋น™
5
+ """
6
+
7
+ from fastapi import FastAPI, HTTPException
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from pydantic import BaseModel
10
+ import uvicorn
11
+ import logging
12
+ import time
13
+ import torch
14
+ from typing import Optional, List
15
+
16
+ # ๋กœ๊น… ์„ค์ •
17
+ logging.basicConfig(level=logging.INFO)
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # FastAPI ์•ฑ ์ƒ์„ฑ
21
+ app = FastAPI(
22
+ title="Lily LLM API",
23
+ description="Hearth Chat์šฉ ํŒŒ์ธํŠœ๋‹๋œ Mistral-7B ๋ชจ๋ธ API",
24
+ version="1.0.0"
25
+ )
26
+
27
+ # CORS ์„ค์ •
28
+ app.add_middleware(
29
+ CORSMiddleware,
30
+ allow_origins=["*"], # ๊ฐœ๋ฐœ์šฉ, ํ”„๋กœ๋•์…˜์—์„œ๋Š” ํŠน์ • ๋„๋ฉ”์ธ๋งŒ ํ—ˆ์šฉ
31
+ allow_credentials=True,
32
+ allow_methods=["*"],
33
+ allow_headers=["*"],
34
+ )
35
+
36
+ # Pydantic ๋ชจ๋ธ๋“ค
37
class GenerateRequest(BaseModel):
    """Request body for POST /generate."""
    prompt: str  # user prompt; wrapped in a "질문:/답변:" template before generation
    max_length: Optional[int] = 100  # passed to generate() as max_new_tokens
    temperature: Optional[float] = 0.7  # NOTE(review): ignored by /generate (hard-coded 0.9 there)
    top_p: Optional[float] = 0.9  # NOTE(review): ignored by /generate (hard-coded 0.95 there)
    do_sample: Optional[bool] = True  # NOTE(review): ignored by /generate (always samples)
43
+
44
class GenerateResponse(BaseModel):
    """Response body for POST /generate."""
    generated_text: str  # model output with the prompt template stripped
    processing_time: float  # wall-clock seconds spent generating
    model_name: str = "Lily LLM (Mistral-7B)"
48
+
49
class HealthResponse(BaseModel):
    """Response body for GET /health."""
    status: str
    model_loaded: bool  # False until the startup model load finishes (or if it failed)
    model_name: str
53
+
54
# Module-level state shared between the startup hook and request handlers.
model = None  # causal-LM, set by load_model_async()
tokenizer = None  # matching tokenizer, set by load_model_async()
model_loaded = False  # flipped to True once startup loading succeeds
58
+
59
@app.on_event("startup")
async def startup_event():
    """Load the LLM once at server startup.

    Sets the module-level ``model``/``tokenizer``/``model_loaded`` globals the
    request handlers read. On failure the server still starts, but /generate
    returns 503 until a model is available.
    """
    global model, tokenizer, model_loaded

    logger.info("🚀 Lily LLM API 서버 시작 중...")
    logger.info("📝 API 문서: http://localhost:8001/docs")
    logger.info("🔍 헬스 체크: http://localhost:8001/health")

    try:
        # Load in a worker thread so the event loop is not blocked.
        await load_model_async()
        model_loaded = True
        logger.info("✅ 모델 로딩 완료!")
    except Exception as e:
        logger.error(f"❌ 모델 로딩 실패: {e}")
        model_loaded = False
76
+
77
async def load_model_async():
    """Load tokenizer and model without blocking the event loop.

    The slow, synchronous Hugging Face load runs in a worker thread; results
    are stored in the module-level ``model``/``tokenizer`` globals. Falls back
    to microsoft/DialoGPT-medium when the local checkpoint is missing.
    """
    global model, tokenizer

    import asyncio
    import concurrent.futures

    def load_model_sync():
        """Blocking model load; returns (model, tokenizer)."""
        from transformers import AutoTokenizer, AutoModelForCausalLM
        from peft import PeftModel
        import torch

        logger.info("모델 로딩 중...")

        # Local checkpoint path baked into the image.
        local_model_path = "./lily_llm_core/models/polyglot-ko-1.3b"

        try:
            tokenizer = AutoTokenizer.from_pretrained(local_model_path, use_fast=True)

            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            # CPU-only load; float32 avoids half-precision ops unsupported on CPU.
            model = AutoModelForCausalLM.from_pretrained(
                local_model_path,
                torch_dtype=torch.float32,
                device_map="cpu",
                low_cpu_mem_usage=True
            )

            logger.info("✅ polyglot-ko-1.3b 모델 로드 성공!")
            return model, tokenizer

        except Exception as e:
            logger.error(f"로컬 모델 로드 실패: {e}")
            logger.info("테스트용 간단한 모델 로드 중...")

            # Smaller hub model as a fallback so the API still works.
            test_model_name = "microsoft/DialoGPT-medium"
            tokenizer = AutoTokenizer.from_pretrained(test_model_name)
            model = AutoModelForCausalLM.from_pretrained(test_model_name)

            return model, tokenizer

    # FIX: asyncio.get_event_loop() inside a coroutine is deprecated and fails
    # on Python 3.12+ when no loop is set; get_running_loop() is correct here.
    loop = asyncio.get_running_loop()
    with concurrent.futures.ThreadPoolExecutor() as executor:
        model, tokenizer = await loop.run_in_executor(executor, load_model_sync)
128
+
129
@app.get("/", response_model=dict)
async def root():
    """Service banner: name, version, base model, and where the docs live."""
    info = {
        "message": "Lily LLM API 서버",
        "version": "1.0.0",
        "model": "Mistral-7B-Instruct-v0.2 (Fine-tuned)",
        "docs": "/docs",
    }
    return info
138
+
139
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Liveness probe: always 'healthy', plus the current model-load status."""
    payload = {
        "status": "healthy",
        "model_loaded": model_loaded,
        "model_name": "Lily LLM (Mistral-7B)",
    }
    return HealthResponse(**payload)
147
+
148
@app.post("/generate", response_model=GenerateResponse)
async def generate_text(request: GenerateRequest):
    """Generate a chat reply for the given prompt.

    Wraps the prompt in the "질문:/답변:" template the polyglot fine-tune
    expects, samples a continuation, and returns only the answer part.

    Raises:
        HTTPException: 503 when the model is not loaded, 500 on generation errors.
    """
    global model, tokenizer

    if not model_loaded or model is None or tokenizer is None:
        raise HTTPException(status_code=503, detail="모델이 로드되지 않았습니다")

    start_time = time.time()

    try:
        logger.info(f"텍스트 생성 시작: '{request.prompt}'")

        # Prompt template expected by the polyglot model.
        formatted_prompt = f"질문: {request.prompt}\n답변:"
        logger.info(f"포맷된 프롬프트: '{formatted_prompt}'")

        # Ensure a pad token exists before tokenizing.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True)
        logger.info(f"입력 토큰 수: {inputs['input_ids'].shape[1]}")

        # NOTE(review): temperature/top_p/do_sample from the request are
        # deliberately overridden with fixed values below; only max_length is
        # honoured (as max_new_tokens). Confirm this is still intended.
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=request.max_length,
                do_sample=True,
                temperature=0.9,  # higher temperature for livelier replies
                top_k=50,
                top_p=0.95,
                repetition_penalty=1.2,  # discourage verbatim repeats
                no_repeat_ngram_size=2,  # block repeated bigrams
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        logger.info(f"생성된 토큰 수: {outputs.shape[1]}")

        # Decode the full sequence (prompt + continuation).
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        logger.info(f"디코딩된 전체 텍스트: '{generated_text}'")

        # Keep only the text after the "답변:" marker when present.
        if "답변:" in generated_text:
            response = generated_text.split("답변:")[-1].strip()
            logger.info(f"답변 추출: '{response}'")
        else:
            # Fallback: strip the prompt prefix instead.
            if formatted_prompt in generated_text:
                response = generated_text.replace(formatted_prompt, "").strip()
            else:
                response = generated_text.strip()
            logger.info(f"프롬프트 제거 후: '{response}'")

        # Empty generations get a canned greeting instead of an empty body.
        if not response.strip():
            logger.warning("생성된 텍스트가 비어있음, 기본 응답 사용")
            response = "안녕하세요! 무엇을 도와드릴까요?"

        processing_time = time.time() - start_time

        logger.info(f"생성 완료: {processing_time:.2f}초, 텍스트 길이: {len(response)}")

        return GenerateResponse(
            generated_text=response,
            processing_time=processing_time
        )

    except Exception as e:
        logger.error(f"텍스트 생성 오류: {e}")
        raise HTTPException(status_code=500, detail=f"텍스트 생성 실패: {str(e)}")
223
+
224
@app.get("/models")
async def list_models():
    """Return the catalogue of models this server exposes (currently one)."""
    lily_model = {
        "id": "lily-llm",
        "name": "Lily LLM",
        "description": "Hearth Chat용 파인튜닝된 Mistral-7B 모델",
        "base_model": "mistralai/Mistral-7B-Instruct-v0.2",
        "fine_tuned": True,
    }
    return {"models": [lily_model]}
238
+
239
if __name__ == "__main__":
    # Run the API directly (dev mode); deployments use the container entrypoint.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8001,
        reload=False,
        log_level="info"
    )
lily_llm_api/app_v2.py ADDED
@@ -0,0 +1,2049 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Lily LLM API ์„œ๋ฒ„ v2 (์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒ ์„ ํƒ ๋ณต์› ๋ฐ ์„ฑ๋Šฅ ์ตœ์ ํ™” ์ตœ์ข…๋ณธ)
4
+ """
5
+ from fastapi import FastAPI, HTTPException, UploadFile, File, Form, Depends, WebSocket, WebSocketDisconnect
6
+ from fastapi.security import HTTPAuthorizationCredentials
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from pydantic import BaseModel
9
+ import uvicorn
10
+ import logging
11
+ import time
12
+ import torch
13
+ from datetime import datetime
14
+ from typing import Optional, List, Union
15
+ import asyncio
16
+ import concurrent.futures
17
+ import sys
18
+ from PIL import Image
19
+ import io
20
+ import os
21
+ import json
22
+ from pathlib import Path
23
+
24
+ from .models import get_model_profile, list_available_models
25
+ from lily_llm_core.rag_processor import rag_processor
26
+ from lily_llm_core.document_processor import document_processor
27
+ from lily_llm_core.hybrid_prompt_generator import hybrid_prompt_generator
28
+ from lily_llm_core.database import db_manager
29
+ from lily_llm_core.auth_manager import auth_manager
30
+ from lily_llm_core.websocket_manager import connection_manager
31
+ from lily_llm_core.celery_app import (
32
+ process_document_async, generate_ai_response_async,
33
+ rag_query_async, batch_process_documents_async,
34
+ get_task_status, cancel_task
35
+ )
36
+ from lily_llm_core.performance_monitor import performance_monitor
37
+
38
+ # ์ด๋ฏธ์ง€ OCR ์ „์šฉ ๋ชจ๋“ˆ ์ถ”๊ฐ€
39
+ from lily_llm_core.image_rag_processor import image_rag_processor
40
+ from lily_llm_core.latex_rag_processor import latex_rag_processor
41
+ from lily_llm_core.vector_store_manager import vector_store_manager
42
+
43
+ # LaTeX-OCR + FAISS ํ†ตํ•ฉ ์‹œ์Šคํ…œ ์ถ”๊ฐ€
44
+ from latex_ocr_faiss_integrated import LatexOCRFAISSIntegrated
45
+ from latex_ocr_faiss_simple import LatexOCRFAISSSimple
46
+
47
+ # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ํ”„๋กœ์„ธ์„œ ์ถ”๊ฐ€
48
+ from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
49
+
50
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
51
+ logger = logging.getLogger(__name__)
52
+
53
+ # FastAPI ์•ฑ ์ƒ์„ฑ
54
+ app = FastAPI(
55
+ title="Lily LLM API v2",
56
+ description="๋‹ค์ค‘ ๋ชจ๋ธ ์ง€์› LLM API ์„œ๋ฒ„",
57
+ version="2.0.0"
58
+ )
59
+
60
+ # CORS ์„ค์ •
61
+ app.add_middleware(
62
+ CORSMiddleware,
63
+ allow_origins=[
64
+ "http://localhost:8001",
65
+ "http://127.0.0.1:8001",
66
+ "http://localhost:3000",
67
+ "http://127.0.0.1:3000",
68
+ "*" # ๊ฐœ๋ฐœ ์ค‘์—๋Š” ๋ชจ๋“  origin ํ—ˆ์šฉ
69
+ ],
70
+ allow_credentials=True,
71
+ allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
72
+ allow_headers=["*"],
73
+ )
74
+
75
+ # Pydantic ๋ชจ๋ธ๋“ค
76
class GenerateRequest(BaseModel):
    """Request body for text generation; unset fields fall back to the
    currently-loaded model's defaults."""
    prompt: str
    model_id: Optional[str] = None  # no default on purpose — use the loaded model
    max_length: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    do_sample: Optional[bool] = None
83
+
84
class GenerateResponse(BaseModel):
    """Response body for text generation."""
    generated_text: str
    processing_time: float  # wall-clock seconds
    model_name: str
    image_processed: bool  # presumably True when an image input was handled — confirm in handlers
89
+
90
class MultimodalGenerateResponse(BaseModel):
    """Response body for multimodal (text + image) generation."""
    generated_text: str
    processing_time: float
    model_name: str
    model_id: Optional[str] = None
    image_processed: bool = False
96
+
97
class HealthResponse(BaseModel):
    """Response body for the health-check endpoint."""
    status: str
    model_loaded: bool
    current_model: str
    available_models: List[dict]  # catalogue entries from list_available_models()
102
+
103
class DocumentUploadResponse(BaseModel):
    """Response payload of the /document/upload endpoint."""
    success: bool
    document_id: str
    message: str
    chunks: Optional[int] = None
    latex_count: Optional[int] = None  # number of LaTeX formulas found (added field)
    error: Optional[str] = None
    auto_response: Optional[str] = None  # auto-generated summary answer (added field)
111
+
112
class RAGResponse(BaseModel):
    """Response payload of the RAG generation endpoints."""
    success: bool
    response: str
    context: str  # retrieved context that grounded the answer
    sources: List[dict]
    search_results: int  # number of retrieval hits
    processing_time: float
119
+
120
+ # ์‚ฌ์šฉ์ž ๊ด€๋ จ ์‘๋‹ต ๋ชจ๋ธ
121
class UserResponse(BaseModel):
    """Response payload for user-related endpoints."""
    success: bool
    user_id: str
    username: Optional[str] = None
    email: Optional[str] = None
    created_at: Optional[str] = None
    error: Optional[str] = None
128
+
129
class SessionResponse(BaseModel):
    """Response payload for chat-session endpoints."""
    success: bool
    session_id: str
    session_name: Optional[str] = None
    created_at: Optional[str] = None
    error: Optional[str] = None
135
+
136
class ChatMessageResponse(BaseModel):
    """Response payload for chat-message endpoints."""
    success: bool
    message_id: int
    content: str
    message_type: str
    timestamp: str
    error: Optional[str] = None
143
+
144
+ # ์ธ์ฆ ๊ด€๋ จ ์‘๋‹ต ๋ชจ๋ธ
145
class LoginResponse(BaseModel):
    """Response payload for login (authentication) endpoints."""
    success: bool
    access_token: Optional[str] = None
    refresh_token: Optional[str] = None
    token_type: Optional[str] = None
    user_id: Optional[str] = None
    username: Optional[str] = None
    error: Optional[str] = None
153
+
154
class TokenResponse(BaseModel):
    """Response payload for token refresh endpoints."""
    success: bool
    access_token: Optional[str] = None
    token_type: Optional[str] = None
    error: Optional[str] = None
159
+
160
# Global model state shared across request handlers (mutated in load_model_sync).
model = None  # currently loaded model instance
tokenizer = None  # tokenizer paired with `model`
current_profile = None  # model profile object (metadata + load/generation helpers)
model_loaded = False  # True once startup loading finished successfully
image_processor = None  # image preprocessor for multimodal models (None otherwise)
executor = concurrent.futures.ThreadPoolExecutor()  # offloads blocking model work from the event loop
167
+
168
def select_model_interactive():
    """Choose the model to serve at startup and return its model_id.

    Historically this prompted on stdin; the prompt was commented out so the
    server can boot headless (Docker / HF Spaces). The original hard-coded
    ``available_models[1]`` raised IndexError whenever fewer than two models
    were registered — we now fall back to the first entry, and exit with a
    clear message when no model is registered at all.
    """
    available_models = list_available_models()
    if not available_models:
        sys.exit("โŒ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ์ด ์—†์Šต๋‹ˆ๋‹ค.")

    print("\n" + "="*60 + "\n๐Ÿค– Lily LLM API v2 - ๋ชจ๋ธ ์„ ํƒ\n" + "="*60)
    for i, model_info in enumerate(available_models, 1):
        print(f"{i:2d}. {model_info['name']} ({model_info['model_id']})")

    # Keep the historical default (second registered model) but degrade
    # gracefully to the first one instead of crashing.
    index = 1 if len(available_models) > 1 else 0
    selected_model = available_models[index]
    print(f"\nโœ… '{selected_model['name']}' ๋ชจ๋ธ์„ ์„ ํƒํ–ˆ์Šต๋‹ˆ๋‹ค.")
    return selected_model['model_id']
185
+
186
@app.on_event("startup")
async def startup_event():
    """[restored] Select and load the model before the server starts serving.

    Sets the global `model_loaded` flag; request handlers return 503 while it
    is False. Load failures are logged but do not abort the process.
    """
    global model_loaded
    selected_model_id = select_model_interactive()
    try:
        await load_model_async(selected_model_id)
        model_loaded = True
        logger.info(f"โœ… ์„œ๋ฒ„๊ฐ€ '{current_profile.display_name}' ๋ชจ๋ธ๋กœ ์ค€๋น„๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
    except Exception as e:
        logger.error(f"โŒ ๋ชจ๋ธ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค: {e}", exc_info=True)
        model_loaded = False
198
+
199
@app.on_event("shutdown")
def shutdown_event():
    """Drain the worker pool so in-flight model jobs finish before exit."""
    executor.shutdown(wait=True)
202
+
203
async def load_model_async(model_id: str):
    """Run the blocking model load in the thread pool, off the event loop."""
    event_loop = asyncio.get_event_loop()
    await event_loop.run_in_executor(executor, load_model_sync, model_id)
206
+
207
def load_model_sync(model_id: str):
    """Synchronously (re)load a model profile and its companion processors.

    Unloads any previously loaded model first, then loads the profile named by
    `model_id` and, for multimodal profiles, prepares the tokenizer's visual
    tokens and an image processor. Mutates the module globals `model`,
    `tokenizer`, `image_processor` and `current_profile`; raises on failure so
    the caller can mark the server as not ready.
    """
    global model, tokenizer, image_processor, current_profile

    try:
        # Unload the previous model to free memory before loading a new one.
        if model is not None:
            logger.info("๐Ÿ—‘๏ธ ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์ค‘...")
            del model
            del tokenizer
            model = None
            tokenizer = None
            import gc
            gc.collect()
            logger.info("โœ… ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์™„๋ฃŒ")

        logger.info(f"๐Ÿ“ฅ '{model_id}' ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
        current_profile = get_model_profile(model_id)
        model, tokenizer = current_profile.load_model()

        # Debug: report model size when the API exposes it.
        if hasattr(model, 'num_parameters'):
            logger.info(f"๐Ÿ“Š ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ˆ˜: {model.num_parameters():,}")

        # Multimodal profiles need visual-token setup and an image processor.
        if getattr(current_profile, 'multimodal', False):
            logger.info("๐Ÿ”ง ํ† ํฌ๋‚˜์ด์ € ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๊ธฐ๋Šฅ ํ™œ์„ฑํ™”...")
            # NOTE(review): mllm_setup appears to be a custom tokenizer hook of
            # the multimodal profile — confirm against the model package.
            tokenizer.mllm_setup(num_visual_tokens=1)
            from transformers import AutoImageProcessor
            image_processor = AutoImageProcessor.from_pretrained(current_profile.local_path, trust_remote_code=True, local_files_only=True)
            logger.info("โœ… ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋“œ ์„ฑ๊ณต!")
        else:
            image_processor = None

        logger.info(f"โœ… '{current_profile.display_name}' ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")

    except Exception as e:
        logger.error(f"โŒ load_model_sync ์‹คํŒจ: {e}")
        import traceback
        logger.error(f"๐Ÿ” ์ „์ฒด ์—๋Ÿฌ: {traceback.format_exc()}")
        raise
248
+
249
def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
                  temperature: Optional[float] = None, top_p: Optional[float] = None,
                  do_sample: Optional[bool] = None) -> dict:
    """[optimized] Unified synchronous generation for text-only and multimodal requests.

    Runs inside the thread-pool executor. Returns a dict with keys "text"
    (model answer) and "image_processed" (bool). User-supplied sampling
    parameters override the profile's generation config when given.
    """
    image_processed = False
    all_pixel_values = []
    combined_image_metas = None

    # --- 1. Image preprocessing (only when images were supplied and the
    #        loaded profile is multimodal) ---
    if image_data_list and getattr(current_profile, 'multimodal', False):
        logger.info(f"๐Ÿ–ผ๏ธ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์š”์ฒญ ์ฒ˜๋ฆฌ ์‹œ์ž‘... (์ด๋ฏธ์ง€ {len(image_data_list)}๊ฐœ)")
        image_processed = True
        all_image_metas = []

        for i, image_data in enumerate(image_data_list):
            pil_image = Image.open(io.BytesIO(image_data)).convert("RGB")
            processed_data = image_processor(pil_image)
            all_pixel_values.append(processed_data["pixel_values"])
            all_image_metas.append(processed_data["image_meta"])

        # Merge per-image metadata dicts into one dict of lists, keyed like
        # the first image's metadata.
        if all_image_metas:
            combined_image_metas = {}
            for key in all_image_metas[0].keys():
                combined_image_metas[key] = [meta[key] for meta in all_image_metas]

    # --- 2. Prompt construction (shared by text-only and multimodal paths) ---
    # One <image> placeholder per processed image; zero images yields an
    # ordinary text prompt.
    image_tokens = "<image>" * len(all_pixel_values)
    formatted_prompt = f"<|im_start|>user\n{image_tokens}{prompt}<|im_end|>\n<|im_start|>assistant\n"

    # --- 3. Tokenization (shared path) ---
    # NOTE(review): encode_prompt is a custom tokenizer method; presumably only
    # multimodal profiles provide it — confirm for text-only models.
    inputs = tokenizer.encode_prompt(prompt=formatted_prompt, image_meta=combined_image_metas)

    input_ids = inputs['input_ids'].unsqueeze(0).to(model.device)
    attention_mask = inputs['attention_mask'].unsqueeze(0).to(model.device)

    # --- 4. Generation (shared path) ---
    gen_config = current_profile.get_generation_config()

    # Caller-supplied parameters override the profile defaults.
    if max_length is not None: gen_config['max_new_tokens'] = max_length
    if temperature is not None: gen_config['temperature'] = temperature
    if top_p is not None: gen_config['top_p'] = top_p
    if do_sample is not None: gen_config['do_sample'] = do_sample

    with torch.no_grad():
        if image_processed:
            # Multimodal generation: pass pixel tensors and merged metadata.
            outputs = model.generate(
                pixel_values=all_pixel_values,
                image_metas=combined_image_metas,
                input_ids=input_ids,
                attention_mask=attention_mask,
                **gen_config
            )
        else:
            # Text-only generation (optimized path without vision inputs).
            outputs = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                **gen_config
            )

    # --- 5. Response extraction (shared logic) ---
    try:
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        response = current_profile.extract_response(full_text, formatted_prompt)
        return {"text": response, "image_processed": image_processed}
    except Exception as e:
        logger.error(f"โŒ ์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜: {e}")
        raise HTTPException(status_code=500, detail=f"์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")
322
+
323
@app.post("/generate", response_model=GenerateResponse)
async def generate(prompt: str = Form(...),
                   image1: UploadFile = File(None),
                   image2: UploadFile = File(None),
                   image3: UploadFile = File(None),
                   image4: UploadFile = File(None),
                   max_length: Optional[int] = Form(None),
                   temperature: Optional[float] = Form(None),
                   top_p: Optional[float] = Form(None),
                   do_sample: Optional[bool] = Form(None)):
    """[unified endpoint] Text and multimodal generation.

    Accepts up to four optional images as multipart files; generation itself
    runs in the thread pool via generate_sync. Returns 503 until startup has
    loaded a model.
    """
    if not model_loaded:
        raise HTTPException(status_code=503, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

    start_time = time.time()
    loop = asyncio.get_event_loop()

    # Collect the raw bytes of every supplied image, preserving slot order.
    image_data_list = []
    for img in [image1, image2, image3, image4]:
        if img:
            image_data = await img.read()
            image_data_list.append(image_data)

    result = await loop.run_in_executor(executor, generate_sync, prompt, image_data_list,
                                       max_length, temperature, top_p, do_sample)

    processing_time = time.time() - start_time
    logger.info(f"โœ… ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ), ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ: {result['image_processed']}")

    return GenerateResponse(
        generated_text=result["text"],
        processing_time=processing_time,
        model_name=current_profile.display_name,
        image_processed=result["image_processed"]
    )
359
+
360
@app.post("/generate-multimodal", response_model=MultimodalGenerateResponse)
async def generate_multimodal(
    prompt: str = Form(...),
    image: UploadFile = File(None),
    model_id: Optional[str] = Form(None),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Multimodal text generation (single image + text), Kanana-specific.

    Non-Kanana profiles are redirected to the plain text path. For Kanana,
    the image is preprocessed and fed to the model; any failure in the vision
    path falls back to text-only generation with an "[image attached]" note
    in the prompt. Errors surface as HTTP 500.

    NOTE(review): this handler blocks the event loop while calling
    model.generate directly (unlike /generate which uses the executor).
    """
    start_time = time.time()

    try:
        # Refuse to run before startup finished loading a model.
        if not model_loaded or not model or not tokenizer or not current_profile:
            raise HTTPException(status_code=500, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค")

        # Redirect to the generic text path when the model is not Kanana.
        if "kanana" not in current_profile.model_name.lower():
            logger.warning("๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์š”์ฒญ์ด์ง€๋งŒ Kanana ๋ชจ๋ธ์ด ์•„๋‹˜ - ์ผ๋ฐ˜ ํ…์ŠคํŠธ ์ƒ์„ฑ์œผ๋กœ ์ฒ˜๋ฆฌ")
            loop = asyncio.get_event_loop()
            result = await loop.run_in_executor(executor, generate_sync, prompt, None,
                                               max_length, temperature, top_p, do_sample)
            return MultimodalGenerateResponse(
                generated_text=result["text"],
                processing_time=time.time() - start_time,
                model_name=current_profile.display_name,
                model_id=model_id or current_profile.get_model_info()["model_name"],
                image_processed=False
            )

        logger.info(f"๐Ÿ–ผ๏ธ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹œ์ž‘: '{prompt}'")

        # --- Image preprocessing (best effort: failures fall back to text) ---
        pixel_values = None
        image_metas = None
        image_processed = False
        if image:
            logger.info(f"๐Ÿ“ธ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์ค‘: {image.filename}")
            try:
                # Read the uploaded file into a PIL image.
                image_data = await image.read()
                pil_image = Image.open(io.BytesIO(image_data))

                logger.info(f"โœ… ์ด๋ฏธ์ง€ ๋กœ๋“œ ์™„๋ฃŒ: {pil_image.size}")

                # Kanana-style preprocessing via the model's own processor.
                from transformers import AutoImageProcessor

                # NOTE(review): loads a fresh processor per request; the
                # module-level image_processor set at load time is shadowed
                # here — consider reusing it.
                image_processor = AutoImageProcessor.from_pretrained(
                    current_profile.get_model_info()["local_path"],
                    trust_remote_code=True
                )

                processed_images = image_processor(pil_image)
                pixel_values = processed_images["pixel_values"]
                image_metas = processed_images["image_meta"]

                # Move tensors to the model's device.
                pixel_values = pixel_values.to(model.device)
                if image_metas and "vision_grid_thw" in image_metas:
                    # Normalize vision_grid_thw to a tensor (it may arrive as
                    # a scalar, list or tuple) before moving it to the device.
                    grid_thw = image_metas["vision_grid_thw"]
                    if isinstance(grid_thw, (list, tuple)):
                        grid_thw = torch.tensor(grid_thw)
                    elif not isinstance(grid_thw, torch.Tensor):
                        grid_thw = torch.tensor([grid_thw])
                    image_metas["vision_grid_thw"] = grid_thw.to(model.device)

                # Debug logging of the preprocessed shapes.
                logger.info(f"๐Ÿ” pixel_values ํ˜•ํƒœ: {pixel_values.shape}")
                logger.info(f"๐Ÿ” image_metas keys: {list(image_metas.keys()) if image_metas else 'None'}")
                if image_metas and "vision_grid_thw" in image_metas:
                    logger.info(f"๐Ÿ” vision_grid_thw ํ˜•ํƒœ: {image_metas['vision_grid_thw'].shape}")

                image_processed = True
                logger.info(f"โœ… ์ด๋ฏธ์ง€ ํ…์„œ ๋ณ€ํ™˜ ์™„๋ฃŒ: {pixel_values.shape}")

            except Exception as e:
                # Any preprocessing failure degrades to text-only handling.
                logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
                pixel_values = None
                image_metas = None
                image_processed = False
                logger.info("๐Ÿ“ ์ด๋ฏธ์ง€ ์—†์ด ํ…์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค.")
        else:
            logger.info("๐Ÿ“ธ ์ด๋ฏธ์ง€ ์—†์Œ - ํ…์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌ")
            image_processed = False

        # --- Prompt construction (Kanana chat template) ---
        # Both branches currently build the identical prompt string.
        system_prompt = "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ๋Œ€ํ™”ํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. ๋ชจ๋“  ์‘๋‹ต์€ ํ•œ๊ตญ์–ด๋กœ ํ•ด์ฃผ์„ธ์š”."
        if image_processed:
            formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
            logger.info(f"๐Ÿ–ผ๏ธ Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ: '{formatted_prompt}'")
        else:
            formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
            logger.info(f"๐Ÿ” Kanana ํ…์ŠคํŠธ ์ „์šฉ ํ”„๋กฌํ”„ํŠธ: '{formatted_prompt}'")

        # --- Tokenization ---
        # NOTE(review): max_length=100 truncates long prompts aggressively.
        inputs = tokenizer(
            formatted_prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=100,
        )

        # Some tokenizers emit token_type_ids the model does not accept.
        if 'token_type_ids' in inputs:
            del inputs['token_type_ids']

        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        logger.info(f"์ž…๋ ฅ ํ† ํฐ ์ˆ˜: {inputs['input_ids'].shape[1]}")

        # Generation settings with Kanana-oriented defaults.
        max_new_tokens = max_length or 100
        temperature = temperature or 0.7
        top_p = top_p or 0.9
        do_sample = do_sample if do_sample is not None else True

        with torch.no_grad():
            if image_processed and pixel_values is not None:
                # Attempt the genuine multimodal path through the Kanana model.
                logger.info("๐Ÿ” Kanana ๋ชจ๋ธ์˜ ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹œ๋„...")

                try:
                    # Re-normalize vision_grid_thw defensively before use.
                    if 'vision_grid_thw' in image_metas:
                        grid_thw = image_metas['vision_grid_thw']
                        if isinstance(grid_thw, (list, tuple)):
                            grid_thw = torch.tensor(grid_thw)
                        elif not isinstance(grid_thw, torch.Tensor):
                            grid_thw = torch.tensor([grid_thw])
                        image_metas['vision_grid_thw'] = grid_thw.to(model.device)

                    # Run vision encoder and projector separately.
                    visual_features = model.forward_vision(pixel_values, image_metas)
                    visual_embeds = model.forward_projector(visual_features, image_metas)

                    # Text embeddings for the tokenized prompt.
                    text_embeds = model.embed_text_tokens(inputs["input_ids"])

                    # Match the visual embedding dtype to the text embeddings.
                    target_dtype = text_embeds.dtype
                    visual_embeds_converted = visual_embeds.to(target_dtype)

                    # Flatten text embeddings to splice in visual embeddings.
                    from einops import rearrange
                    flattened_text_embeds = rearrange(text_embeds, "b l d -> (b l) d")
                    flattened_input_ids = rearrange(inputs["input_ids"], "b l -> (b l)")

                    # Insert visual embeddings at positions of the -1 tokens.
                    mask = (flattened_input_ids == -1)
                    if mask.sum() > 0:
                        flattened_text_embeds[mask] = visual_embeds_converted[:mask.sum()]

                    # Restore batch shape.
                    input_embeds = rearrange(flattened_text_embeds, "(b l) d -> b l d", b=inputs["input_ids"].shape[0])
                    attention_mask = inputs["attention_mask"]

                    # Align input_embeds dtype with the language model.
                    # NOTE(review): input_embeds/attention_mask built above are
                    # not actually passed to generate below — presumably the
                    # model rebuilds them internally; confirm.
                    language_model_dtype = next(model.language_model.parameters()).dtype
                    if input_embeds.dtype != language_model_dtype:
                        input_embeds = input_embeds.to(language_model_dtype)

                    # Use the Kanana model's own generate with vision inputs.
                    outputs = model.generate(
                        input_ids=inputs["input_ids"],
                        pixel_values=pixel_values,
                        image_metas=image_metas,
                        attention_mask=inputs["attention_mask"],
                        max_new_tokens=max_new_tokens,
                        do_sample=do_sample,
                        temperature=temperature,
                        top_k=40,
                        top_p=top_p,
                        repetition_penalty=1.1,
                        no_repeat_ngram_size=2,
                        pad_token_id=tokenizer.eos_token_id,
                        eos_token_id=tokenizer.eos_token_id,
                        use_cache=True
                    )
                    logger.info("โœ… ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์„ฑ๊ณต!")

                except Exception as e:
                    logger.error(f"โŒ ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
                    logger.error(f"๐Ÿ” ์˜ค๋ฅ˜ ํƒ€์ž…: {type(e).__name__}")
                    import traceback
                    logger.error(f"๐Ÿ” ์ƒ์„ธ ์˜ค๋ฅ˜: {traceback.format_exc()}")
                    logger.info("๐Ÿ”„ fallback: ํ…์ŠคํŠธ ์ „์šฉ ์ฒ˜๋ฆฌ๋กœ ์ „ํ™˜")

                    # Fallback: text-only generation with an annotated prompt.
                    enhanced_prompt = f"[์ด๋ฏธ์ง€ ์ฒจ๋ถ€๋จ] {prompt}"
                    enhanced_formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{enhanced_prompt}<|im_end|>\n<|im_start|>assistant\n"

                    enhanced_inputs = tokenizer(
                        enhanced_formatted_prompt,
                        return_tensors="pt",
                        padding=True,
                        truncation=True,
                        max_length=256
                    )

                    if 'token_type_ids' in enhanced_inputs:
                        del enhanced_inputs['token_type_ids']

                    enhanced_inputs = {k: v.to(model.device) for k, v in enhanced_inputs.items()}

                    # Bypass the vision stack: generate through the inner LM.
                    outputs = model.language_model.generate(
                        input_ids=enhanced_inputs["input_ids"],
                        attention_mask=enhanced_inputs["attention_mask"],
                        max_new_tokens=max_new_tokens,
                        do_sample=do_sample,
                        temperature=temperature,
                        top_k=40,
                        top_p=top_p,
                        repetition_penalty=1.1,
                        no_repeat_ngram_size=2,
                        pad_token_id=tokenizer.eos_token_id,
                        eos_token_id=tokenizer.eos_token_id,
                        use_cache=True
                    )
            else:
                # Text-only generation.
                logger.info("๐Ÿ“ ํ…์ŠคํŠธ ์ „์šฉ ์ƒ์„ฑ ์‹œ์ž‘")
                outputs = model.generate(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"],
                    max_new_tokens=max_new_tokens,
                    do_sample=do_sample,
                    temperature=temperature,
                    top_k=40,
                    top_p=top_p,
                    repetition_penalty=1.1,
                    no_repeat_ngram_size=2,
                    pad_token_id=tokenizer.eos_token_id,
                    eos_token_id=tokenizer.eos_token_id,
                    use_cache=True
                )

        # --- Response extraction ---
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        logger.info(f"์ƒ์„ฑ๋œ ํ† ํฐ ์ˆ˜: {outputs.shape[1]}")
        logger.info(f"๋””์ฝ”๋”ฉ๋œ ์ „์ฒด ํ…์ŠคํŠธ: '{generated_text}'")

        # Slice out the assistant turn from the decoded chat transcript.
        if "<|im_start|>assistant" in generated_text:
            response = generated_text.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
            logger.info(f"๐Ÿ” Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์‘๋‹ต ์ถ”์ถœ: '{response}'")
        else:
            response = generated_text.strip()
            logger.info(f"๐Ÿ” Kanana ์ „์ฒด ํ…์ŠคํŠธ: '{response}'")

        processing_time = time.time() - start_time
        logger.info(f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์™„๋ฃŒ: {processing_time:.2f}์ดˆ, ํ…์ŠคํŠธ ๊ธธ์ด: {len(response)}, ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ: {image_processed}")

        return MultimodalGenerateResponse(
            generated_text=response,
            processing_time=processing_time,
            model_name=current_profile.display_name,
            model_id=model_id or current_profile.get_model_info()["model_name"],
            image_processed=image_processed
        )

    except Exception as e:
        processing_time = time.time() - start_time
        logger.error(f"โŒ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e} (์†Œ์š” ์‹œ๊ฐ„: {processing_time:.2f}์ดˆ)")
        raise HTTPException(status_code=500, detail=f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹คํŒจ: {str(e)}")
631
+
632
@app.get("/models")
async def list_models():
    """Return the registry of available models plus the active one."""
    active_model = current_profile.get_model_info() if current_profile else None
    return {"models": list_available_models(), "current_model": active_model}
639
+
640
@app.post("/switch-model")
async def switch_model(model_id: str):
    """Swap the served model for `model_id`; 500 when loading fails."""
    try:
        await load_model_async(model_id)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"๋ชจ๋ธ ๋ณ€๊ฒฝ ์‹คํŒจ: {str(e)}")
    return {
        "message": f"๋ชจ๋ธ ๋ณ€๊ฒฝ ์„ฑ๊ณต: {model_id}",
        "current_model": current_profile.display_name
    }
651
+
652
@app.get("/", response_model=dict)
async def root():
    """Service banner: name, version, active model, docs location."""
    active_name = current_profile.display_name if current_profile else "None"
    return {
        "message": "Lily LLM API v2 ์„œ๋ฒ„",
        "version": "2.0.0",
        "current_model": active_name,
        "docs": "/docs"
    }
661
+
662
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Liveness probe reporting load state and the model registry."""
    active_name = current_profile.display_name if current_profile else "None"
    return HealthResponse(
        status="healthy",
        model_loaded=model_loaded,
        current_model=active_name,
        available_models=list_available_models(),
    )
673
+
674
@app.post("/document/upload", response_model=DocumentUploadResponse)
async def upload_document(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),  # default user id
    document_id: Optional[str] = Form(None)  # document id (auto-generated when omitted)
):
    """Upload a document, index it for RAG, and attach an automatic summary.

    The upload is written to a temporary file because the RAG processor
    consumes file paths. Fix over the original: the temp file is now removed
    in a ``finally`` block, so it is no longer leaked when processing raises.
    Returns a DocumentUploadResponse; failures are reported in-band
    (success=False) rather than as HTTP errors.
    """
    start_time = time.time()
    temp_file_path = None

    try:
        # Generate a short document id when the caller did not supply one.
        if not document_id:
            import uuid
            document_id = str(uuid.uuid4())[:8]

        # Persist the upload to a temp file for the path-based RAG processor.
        temp_file_path = f"./temp_{document_id}_{file.filename}"
        with open(temp_file_path, "wb") as f:
            content = await file.read()
            f.write(content)

        # Chunk, embed and store the document in the vector store.
        result = rag_processor.process_and_store_document(
            user_id, document_id, temp_file_path
        )

        processing_time = time.time() - start_time
        logger.info(f"๐Ÿ“„ ๋ฌธ์„œ ์—…๋กœ๋“œ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ): {file.filename}")

        # After a successful upload, generate an automatic AI summary.
        if result["success"]:
            try:
                summary_query = f"์—…๋กœ๋“œ๋œ ๋ฌธ์„œ '{file.filename}'์˜ ์ฃผ์š” ๋‚ด์šฉ์„ ์š”์•ฝํ•ด์ฃผ์„ธ์š”."

                logger.info(f"๐Ÿค– ๋ฌธ์„œ ์—…๋กœ๋“œ ํ›„ AI ์‘๋‹ต ์ƒ์„ฑ ์‹œ์ž‘...")

                rag_result = rag_processor.generate_rag_response(
                    user_id, document_id, summary_query, llm_model=model
                )

                if rag_result["success"]:
                    logger.info(f"โœ… ์ž๋™ AI ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ: {len(rag_result['response'])} ๋ฌธ์ž")
                    result["auto_response"] = rag_result["response"]
                else:
                    logger.warning(f"โš ๏ธ ์ž๋™ AI ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {rag_result.get('error', 'Unknown error')}")

            except Exception as e:
                # Summary generation is best-effort; the upload still succeeds.
                logger.error(f"โŒ ์ž๋™ AI ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜: {e}")

        return DocumentUploadResponse(
            success=result["success"],
            document_id=document_id,
            message=result.get("message", ""),
            chunks=result.get("chunks"),
            latex_count=result.get("latex_count"),
            error=result.get("error"),
            auto_response=result.get("auto_response", "")
        )

    except Exception as e:
        logger.error(f"โŒ ๋ฌธ์„œ ์—…๋กœ๋“œ ์‹คํŒจ: {e}")
        return DocumentUploadResponse(
            success=False,
            document_id=document_id if document_id else "unknown",
            message="๋ฌธ์„œ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.",
            error=str(e)
        )
    finally:
        # Always clean up the temp file — previously leaked on failure paths.
        import os
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
747
+
748
@app.post("/rag/generate", response_model=RAGResponse)
async def generate_rag_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Answer `query` against a previously uploaded document via RAG.

    Passes the loaded LLM through only when it exposes `generate_text`;
    otherwise the processor produces a retrieval-only answer. Failures are
    reported in-band with success=False.
    """
    start_time = time.time()

    try:
        # Use the loaded model only when it supports the expected interface.
        llm_model = None
        if model is not None and hasattr(model, 'generate_text'):
            llm_model = model
            logger.info("โœ… ๋กœ๋“œ๋œ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•˜์—ฌ RAG ์‘๋‹ต ์ƒ์„ฑ")
        else:
            logger.warning("โš ๏ธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•„ ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์‘๋‹ต๋งŒ ์ƒ์„ฑ")

        # Retrieval + generation through the RAG processor.
        result = rag_processor.generate_rag_response(
            user_id, document_id, query, llm_model=llm_model
        )

        processing_time = time.time() - start_time
        logger.info(f"๐Ÿ” RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ)")

        return RAGResponse(
            success=result["success"],
            response=result["response"],
            context=result["context"],
            sources=result["sources"],
            search_results=result["search_results"],
            processing_time=processing_time
        )

    except Exception as e:
        logger.error(f"โŒ RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return RAGResponse(
            success=False,
            response=f"RAG ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=0.0
        )
797
+
798
@app.post("/rag/generate-hybrid", response_model=RAGResponse)
async def generate_hybrid_rag_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...),
    image1: UploadFile = File(None),
    image2: UploadFile = File(None),
    image3: UploadFile = File(None),
    image4: UploadFile = File(None),
    image5: UploadFile = File(None),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Hybrid RAG answer generation: document context plus up to 5 images.

    Each uploaded image is spilled to a temp file because the RAG processor
    consumes file paths. Fix over the original: temp images are now removed
    in a ``finally`` block, so they are no longer leaked when the processor
    raises. Failures are reported in-band with success=False.
    """
    start_time = time.time()
    image_files = []

    try:
        # Spill each uploaded image to its own temp file.
        uploaded_images = [image1, image2, image3, image4, image5]
        for i, img in enumerate(uploaded_images):
            if img:
                try:
                    import tempfile
                    with tempfile.NamedTemporaryFile(
                        suffix=f"_{i}.png",
                        delete=False,
                        prefix="hybrid_image_"
                    ) as temp_file:
                        image_data = await img.read()
                        temp_file.write(image_data)
                        image_files.append(temp_file.name)
                        logger.info(f"๐Ÿ“ธ ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ: {img.filename} -> {temp_file.name}")
                except Exception as e:
                    # A bad image is skipped; the remaining ones still count.
                    logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")

        # RAG generation with the real model instance and optional images.
        result = rag_processor.generate_rag_response(
            user_id, document_id, query,
            llm_model=model,
            image_files=image_files if image_files else None
        )

        processing_time = time.time() - start_time
        logger.info(f"๐Ÿ” ํ•˜์ด๋ธŒ๋ฆฌ๋“œ RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ)")

        return RAGResponse(
            success=result["success"],
            response=result["response"],
            context=result["context"],
            sources=result["sources"],
            search_results=result["search_results"],
            processing_time=processing_time
        )

    except Exception as e:
        logger.error(f"โŒ ํ•˜์ด๋ธŒ๋ฆฌ๋“œ RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return RAGResponse(
            success=False,
            response=f"์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=time.time() - start_time
        )
    finally:
        # Always delete temp images — previously leaked on the error path.
        for temp_file in image_files:
            try:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
                    logger.info(f"๐Ÿ—‘๏ธ ์ž„์‹œ ์ด๋ฏธ์ง€ ํŒŒ์ผ ์‚ญ์ œ: {temp_file}")
            except Exception as e:
                logger.warning(f"โš ๏ธ ์ž„์‹œ ํŒŒ์ผ ์‚ญ์ œ ์‹คํŒจ: {e}")
876
+
877
@app.get("/documents/{user_id}")
async def list_user_documents(user_id: str):
    """List every indexed document belonging to `user_id`."""
    try:
        from lily_llm_core.vector_store_manager import vector_store_manager
        return vector_store_manager.get_all_documents(user_id)
    except Exception as e:
        logger.error(f"โŒ ๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์‹คํŒจ: {e}")
        return {"documents": [], "total_docs": 0, "error": str(e)}
887
+
888
@app.delete("/document/{user_id}/{document_id}")
async def delete_document(user_id: str, document_id: str):
    """Remove one document (and its vectors) for the given user."""
    try:
        return rag_processor.delete_document(user_id, document_id)
    except Exception as e:
        logger.error(f"โŒ ๋ฌธ์„œ ์‚ญ์ œ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
897
+
898
+ # ์‚ฌ์šฉ์ž ๊ด€๋ฆฌ ์—”๋“œํฌ์ธํŠธ
899
@app.post("/user/create", response_model=UserResponse)
async def create_user(
    user_id: str = Form(...),
    username: Optional[str] = Form(None),
    email: Optional[str] = Form(None)
):
    """Register a new user record; echoes the stored fields back."""
    try:
        if not db_manager.add_user(user_id, username, email):
            return UserResponse(success=False, user_id=user_id, error="์‚ฌ์šฉ์ž ์ƒ์„ฑ ์‹คํŒจ")
        stored = db_manager.get_user(user_id) or {}
        return UserResponse(
            success=True,
            user_id=user_id,
            username=stored.get('username'),
            email=stored.get('email'),
            created_at=stored.get('created_at'),
        )
    except Exception as e:
        logger.error(f"โŒ ์‚ฌ์šฉ์ž ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
        return UserResponse(success=False, user_id=user_id, error=str(e))
922
+
923
@app.get("/user/{user_id}", response_model=UserResponse)
async def get_user_info(user_id: str):
    """Look up one user; success=False when the id is unknown."""
    try:
        record = db_manager.get_user(user_id)
        if not record:
            return UserResponse(success=False, user_id=user_id, error="์‚ฌ์šฉ์ž๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
        return UserResponse(
            success=True,
            user_id=user_id,
            username=record.get('username'),
            email=record.get('email'),
            created_at=record.get('created_at'),
        )
    except Exception as e:
        logger.error(f"โŒ ์‚ฌ์šฉ์ž ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
        return UserResponse(success=False, user_id=user_id, error=str(e))
941
+
942
+ # ์„ธ์…˜ ๊ด€๋ฆฌ ์—”๋“œํฌ์ธํŠธ
943
+ @app.post("/session/create", response_model=SessionResponse)
944
+ async def create_session(
945
+ user_id: str = Form(...),
946
+ session_name: Optional[str] = Form(None)
947
+ ):
948
+ """์ฑ„ํŒ… ์„ธ์…˜ ์ƒ์„ฑ"""
949
+ try:
950
+ session_id = db_manager.create_chat_session(user_id, session_name)
951
+ if session_id:
952
+ return SessionResponse(
953
+ success=True,
954
+ session_id=session_id,
955
+ session_name=session_name
956
+ )
957
+ else:
958
+ return SessionResponse(success=False, session_id="", error="์„ธ์…˜ ์ƒ์„ฑ ์‹คํŒจ")
959
+ except Exception as e:
960
+ logger.error(f"โŒ ์„ธ์…˜ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
961
+ return SessionResponse(success=False, session_id="", error=str(e))
962
+
963
+ @app.get("/sessions/{user_id}")
964
+ async def list_user_sessions(user_id: str):
965
+ """์‚ฌ์šฉ์ž์˜ ์„ธ์…˜ ๋ชฉ๋ก ์กฐํšŒ"""
966
+ try:
967
+ sessions = db_manager.get_user_sessions(user_id)
968
+ return {
969
+ "success": True,
970
+ "user_id": user_id,
971
+ "sessions": sessions,
972
+ "total_sessions": len(sessions)
973
+ }
974
+ except Exception as e:
975
+ logger.error(f"โŒ ์„ธ์…˜ ๋ชฉ๋ก ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
976
+ return {"success": False, "error": str(e)}
977
+
978
+ # ์ฑ„ํŒ… ๋ฉ”์‹œ์ง€ ์—”๋“œํฌ์ธํŠธ
979
+ @app.post("/chat/message", response_model=ChatMessageResponse)
980
+ async def add_chat_message(
981
+ session_id: str = Form(...),
982
+ user_id: str = Form(...),
983
+ message_type: str = Form(...),
984
+ content: str = Form(...)
985
+ ):
986
+ """์ฑ„ํŒ… ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€"""
987
+ try:
988
+ success = db_manager.add_chat_message(session_id, user_id, message_type, content)
989
+ if success:
990
+ return ChatMessageResponse(
991
+ success=True,
992
+ message_id=0, # ์‹ค์ œ ID๋Š” DB์—์„œ ์ž๋™ ์ƒ์„ฑ
993
+ content=content,
994
+ message_type=message_type,
995
+ timestamp=datetime.now().isoformat()
996
+ )
997
+ else:
998
+ return ChatMessageResponse(
999
+ success=False,
1000
+ message_id=0,
1001
+ content="",
1002
+ message_type="",
1003
+ timestamp="",
1004
+ error="๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€ ์‹คํŒจ"
1005
+ )
1006
+ except Exception as e:
1007
+ logger.error(f"โŒ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€ ์˜ค๋ฅ˜: {e}")
1008
+ return ChatMessageResponse(
1009
+ success=False,
1010
+ message_id=0,
1011
+ content="",
1012
+ message_type="",
1013
+ timestamp="",
1014
+ error=str(e)
1015
+ )
1016
+
1017
+ @app.get("/chat/history/{session_id}")
1018
+ async def get_chat_history(session_id: str, limit: int = 50):
1019
+ """์ฑ„ํŒ… ํžˆ์Šคํ† ๋ฆฌ ์กฐํšŒ"""
1020
+ try:
1021
+ messages = db_manager.get_chat_history(session_id, limit)
1022
+ return {
1023
+ "success": True,
1024
+ "session_id": session_id,
1025
+ "messages": messages,
1026
+ "total_messages": len(messages)
1027
+ }
1028
+ except Exception as e:
1029
+ logger.error(f"โŒ ์ฑ„ํŒ… ํžˆ์Šคํ† ๋ฆฌ ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
1030
+ return {"success": False, "error": str(e)}
1031
+
1032
+ # ๋ฌธ์„œ ๊ด€๋ฆฌ ์—”๋“œํฌ์ธํŠธ (DB ์—ฐ๋™)
1033
+ @app.get("/documents/db/{user_id}")
1034
+ async def list_user_documents_db(user_id: str):
1035
+ """์‚ฌ์šฉ์ž์˜ ๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ (DB ๊ธฐ๋ฐ˜)"""
1036
+ try:
1037
+ documents = db_manager.get_user_documents(user_id)
1038
+ return {
1039
+ "success": True,
1040
+ "user_id": user_id,
1041
+ "documents": documents,
1042
+ "total_documents": len(documents)
1043
+ }
1044
+ except Exception as e:
1045
+ logger.error(f"โŒ ๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
1046
+ return {"success": False, "error": str(e)}
1047
+
1048
+ # ์ธ์ฆ ์—”๋“œํฌ์ธํŠธ
1049
+ @app.post("/auth/login", response_model=LoginResponse)
1050
+ async def login(
1051
+ user_id: str = Form(...),
1052
+ password: str = Form(...)
1053
+ ):
1054
+ """์‚ฌ์šฉ์ž ๋กœ๊ทธ์ธ"""
1055
+ try:
1056
+ # ์‚ฌ์šฉ์ž ์ •๋ณด ์กฐํšŒ
1057
+ user_info = db_manager.get_user(user_id)
1058
+ if not user_info:
1059
+ return LoginResponse(success=False, error="์‚ฌ์šฉ์ž๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
1060
+
1061
+ # ๋น„๋ฐ€๋ฒˆํ˜ธ ๊ฒ€์ฆ (๊ฐ„๋‹จํ•œ ๊ฒ€์ฆ - ์‹ค์ œ๋กœ๋Š” DB์— ์ €์žฅ๋œ ํ•ด์‹œ์™€ ๋น„๊ต)
1062
+ if not auth_manager.verify_password(password, "dummy_hash"): # ์‹ค์ œ ๊ตฌํ˜„์—์„œ๋Š” DB์˜ ํ•ด์‹œ์™€ ๋น„๊ต
1063
+ return LoginResponse(success=False, error="๋น„๋ฐ€๋ฒˆํ˜ธ๊ฐ€ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Šต๋‹ˆ๋‹ค")
1064
+
1065
+ # ํ† ํฐ ์ƒ์„ฑ
1066
+ tokens = auth_manager.create_user_tokens(user_id, user_info.get('username'))
1067
+
1068
+ return LoginResponse(
1069
+ success=True,
1070
+ access_token=tokens['access_token'],
1071
+ refresh_token=tokens['refresh_token'],
1072
+ token_type=tokens['token_type'],
1073
+ user_id=user_id,
1074
+ username=user_info.get('username')
1075
+ )
1076
+ except Exception as e:
1077
+ logger.error(f"โŒ ๋กœ๊ทธ์ธ ์˜ค๋ฅ˜: {e}")
1078
+ return LoginResponse(success=False, error=str(e))
1079
+
1080
+ @app.post("/auth/refresh", response_model=TokenResponse)
1081
+ async def refresh_token(refresh_token: str = Form(...)):
1082
+ """์•ก์„ธ์Šค ํ† ํฐ ๊ฐฑ์‹ """
1083
+ try:
1084
+ new_access_token = auth_manager.refresh_access_token(refresh_token)
1085
+
1086
+ if new_access_token:
1087
+ return TokenResponse(
1088
+ success=True,
1089
+ access_token=new_access_token,
1090
+ token_type="bearer"
1091
+ )
1092
+ else:
1093
+ return TokenResponse(success=False, error="์œ ํšจํ•˜์ง€ ์•Š์€ ๋ฆฌํ”„๋ ˆ์‹œ ํ† ํฐ์ž…๋‹ˆ๋‹ค")
1094
+ except Exception as e:
1095
+ logger.error(f"โŒ ํ† ํฐ ๊ฐฑ์‹  ์˜ค๋ฅ˜: {e}")
1096
+ return TokenResponse(success=False, error=str(e))
1097
+
1098
+ @app.post("/auth/register", response_model=LoginResponse)
1099
+ async def register(
1100
+ user_id: str = Form(...),
1101
+ username: str = Form(...),
1102
+ password: str = Form(...),
1103
+ email: Optional[str] = Form(None)
1104
+ ):
1105
+ """์‚ฌ์šฉ์ž ๋“ฑ๋ก"""
1106
+ try:
1107
+ # ๊ธฐ์กด ์‚ฌ์šฉ์ž ํ™•์ธ
1108
+ existing_user = db_manager.get_user(user_id)
1109
+ if existing_user:
1110
+ return LoginResponse(success=False, error="์ด๋ฏธ ์กด์žฌํ•˜๋Š” ์‚ฌ์šฉ์ž ID์ž…๋‹ˆ๋‹ค")
1111
+
1112
+ # ๋น„๋ฐ€๋ฒˆํ˜ธ ํ•ด์‹ฑ
1113
+ hashed_password = auth_manager.hash_password(password)
1114
+
1115
+ # ์‚ฌ์šฉ์ž ์ƒ์„ฑ (์‹ค์ œ ๊ตฌํ˜„์—์„œ๋Š” hashed_password๋ฅผ DB์— ์ €์žฅ)
1116
+ success = db_manager.add_user(user_id, username, email)
1117
+
1118
+ if success:
1119
+ # ํ† ํฐ ์ƒ์„ฑ
1120
+ tokens = auth_manager.create_user_tokens(user_id, username)
1121
+
1122
+ return LoginResponse(
1123
+ success=True,
1124
+ access_token=tokens['access_token'],
1125
+ refresh_token=tokens['refresh_token'],
1126
+ token_type=tokens['token_type'],
1127
+ user_id=user_id,
1128
+ username=username
1129
+ )
1130
+ else:
1131
+ return LoginResponse(success=False, error="์‚ฌ์šฉ์ž ๋“ฑ๋ก์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค")
1132
+ except Exception as e:
1133
+ logger.error(f"โŒ ์‚ฌ์šฉ์ž ๋“ฑ๋ก ์˜ค๋ฅ˜: {e}")
1134
+ return LoginResponse(success=False, error=str(e))
1135
+
1136
+ @app.get("/auth/me")
1137
+ async def get_current_user_info(credentials: HTTPAuthorizationCredentials = Depends(auth_manager.security)):
1138
+ """ํ˜„์žฌ ์‚ฌ์šฉ์ž ์ •๋ณด ์กฐํšŒ"""
1139
+ try:
1140
+ user_info = auth_manager.get_current_user(credentials)
1141
+ return {
1142
+ "success": True,
1143
+ "user_id": user_info.get("sub"),
1144
+ "username": user_info.get("username"),
1145
+ "token_type": user_info.get("type")
1146
+ }
1147
+ except Exception as e:
1148
+ logger.error(f"โŒ ์‚ฌ์šฉ์ž ์ •๋ณด ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
1149
+ return {"success": False, "error": str(e)}
1150
+
1151
+ # WebSocket ์‹ค์‹œ๊ฐ„ ์ฑ„ํŒ… ์—”๋“œํฌ์ธํŠธ
1152
+ @app.websocket("/ws/{user_id}")
1153
+ async def websocket_endpoint(websocket: WebSocket, user_id: str, session_id: str = None):
1154
+ """WebSocket ์‹ค์‹œ๊ฐ„ ์ฑ„ํŒ… ์—”๋“œํฌ์ธํŠธ"""
1155
+ try:
1156
+ # ์—ฐ๊ฒฐ ์ˆ˜๋ฝ
1157
+ await connection_manager.connect(websocket, user_id, session_id)
1158
+
1159
+ # ์—ฐ๊ฒฐ ์ƒํƒœ ๋ธŒ๋กœ๋“œ์บ์ŠคํŠธ
1160
+ await connection_manager.broadcast_message({
1161
+ "type": "user_connected",
1162
+ "user_id": user_id,
1163
+ "session_id": session_id,
1164
+ "timestamp": datetime.now().isoformat()
1165
+ }, exclude_user=user_id)
1166
+
1167
+ # ๋ฉ”์‹œ์ง€ ์ˆ˜์‹  ๋ฃจํ”„
1168
+ while True:
1169
+ try:
1170
+ # ๋ฉ”์‹œ์ง€ ์ˆ˜์‹ 
1171
+ data = await websocket.receive_text()
1172
+ message_data = json.loads(data)
1173
+
1174
+ # ๋ฉ”์‹œ์ง€ ํƒ€์ž…์— ๋”ฐ๋ฅธ ์ฒ˜๋ฆฌ
1175
+ message_type = message_data.get("type", "chat")
1176
+
1177
+ if message_type == "chat":
1178
+ # ์ฑ„ํŒ… ๋ฉ”์‹œ์ง€ ์ฒ˜๋ฆฌ
1179
+ content = message_data.get("content", "")
1180
+ session_id = message_data.get("session_id")
1181
+
1182
+ # DB์— ๋ฉ”์‹œ์ง€ ์ €์žฅ
1183
+ if session_id:
1184
+ db_manager.add_chat_message(
1185
+ session_id=session_id,
1186
+ user_id=user_id,
1187
+ message_type="user",
1188
+ content=content
1189
+ )
1190
+
1191
+ # ์„ธ์…˜์˜ ๋‹ค๋ฅธ ์‚ฌ์šฉ์ž๋“ค์—๊ฒŒ ๋ฉ”์‹œ์ง€ ์ „์†ก
1192
+ await connection_manager.send_session_message({
1193
+ "type": "chat_message",
1194
+ "user_id": user_id,
1195
+ "content": content,
1196
+ "session_id": session_id,
1197
+ "timestamp": datetime.now().isoformat()
1198
+ }, session_id, exclude_user=user_id)
1199
+
1200
+ # AI ์‘๋‹ต ์ƒ์„ฑ (์„ ํƒ์ )
1201
+ if message_data.get("generate_ai_response", False):
1202
+ # AI ์‘๋‹ต ์ƒ์„ฑ ๋กœ์ง
1203
+ ai_response = await generate_ai_response(content, user_id)
1204
+
1205
+ # AI ์‘๋‹ต์„ DB์— ์ €์žฅ
1206
+ if session_id:
1207
+ db_manager.add_chat_message(
1208
+ session_id=session_id,
1209
+ user_id="ai_assistant",
1210
+ message_type="assistant",
1211
+ content=ai_response
1212
+ )
1213
+
1214
+ # AI ์‘๋‹ต์„ ์„ธ์…˜ ์‚ฌ์šฉ์ž๋“ค์—๊ฒŒ ์ „์†ก
1215
+ await connection_manager.send_session_message({
1216
+ "type": "ai_response",
1217
+ "user_id": "ai_assistant",
1218
+ "content": ai_response,
1219
+ "session_id": session_id,
1220
+ "timestamp": datetime.now().isoformat()
1221
+ }, session_id)
1222
+
1223
+ elif message_type == "typing":
1224
+ # ํƒ€์ดํ•‘ ์ƒํƒœ ์ „์†ก
1225
+ await connection_manager.send_session_message({
1226
+ "type": "user_typing",
1227
+ "user_id": user_id,
1228
+ "session_id": message_data.get("session_id"),
1229
+ "timestamp": datetime.now().isoformat()
1230
+ }, message_data.get("session_id"), exclude_user=user_id)
1231
+
1232
+ elif message_type == "join_session":
1233
+ # ์„ธ์…˜ ์ฐธ์—ฌ
1234
+ new_session_id = message_data.get("session_id")
1235
+ if new_session_id:
1236
+ # ๊ธฐ์กด ์„ธ์…˜์—์„œ ์ œ๊ฑฐ
1237
+ if user_id in connection_manager.connection_info:
1238
+ old_session_id = connection_manager.connection_info[user_id].get("session_id")
1239
+ if old_session_id and old_session_id in connection_manager.session_connections:
1240
+ connection_manager.session_connections[old_session_id].discard(user_id)
1241
+
1242
+ # ์ƒˆ ์„ธ์…˜์— ์ถ”๊ฐ€
1243
+ if new_session_id not in connection_manager.session_connections:
1244
+ connection_manager.session_connections[new_session_id] = set()
1245
+ connection_manager.session_connections[new_session_id].add(user_id)
1246
+
1247
+ # ์—ฐ๊ฒฐ ์ •๋ณด ์—…๋ฐ์ดํŠธ
1248
+ if user_id in connection_manager.connection_info:
1249
+ connection_manager.connection_info[user_id]["session_id"] = new_session_id
1250
+
1251
+ # ์„ธ์…˜ ์ฐธ์—ฌ ์•Œ๋ฆผ
1252
+ await connection_manager.send_session_message({
1253
+ "type": "user_joined_session",
1254
+ "user_id": user_id,
1255
+ "session_id": new_session_id,
1256
+ "timestamp": datetime.now().isoformat()
1257
+ }, new_session_id, exclude_user=user_id)
1258
+
1259
+ logger.info(f"๐Ÿ“จ WebSocket ๋ฉ”์‹œ์ง€ ์ฒ˜๋ฆฌ: {user_id} - {message_type}")
1260
+
1261
+ except WebSocketDisconnect:
1262
+ logger.info(f"๐Ÿ”Œ WebSocket ์—ฐ๊ฒฐ ๋Š๊น€: {user_id}")
1263
+ break
1264
+ except json.JSONDecodeError:
1265
+ logger.warning(f"โš ๏ธ ์ž˜๋ชป๋œ JSON ํ˜•์‹: {user_id}")
1266
+ await websocket.send_text(json.dumps({
1267
+ "type": "error",
1268
+ "message": "์ž˜๋ชป๋œ ๋ฉ”์‹œ์ง€ ํ˜•์‹์ž…๋‹ˆ๋‹ค."
1269
+ }))
1270
+ except Exception as e:
1271
+ logger.error(f"โŒ WebSocket ๋ฉ”์‹œ์ง€ ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
1272
+ await websocket.send_text(json.dumps({
1273
+ "type": "error",
1274
+ "message": "๋ฉ”์‹œ์ง€ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค."
1275
+ }))
1276
+
1277
+ except WebSocketDisconnect:
1278
+ logger.info(f"๐Ÿ”Œ WebSocket ์—ฐ๊ฒฐ ๋Š๊น€: {user_id}")
1279
+ except Exception as e:
1280
+ logger.error(f"โŒ WebSocket ์—”๋“œํฌ์ธํŠธ ์˜ค๋ฅ˜: {e}")
1281
+ finally:
1282
+ # ์—ฐ๊ฒฐ ํ•ด์ œ
1283
+ connection_manager.disconnect(user_id)
1284
+
1285
+ # ์—ฐ๊ฒฐ ํ•ด์ œ ์•Œ๋ฆผ
1286
+ await connection_manager.broadcast_message({
1287
+ "type": "user_disconnected",
1288
+ "user_id": user_id,
1289
+ "timestamp": datetime.now().isoformat()
1290
+ }, exclude_user=user_id)
1291
+
1292
+ async def generate_ai_response(content: str, user_id: str) -> str:
1293
+ """AI ์‘๋‹ต ์ƒ์„ฑ (๊ฐ„๋‹จํ•œ ์˜ˆ์‹œ)"""
1294
+ try:
1295
+ # ํ˜„์žฌ ๋ชจ๋ธ๋กœ ์‘๋‹ต ์ƒ์„ฑ
1296
+ response = await generate_sync(content, user_id)
1297
+ return response.get("response", "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
1298
+ except Exception as e:
1299
+ logger.error(f"โŒ AI ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
1300
+ return "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
1301
+
1302
+ # WebSocket ์ƒํƒœ ์กฐํšŒ ์—”๋“œํฌ์ธํŠธ
1303
+ @app.get("/ws/status")
1304
+ async def get_websocket_status():
1305
+ """WebSocket ์—ฐ๊ฒฐ ์ƒํƒœ ์กฐํšŒ"""
1306
+ return {
1307
+ "active_connections": connection_manager.get_connection_count(),
1308
+ "active_users": connection_manager.get_active_users(),
1309
+ "sessions": list(connection_manager.session_connections.keys())
1310
+ }
1311
+
1312
+ # Celery ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—… ์—”๋“œํฌ์ธํŠธ
1313
+ @app.post("/tasks/document/process")
1314
+ async def start_document_processing(
1315
+ user_id: str = Form(...),
1316
+ document_id: str = Form(...),
1317
+ file_path: str = Form(...),
1318
+ file_type: str = Form(...)
1319
+ ):
1320
+ """๋ฌธ์„œ ์ฒ˜๋ฆฌ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—… ์‹œ์ž‘"""
1321
+ try:
1322
+ task = process_document_async.delay(user_id, document_id, file_path, file_type)
1323
+
1324
+ return {
1325
+ "success": True,
1326
+ "task_id": task.id,
1327
+ "status": "started",
1328
+ "message": "๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ž‘์—…์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
1329
+ }
1330
+ except Exception as e:
1331
+ logger.error(f"โŒ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ž‘์—… ์‹œ์ž‘ ์‹คํŒจ: {e}")
1332
+ return {
1333
+ "success": False,
1334
+ "error": str(e)
1335
+ }
1336
+
1337
+ @app.post("/tasks/ai/generate")
1338
+ async def start_ai_generation(
1339
+ user_id: str = Form(...),
1340
+ session_id: str = Form(...),
1341
+ prompt: str = Form(...),
1342
+ model_id: Optional[str] = Form(None)
1343
+ ):
1344
+ """AI ์‘๋‹ต ์ƒ์„ฑ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—… ์‹œ์ž‘"""
1345
+ try:
1346
+ task = generate_ai_response_async.delay(user_id, session_id, prompt, model_id)
1347
+
1348
+ return {
1349
+ "success": True,
1350
+ "task_id": task.id,
1351
+ "status": "started",
1352
+ "message": "AI ์‘๋‹ต ์ƒ์„ฑ ์ž‘์—…์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
1353
+ }
1354
+ except Exception as e:
1355
+ logger.error(f"โŒ AI ์‘๋‹ต ์ƒ์„ฑ ์ž‘์—… ์‹œ์ž‘ ์‹คํŒจ: {e}")
1356
+ return {
1357
+ "success": False,
1358
+ "error": str(e)
1359
+ }
1360
+
1361
+ @app.post("/tasks/rag/query")
1362
+ async def start_rag_query(
1363
+ user_id: str = Form(...),
1364
+ query: str = Form(...),
1365
+ document_id: str = Form(...)
1366
+ ):
1367
+ """RAG ์ฟผ๋ฆฌ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—… ์‹œ์ž‘"""
1368
+ try:
1369
+ task = rag_query_async.delay(user_id, query, document_id)
1370
+
1371
+ return {
1372
+ "success": True,
1373
+ "task_id": task.id,
1374
+ "status": "started",
1375
+ "message": "RAG ์ฟผ๋ฆฌ ์ž‘์—…์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
1376
+ }
1377
+ except Exception as e:
1378
+ logger.error(f"โŒ RAG ์ฟผ๋ฆฌ ์ž‘์—… ์‹œ์ž‘ ์‹คํŒจ: {e}")
1379
+ return {
1380
+ "success": False,
1381
+ "error": str(e)
1382
+ }
1383
+
1384
+ @app.post("/tasks/documents/batch")
1385
+ async def start_batch_processing(
1386
+ user_id: str = Form(...),
1387
+ document_ids: str = Form(...) # JSON ๋ฌธ์ž์—ด๋กœ ์ „๋‹ฌ
1388
+ ):
1389
+ """๋ฌธ์„œ ์ผ๊ด„ ์ฒ˜๋ฆฌ ๋ฐฑ๊ทธ๋ผ์šด๋“œ ์ž‘์—… ์‹œ์ž‘"""
1390
+ try:
1391
+ import json
1392
+ doc_ids = json.loads(document_ids)
1393
+
1394
+ task = batch_process_documents_async.delay(user_id, doc_ids)
1395
+
1396
+ return {
1397
+ "success": True,
1398
+ "task_id": task.id,
1399
+ "status": "started",
1400
+ "message": f"๋ฌธ์„œ ์ผ๊ด„ ์ฒ˜๋ฆฌ ์ž‘์—…์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ({len(doc_ids)}๊ฐœ ๋ฌธ์„œ)"
1401
+ }
1402
+ except Exception as e:
1403
+ logger.error(f"โŒ ๋ฌธ์„œ ์ผ๊ด„ ์ฒ˜๋ฆฌ ์ž‘์—… ์‹œ์ž‘ ์‹คํŒจ: {e}")
1404
+ return {
1405
+ "success": False,
1406
+ "error": str(e)
1407
+ }
1408
+
1409
+ @app.get("/tasks/{task_id}")
1410
+ async def get_task_status_endpoint(task_id: str):
1411
+ """์ž‘์—… ์ƒํƒœ ์กฐํšŒ"""
1412
+ try:
1413
+ status = get_task_status(task_id)
1414
+
1415
+ if status:
1416
+ return {
1417
+ "success": True,
1418
+ "task_id": task_id,
1419
+ "status": status["status"],
1420
+ "result": status["result"],
1421
+ "info": status["info"]
1422
+ }
1423
+ else:
1424
+ return {
1425
+ "success": False,
1426
+ "error": "์ž‘์—…์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
1427
+ }
1428
+ except Exception as e:
1429
+ logger.error(f"โŒ ์ž‘์—… ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {e}")
1430
+ return {
1431
+ "success": False,
1432
+ "error": str(e)
1433
+ }
1434
+
1435
+ @app.delete("/tasks/{task_id}")
1436
+ async def cancel_task_endpoint(task_id: str):
1437
+ """์ž‘์—… ์ทจ์†Œ"""
1438
+ try:
1439
+ success = cancel_task(task_id)
1440
+
1441
+ if success:
1442
+ return {
1443
+ "success": True,
1444
+ "task_id": task_id,
1445
+ "message": "์ž‘์—…์ด ์ทจ์†Œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
1446
+ }
1447
+ else:
1448
+ return {
1449
+ "success": False,
1450
+ "error": "์ž‘์—… ์ทจ์†Œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
1451
+ }
1452
+ except Exception as e:
1453
+ logger.error(f"โŒ ์ž‘์—… ์ทจ์†Œ ์‹คํŒจ: {e}")
1454
+ return {
1455
+ "success": False,
1456
+ "error": str(e)
1457
+ }
1458
+
1459
+ # ์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง ์—”๋“œํฌ์ธํŠธ
1460
+ @app.post("/monitoring/start")
1461
+ async def start_performance_monitoring():
1462
+ """์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์ž‘"""
1463
+ try:
1464
+ performance_monitor.start_monitoring()
1465
+ return {"message": "์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."}
1466
+ except Exception as e:
1467
+ logger.error(f"๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์ž‘ ์‹คํŒจ: {e}")
1468
+ raise HTTPException(status_code=500, detail=f"๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์ž‘ ์‹คํŒจ: {str(e)}")
1469
+
1470
+ @app.post("/monitoring/stop")
1471
+ async def stop_performance_monitoring():
1472
+ """์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง ์ค‘์ง€"""
1473
+ try:
1474
+ performance_monitor.stop_monitoring()
1475
+ return {"message": "์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง์ด ์ค‘์ง€๋˜์—ˆ์Šต๋‹ˆ๋‹ค."}
1476
+ except Exception as e:
1477
+ logger.error(f"๋ชจ๋‹ˆํ„ฐ๋ง ์ค‘์ง€ ์‹คํŒจ: {e}")
1478
+ raise HTTPException(status_code=500, detail=f"๋ชจ๋‹ˆํ„ฐ๋ง ์ค‘์ง€ ์‹คํŒจ: {str(e)}")
1479
+
1480
+ @app.get("/monitoring/status")
1481
+ async def get_monitoring_status():
1482
+ """๋ชจ๋‹ˆํ„ฐ๋ง ์ƒํƒœ ์กฐํšŒ"""
1483
+ try:
1484
+ summary = performance_monitor.get_performance_summary()
1485
+ return summary
1486
+ except Exception as e:
1487
+ logger.error(f"๋ชจ๋‹ˆ๏ฟฝ๏ฟฝ๋ง ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {e}")
1488
+ raise HTTPException(status_code=500, detail=f"๋ชจ๋‹ˆํ„ฐ๋ง ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {str(e)}")
1489
+
1490
+ @app.get("/monitoring/health")
1491
+ async def get_system_health():
1492
+ """์‹œ์Šคํ…œ ๊ฑด๊ฐ• ์ƒํƒœ ์กฐํšŒ"""
1493
+ try:
1494
+ health = performance_monitor.get_system_health()
1495
+ return {
1496
+ "status": health.status,
1497
+ "cpu_health": health.cpu_health,
1498
+ "memory_health": health.memory_health,
1499
+ "disk_health": health.disk_health,
1500
+ "network_health": health.network_health,
1501
+ "recommendations": health.recommendations
1502
+ }
1503
+ except Exception as e:
1504
+ logger.error(f"์‹œ์Šคํ…œ ๊ฑด๊ฐ• ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {e}")
1505
+ raise HTTPException(status_code=500, detail=f"์‹œ์Šคํ…œ ๊ฑด๊ฐ• ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {str(e)}")
1506
+
1507
+ @app.post("/monitoring/export")
1508
+ async def export_performance_metrics(file_path: str = "performance_metrics.json"):
1509
+ """์„ฑ๋Šฅ ๋ฉ”ํŠธ๋ฆญ ๋‚ด๋ณด๋‚ด๊ธฐ"""
1510
+ try:
1511
+ performance_monitor.export_metrics(file_path)
1512
+ return {"message": f"์„ฑ๋Šฅ ๋ฉ”ํŠธ๋ฆญ์ด {file_path}์— ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."}
1513
+ except Exception as e:
1514
+ logger.error(f"๋ฉ”ํŠธ๋ฆญ ๋‚ด๋ณด๋‚ด๊ธฐ ์‹คํŒจ: {e}")
1515
+ raise HTTPException(status_code=500, detail=f"๋ฉ”ํŠธ๋ฆญ ๋‚ด๋ณด๋‚ด๊ธฐ ์‹คํŒจ: {str(e)}")
1516
+
1517
+ # ============================================================================
1518
+ # ์ด๋ฏธ์ง€ OCR ์ „์šฉ API ์—”๋“œํฌ์ธํŠธ (๊ธฐ์กด ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์‹œ์Šคํ…œ๊ณผ ์™„์ „ํžˆ ๋ถ„๋ฆฌ)
1519
+ # ============================================================================
1520
+
1521
+ @app.post("/image-ocr/upload", response_model=DocumentUploadResponse)
1522
+ async def upload_image_document(
1523
+ file: UploadFile = File(...),
1524
+ user_id: str = Form("default_user"),
1525
+ document_id: Optional[str] = Form(None)
1526
+ ):
1527
+ """์ด๋ฏธ์ง€ OCR ์ „์šฉ ๋ฌธ์„œ ์—…๋กœ๋“œ"""
1528
+ start_time = time.time()
1529
+
1530
+ try:
1531
+ # ๋ฌธ์„œ ID ์ƒ์„ฑ (์ œ๊ณต๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ)
1532
+ if not document_id:
1533
+ import uuid
1534
+ document_id = str(uuid.uuid4())[:8]
1535
+
1536
+ # ์ž„์‹œ ํŒŒ์ผ ์ €์žฅ
1537
+ temp_file_path = f"./temp_image_{document_id}_{file.filename}"
1538
+ with open(temp_file_path, "wb") as f:
1539
+ content = await file.read()
1540
+ f.write(content)
1541
+
1542
+ # ์ด๋ฏธ์ง€ OCR ์ฒ˜๋ฆฌ ๋ฐ ๋ฒกํ„ฐ ์Šคํ† ์–ด์— ์ €์žฅ
1543
+ result = image_rag_processor.process_and_store_image_document(
1544
+ user_id, document_id, temp_file_path
1545
+ )
1546
+
1547
+ # ์ž„์‹œ ํŒŒ์ผ ์‚ญ์ œ
1548
+ import os
1549
+ if os.path.exists(temp_file_path):
1550
+ os.remove(temp_file_path)
1551
+
1552
+ processing_time = time.time() - start_time
1553
+ logger.info(f"๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ): {file.filename}")
1554
+
1555
+ return DocumentUploadResponse(
1556
+ success=result["success"],
1557
+ document_id=document_id,
1558
+ message=result.get("message", ""),
1559
+ chunks=result.get("chunks"),
1560
+ latex_count=result.get("latex_count"),
1561
+ error=result.get("error"),
1562
+ auto_response=result.get("auto_response", "")
1563
+ )
1564
+
1565
+ except Exception as e:
1566
+ logger.error(f"โŒ ์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์‹คํŒจ: {e}")
1567
+ return DocumentUploadResponse(
1568
+ success=False,
1569
+ document_id=document_id if 'document_id' in locals() else "unknown",
1570
+ message="์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.",
1571
+ error=str(e)
1572
+ )
1573
+
1574
+ @app.post("/image-ocr/generate", response_model=RAGResponse)
1575
+ async def generate_image_ocr_response(
1576
+ query: str = Form(...),
1577
+ user_id: str = Form("default_user"),
1578
+ document_id: str = Form(...)
1579
+ ):
1580
+ """์ด๋ฏธ์ง€ OCR ๊ธฐ๋ฐ˜ RAG ์‘๋‹ต ์ƒ์„ฑ"""
1581
+ start_time = time.time()
1582
+
1583
+ try:
1584
+ # ์ด๋ฏธ์ง€ OCR RAG ์‘๋‹ต ์ƒ์„ฑ
1585
+ result = image_rag_processor.generate_image_rag_response(
1586
+ user_id, document_id, query
1587
+ )
1588
+
1589
+ processing_time = time.time() - start_time
1590
+ result["processing_time"] = processing_time
1591
+
1592
+ logger.info(f"๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ)")
1593
+ return result
1594
+
1595
+ except Exception as e:
1596
+ logger.error(f"โŒ ์ด๋ฏธ์ง€ OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
1597
+ return RAGResponse(
1598
+ success=False,
1599
+ response=f"์ด๋ฏธ์ง€ OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
1600
+ context="",
1601
+ sources=[],
1602
+ search_results=0,
1603
+ processing_time=time.time() - start_time
1604
+ )
1605
+
1606
+ @app.get("/image-ocr/document/{user_id}/{document_id}")
1607
+ async def get_image_document_info(user_id: str, document_id: str):
1608
+ """์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ"""
1609
+ try:
1610
+ result = image_rag_processor.get_image_document_info(user_id, document_id)
1611
+ return result
1612
+ except Exception as e:
1613
+ logger.error(f"โŒ ์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์ •๋ณด ๏ฟฝ๏ฟฝ๏ฟฝํšŒ ์‹คํŒจ: {e}")
1614
+ return {
1615
+ "success": False,
1616
+ "error": str(e)
1617
+ }
1618
+
1619
+ @app.delete("/image-ocr/document/{user_id}/{document_id}")
1620
+ async def delete_image_document(user_id: str, document_id: str):
1621
+ """์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์‚ญ์ œ"""
1622
+ try:
1623
+ # ๋ฒกํ„ฐ ์Šคํ† ์–ด์—์„œ ๋ฌธ์„œ ์‚ญ์ œ
1624
+ success = vector_store_manager.delete_document(user_id, document_id)
1625
+
1626
+ if success:
1627
+ return {
1628
+ "success": True,
1629
+ "message": "์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ๊ฐ€ ์‚ญ์ œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
1630
+ }
1631
+ else:
1632
+ return {
1633
+ "success": False,
1634
+ "error": "์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์‚ญ์ œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
1635
+ }
1636
+ except Exception as e:
1637
+ logger.error(f"โŒ ์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์‚ญ์ œ ์‹คํŒจ: {e}")
1638
+ return {
1639
+ "success": False,
1640
+ "error": str(e)
1641
+ }
1642
+
1643
+ # ============================================================================
1644
+ # LaTeX-OCR ์ „์šฉ API ์—”๋“œํฌ์ธํŠธ (์ˆ˜ํ•™ ์ˆ˜์‹ ์ธ์‹ ๊ธฐ๋Šฅ ํฌํ•จ)
1645
+ # ============================================================================
1646
+
1647
+ @app.post("/latex-ocr/upload", response_model=DocumentUploadResponse)
1648
+ async def upload_latex_document(
1649
+ file: UploadFile = File(...),
1650
+ user_id: str = Form("default_user"),
1651
+ document_id: Optional[str] = Form(None)
1652
+ ):
1653
+ """LaTeX-OCR ์ „์šฉ ๋ฌธ์„œ ์—…๋กœ๋“œ"""
1654
+ start_time = time.time()
1655
+
1656
+ try:
1657
+ # ๋ฌธ์„œ ID ์ƒ์„ฑ (์ œ๊ณต๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ)
1658
+ if not document_id:
1659
+ import uuid
1660
+ document_id = str(uuid.uuid4())[:8]
1661
+
1662
+ # ์ž„์‹œ ํŒŒ์ผ ์ €์žฅ
1663
+ temp_file_path = f"./temp_latex_{document_id}_{file.filename}"
1664
+ with open(temp_file_path, "wb") as f:
1665
+ content = await file.read()
1666
+ f.write(content)
1667
+
1668
+ # LaTeX-OCR ์ฒ˜๋ฆฌ ๋ฐ ๋ฒกํ„ฐ ์Šคํ† ์–ด์— ์ €์žฅ
1669
+ result = latex_rag_processor.process_and_store_latex_document(
1670
+ user_id, document_id, temp_file_path
1671
+ )
1672
+
1673
+ # ์ž„์‹œ ํŒŒ์ผ ์‚ญ์ œ
1674
+ import os
1675
+ if os.path.exists(temp_file_path):
1676
+ os.remove(temp_file_path)
1677
+
1678
+ processing_time = time.time() - start_time
1679
+ logger.info(f"๐Ÿงฎ LaTeX-OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ): {file.filename}")
1680
+
1681
+ return DocumentUploadResponse(
1682
+ success=result["success"],
1683
+ document_id=document_id,
1684
+ message=result.get("message", ""),
1685
+ chunks=result.get("chunks"),
1686
+ latex_count=result.get("latex_count"),
1687
+ error=result.get("error"),
1688
+ auto_response=result.get("auto_response", "")
1689
+ )
1690
+
1691
+ except Exception as e:
1692
+ logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์‹คํŒจ: {e}")
1693
+ return DocumentUploadResponse(
1694
+ success=False,
1695
+ document_id=document_id if 'document_id' in locals() else "unknown",
1696
+ message="LaTeX-OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.",
1697
+ error=str(e)
1698
+ )
1699
+
1700
+ @app.post("/latex-ocr/generate", response_model=RAGResponse)
1701
+ async def generate_latex_ocr_response(
1702
+ query: str = Form(...),
1703
+ user_id: str = Form("default_user"),
1704
+ document_id: str = Form(...)
1705
+ ):
1706
+ """LaTeX-OCR ๊ธฐ๋ฐ˜ RAG ์‘๋‹ต ์ƒ์„ฑ"""
1707
+ start_time = time.time()
1708
+
1709
+ try:
1710
+ # LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ
1711
+ result = latex_rag_processor.generate_latex_rag_response(
1712
+ user_id, document_id, query
1713
+ )
1714
+
1715
+ processing_time = time.time() - start_time
1716
+ result["processing_time"] = processing_time
1717
+
1718
+ logger.info(f"๐Ÿงฎ LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ)")
1719
+ return result
1720
+
1721
+ except Exception as e:
1722
+ logger.error(f"โŒ LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
1723
+ return RAGResponse(
1724
+ success=False,
1725
+ response=f"LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
1726
+ context="",
1727
+ sources=[],
1728
+ search_results=0,
1729
+ processing_time=time.time() - start_time
1730
+ )
1731
+
1732
+ @app.get("/latex-ocr/document/{user_id}/{document_id}")
1733
+ async def get_latex_document_info(user_id: str, document_id: str):
1734
+ """LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ"""
1735
+ try:
1736
+ result = latex_rag_processor.get_latex_document_info(user_id, document_id)
1737
+ return result
1738
+ except Exception as e:
1739
+ logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์‹คํŒจ: {e}")
1740
+ return {
1741
+ "success": False,
1742
+ "error": str(e)
1743
+ }
1744
+
1745
+ @app.delete("/latex-ocr/document/{user_id}/{document_id}")
1746
+ async def delete_latex_document(user_id: str, document_id: str):
1747
+ """LaTeX-OCR ๋ฌธ์„œ ์‚ญ์ œ"""
1748
+ try:
1749
+ # ๋ฒกํ„ฐ ์Šคํ† ์–ด์—์„œ ๋ฌธ์„œ ์‚ญ์ œ
1750
+ success = vector_store_manager.delete_document(user_id, document_id)
1751
+
1752
+ if success:
1753
+ return {
1754
+ "success": True,
1755
+ "message": "LaTeX-OCR ๋ฌธ์„œ๊ฐ€ ์‚ญ์ œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
1756
+ }
1757
+ else:
1758
+ return {
1759
+ "success": False,
1760
+ "error": "LaTeX-OCR ๋ฌธ์„œ ์‚ญ์ œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
1761
+ }
1762
+ except Exception as e:
1763
+ logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์‚ญ์ œ ์‹คํŒจ: {e}")
1764
+ return {
1765
+ "success": False,
1766
+ "error": str(e)
1767
+ }
1768
+
1769
+ # ============================================================================
1770
+ # LaTeX-OCR + FAISS ํ†ตํ•ฉ ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
1771
+ # ============================================================================
1772
+
1773
+ # LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
1774
+ latex_ocr_faiss_simple = None
1775
+ latex_ocr_faiss_integrated = None
1776
+
1777
+ def init_latex_ocr_faiss_systems():
1778
+ """LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”"""
1779
+ global latex_ocr_faiss_simple, latex_ocr_faiss_integrated
1780
+ try:
1781
+ latex_ocr_faiss_simple = LatexOCRFAISSSimple()
1782
+ latex_ocr_faiss_integrated = LatexOCRFAISSIntegrated()
1783
+ logger.info("โœ… LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
1784
+ except Exception as e:
1785
+ logger.error(f"โŒ LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
1786
+
1787
@app.post("/latex-ocr-faiss/process", response_model=DocumentUploadResponse)
async def process_pdf_with_latex_faiss(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),
    system_type: str = Form("simple")  # "simple" or "integrated"
):
    """Extract LaTeX formulas from an uploaded PDF and store them in FAISS.

    The uploaded file is written to ``uploads/latex_ocr_faiss`` under a
    ``{user_id}_{filename}`` name, then processed by either the "simple" or
    the "integrated" LaTeX-OCR + FAISS system (lazily initialized on first
    use). Returns a DocumentUploadResponse describing success and the number
    of extracted formulas.
    """
    try:
        # Persist the upload to disk so the OCR pipeline can read it by path.
        upload_dir = Path("uploads/latex_ocr_faiss")
        upload_dir.mkdir(parents=True, exist_ok=True)

        file_path = upload_dir / f"{user_id}_{file.filename}"
        with open(file_path, "wb") as f:
            content = await file.read()
            f.write(content)

        # Select the backing system; initialize lazily if it is not up yet.
        if system_type == "simple":
            if not latex_ocr_faiss_simple:
                init_latex_ocr_faiss_systems()
            system = latex_ocr_faiss_simple
        else:
            if not latex_ocr_faiss_integrated:
                init_latex_ocr_faiss_systems()
            system = latex_ocr_faiss_integrated

        # Run OCR + indexing over the saved PDF.
        result = system.process_pdf_with_latex(str(file_path), user_id)

        if result["success"]:
            return DocumentUploadResponse(
                success=True,
                document_id=f"latex_ocr_faiss_{user_id}_{file.filename}",
                message=f"LaTeX 수식 {result['latex_count']}개 추출 완료",
                chunks=result['latex_count'],
                latex_count=result['latex_count']
            )
        else:
            return DocumentUploadResponse(
                success=False,
                document_id="",
                message="LaTeX 수식 추출 실패",
                error=result.get("error", "LaTeX 수식 추출 실패")
            )

    except Exception as e:
        logger.error(f"LaTeX-OCR + FAISS 처리 오류: {e}")
        return DocumentUploadResponse(
            success=False,
            document_id="",
            message="처리 중 오류가 발생했습니다",
            error=f"처리 중 오류가 발생했습니다: {str(e)}"
        )
1841
+
1842
@app.post("/latex-ocr-faiss/search", response_model=RAGResponse)
async def search_latex_formulas(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_path: Optional[str] = Form(None),
    system_type: str = Form("simple"),
    k: int = Form(5)
):
    """Search previously stored LaTeX formulas by similarity.

    Delegates to the "simple" or "integrated" LaTeX-OCR + FAISS system
    (lazily initialized) and returns the top-``k`` matches formatted as a
    RAGResponse with a human-readable context string and per-formula sources.
    """
    try:
        # Select the backing system; initialize lazily if it is not up yet.
        if system_type == "simple":
            if not latex_ocr_faiss_simple:
                init_latex_ocr_faiss_systems()
            system = latex_ocr_faiss_simple
        else:
            if not latex_ocr_faiss_integrated:
                init_latex_ocr_faiss_systems()
            system = latex_ocr_faiss_integrated

        # Similarity search over the indexed formulas.
        search_result = system.search_formulas(query, user_id, document_path, k)

        if search_result["success"]:
            # Flatten the hits into the RAGResponse context/sources shape.
            context = "\n".join([f"수식: {result['formula']} (유사도: {result['similarity']:.3f})"
                                for result in search_result['results']])

            sources = [{"formula": result['formula'], "similarity": result['similarity'],
                       "page": result.get('page', 1)} for result in search_result['results']]

            return RAGResponse(
                success=True,
                response=f"검색된 수식 {search_result['search_results']}개를 찾았습니다.",
                context=context,
                sources=sources,
                search_results=search_result['search_results'],
                processing_time=0.0  # TODO: measure actual processing time
            )
        else:
            return RAGResponse(
                success=False,
                response="수식 검색에 실패했습니다.",
                context="",
                sources=[],
                search_results=0,
                processing_time=0.0,
                error=search_result.get("error", "검색 실패")
            )

    except Exception as e:
        logger.error(f"LaTeX 수식 검색 오류: {e}")
        return RAGResponse(
            success=False,
            response="검색 중 오류가 발생했습니다.",
            context="",
            sources=[],
            search_results=0,
            processing_time=0.0,
            error=str(e)
        )
1903
+
1904
@app.get("/latex-ocr-faiss/status")
async def get_latex_ocr_faiss_status():
    """Report initialization state of the LaTeX-OCR + FAISS subsystems.

    The service is "ready" as soon as at least one of the two systems
    (simple or integrated) has been initialized.
    """
    try:
        has_simple = latex_ocr_faiss_simple is not None
        has_integrated = latex_ocr_faiss_integrated is not None
        overall = "ready" if (has_simple or has_integrated) else "not_initialized"
        return {
            "simple_system_initialized": has_simple,
            "integrated_system_initialized": has_integrated,
            "status": overall,
        }
    except Exception as e:
        logger.error(f"상태 확인 오류: {e}")
        return {"status": "error", "error": str(e)}
1919
+
1920
+ # ============================================================================
1921
+ # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
1922
+ # ============================================================================
1923
+
1924
@app.post("/hybrid-rag/upload", response_model=DocumentUploadResponse)
async def upload_hybrid_document(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),
    document_id: Optional[str] = Form(None)
):
    """Upload a document and index it through every multimodal RAG pipeline.

    Saves the upload under ``uploads/hybrid_rag`` (generating a document id
    when none is supplied), hands it to ``hybrid_rag_processor`` and reports
    which sub-systems processed it successfully.

    FIX: the failure and exception branches previously omitted the
    ``document_id`` and ``message`` fields that every sibling upload endpoint
    passes to DocumentUploadResponse — if those fields are required on the
    model, the error path itself would raise a validation error instead of
    returning a clean failure payload.
    """
    try:
        # Persist the upload to disk so the processor can read it by path.
        upload_dir = Path("uploads/hybrid_rag")
        upload_dir.mkdir(parents=True, exist_ok=True)

        if not document_id:
            # Timestamped id keeps repeated uploads of the same file distinct.
            document_id = f"{user_id}_{int(time.time())}_{file.filename}"

        file_path = upload_dir / document_id
        with open(file_path, "wb") as buffer:
            content = await file.read()
            buffer.write(content)

        # Run all modalities (text / image / LaTeX / LaTeX-OCR) over the file.
        result = hybrid_rag_processor.process_document_hybrid(str(file_path), user_id, document_id)

        if result["success"]:
            # Keys look like "<system>_processing"; collect the ones that succeeded.
            success_systems = [
                key.replace('_processing', '').replace('_', ' ').title()
                for key, value in result.items()
                if key.endswith('_processing') and value and value.get('success', False)
            ]

            return DocumentUploadResponse(
                success=True,
                document_id=document_id,
                message=f"멀티모달 처리 완료: {', '.join(success_systems)} 시스템에서 처리됨",
                chunks=len(success_systems)
            )
        else:
            return DocumentUploadResponse(
                success=False,
                document_id=document_id,
                message="멀티모달 처리 실패",
                error=result.get("error", "멀티모달 처리 실패")
            )

    except Exception as e:
        logger.error(f"멀티모달 RAG 문서 업로드 오류: {e}")
        return DocumentUploadResponse(
            success=False,
            # document_id is a parameter, so it is always bound here; it may
            # still be None if the exception fired before generation.
            document_id=document_id or "",
            message="업로드 중 오류가 발생했습니다",
            error=f"업로드 중 오류가 발생했습니다: {str(e)}"
        )
1973
+
1974
@app.post("/hybrid-rag/generate", response_model=RAGResponse)
async def generate_hybrid_rag_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...),
    use_text: bool = Form(True),
    use_image: bool = Form(True),
    use_latex: bool = Form(True),
    use_latex_ocr: bool = Form(True),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Generate a multimodal RAG answer for a previously uploaded document.

    Thin pass-through to ``hybrid_rag_processor.generate_hybrid_response``;
    the ``use_*`` flags toggle individual retrieval modalities and the
    remaining optional parameters override the model's generation defaults
    when supplied (None means "use the profile default").
    """
    try:
        result = hybrid_rag_processor.generate_hybrid_response(
            query, user_id, document_id,
            use_text, use_image, use_latex, use_latex_ocr,
            max_length, temperature, top_p, do_sample
        )

        return RAGResponse(
            success=result["success"],
            response=result["response"],
            context=result["context"],
            sources=result["sources"],
            search_results=result["search_results"],
            processing_time=result["processing_time"]
        )

    except Exception as e:
        logger.error(f"멀티모달 RAG 응답 생성 오류: {e}")
        return RAGResponse(
            success=False,
            response=f"멀티모달 RAG 응답 생성 중 오류가 발생했습니다: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=0.0
        )
2015
+
2016
@app.get("/hybrid-rag/document/{user_id}/{document_id}")
async def get_hybrid_document_info(user_id: str, document_id: str):
    """Look up stored metadata for a hybrid-RAG document, by user and id."""
    try:
        return hybrid_rag_processor.get_document_info(user_id, document_id)
    except Exception as e:
        logger.error(f"멀티모달 RAG 문서 정보 조회 오류: {e}")
        return {"success": False, "error": str(e)}
2025
+
2026
@app.get("/hybrid-rag/status")
async def get_hybrid_rag_status():
    """Report availability of each sub-system in the hybrid RAG stack.

    Text/image/LaTeX retrieval are reported as always available; only the
    LaTeX-OCR FAISS component is actually probed on the processor.
    """
    try:
        ocr_ready = hybrid_rag_processor.latex_ocr_faiss_integrated is not None
        return {
            "text_rag_available": True,
            "image_rag_available": True,
            "latex_rag_available": True,
            "latex_ocr_faiss_available": ocr_ready,
            "status": "ready",
        }
    except Exception as e:
        logger.error(f"멀티모달 RAG 상태 확인 오류: {e}")
        return {"status": "error", "error": str(e)}
2040
+
2041
+ # run_server_v2.py ์—์„œ ์ง์ ‘ ์‹คํ–‰ ์‹œ ์ฃผ์„ ์ฒ˜๋ฆฌ
2042
+ # if __name__ == "__main__":
2043
+ # uvicorn.run(
2044
+ # app,
2045
+ # host="0.0.0.0",
2046
+ # port=8001,
2047
+ # reload=False,
2048
+ # log_level="info"
2049
+ # )
lily_llm_api/models/__init__.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ๋ชจ๋ธ ํ”„๋กœํ•„ ๊ด€๋ฆฌ
4
+ """
5
+
6
+ # from .polyglot_ko_1_3b import PolyglotKo13bProfile
7
+ # from .dialogpt_medium import DialoGPTMediumProfile
8
+ # from .kanana_1_5_2_1b_instruct import Kanana15V21bInstructProfile
9
+ # from .kanana_nano_2_1b_instruct import KananaNano21bInstructProfile
10
+ # from .mistral_7b_instruct import Mistral7bInstructProfile
11
+ # from .polyglot_ko_5_8b import PolyglotKo58bProfile
12
+ from .polyglot_ko_1_3b_chat import PolyglotKo13bChatProfile
13
+ from .kanana_1_5_v_3b_instruct import Kanana15V3bInstructProfile
14
+ from .polyglot_ko_5_8b_chat import PolyglotKo58bChatProfile
15
+
16
# Registry of loadable model profiles, keyed by the public model id used by
# the API. Entries commented out are profiles that exist in this package but
# are currently disabled.
AVAILABLE_MODELS = {
    # "polyglot-ko-1.3b": PolyglotKo13bProfile,
    # "dialogpt-medium": DialoGPTMediumProfile,
    # "kanana-1.5-2.1b-instruct": Kanana15V21bInstructProfile,
    # "kanana-nano-2.1b-instruct": KananaNano21bInstructProfile,
    # "mistral-7b-instruct": Mistral7bInstructProfile,
    # "polyglot-ko-5.8b": PolyglotKo58bProfile,
    "polyglot-ko-1.3b-chat": PolyglotKo13bChatProfile,
    "kanana-1.5-v-3b-instruct": Kanana15V3bInstructProfile,
    "polyglot-ko-5.8b-chat": PolyglotKo58bChatProfile,
}
28
+
29
def get_model_profile(model_id: str):
    """Return a freshly instantiated profile for *model_id*.

    Raises ValueError when the id is not registered in AVAILABLE_MODELS.
    """
    profile_cls = AVAILABLE_MODELS.get(model_id)
    if profile_cls is None:
        raise ValueError(f"알 수 없는 모델 ID: {model_id}")
    return profile_cls()
34
+
35
def list_available_models():
    """Return summary metadata for every registered model profile."""
    summaries = []
    for model_id, profile_cls in AVAILABLE_MODELS.items():
        # Instantiate the profile just to read its static metadata.
        info = profile_cls().get_model_info()
        summaries.append({
            "model_id": model_id,
            "name": info["display_name"],
            "description": info["description"],
            "language": info["language"],
            "model_size": info["model_size"],
            "multimodal": info.get("multimodal", False),
        })
    return summaries
lily_llm_api/models/dialogpt_medium.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ DialoGPT-medium ๋ชจ๋ธ ํ”„๋กœํ•„
4
+ """
5
+
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class DialoGPTMediumProfile:
    """Profile for Microsoft's DialoGPT-medium conversational model.

    Unlike the local-checkpoint profiles in this package, this one downloads
    the model from the Hugging Face Hub at load time.
    """

    def __init__(self):
        self.model_name = "microsoft/DialoGPT-medium"
        self.local_path = None  # loaded online, no local checkpoint
        self.display_name = "DialoGPT-medium"
        self.description = "영어 대화형 모델 (774M)"
        self.language = "en"
        self.model_size = "774M"

    def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Download and return (model, tokenizer) from the Hub."""
        logger.info(f"📥 {self.display_name} 모델 로드 중...")

        try:
            tok = AutoTokenizer.from_pretrained(self.model_name)
            mdl = AutoModelForCausalLM.from_pretrained(self.model_name)

            # DialoGPT ships without a pad token; reuse EOS for padding.
            if tok.pad_token is None:
                tok.pad_token = tok.eos_token

            logger.info(f"✅ {self.display_name} 모델 로드 성공!")
            return mdl, tok
        except Exception as e:
            logger.error(f"❌ {self.display_name} 모델 로드 실패: {e}")
            raise

    def format_prompt(self, user_input: str) -> str:
        """Wrap the user turn in the User/Assistant chat template."""
        return f"User: {user_input}\nAssistant:"

    def extract_response(self, full_text: str, formatted_prompt: str) -> str:
        """Strip the prompt scaffolding and return only the assistant turn."""
        marker = "Assistant:"
        if marker in full_text:
            return full_text.split(marker)[-1].strip()
        if formatted_prompt in full_text:
            return full_text.replace(formatted_prompt, "").strip()
        return full_text.strip()

    def get_generation_config(self) -> Dict[str, Any]:
        """Default sampling parameters for this model."""
        return {
            "max_new_tokens": 50,
            "temperature": 0.9,
            "do_sample": True,
            "top_k": 50,
            "top_p": 0.95,
            "repetition_penalty": 1.1,
            "no_repeat_ngram_size": 3,
            # Token ids are filled in from the tokenizer at generation time.
            "pad_token_id": None,
            "eos_token_id": None
        }

    def get_model_info(self) -> Dict[str, Any]:
        """Static metadata describing this profile."""
        return {
            "model_name": self.model_name,
            "display_name": self.display_name,
            "description": self.description,
            "language": self.language,
            "model_size": self.model_size,
            "local_path": self.local_path
        }
lily_llm_api/models/kanana_1_5_2_1b_instruct.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana 1.5 2.1B Instruct ๋ชจ๋ธ ํ”„๋กœํ•„
4
+ """
5
+
6
+ import torch
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM
8
+ from typing import Dict, Any, Tuple
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class Kanana15V21bInstructProfile:
    """Profile for Kakao's Kanana 1.5 2.1B Instruct model.

    Loads a local checkpoint on CPU in float32 and exposes the ChatML-style
    prompt format used by the Kanana instruct models.
    """

    def __init__(self):
        # Hub repo id (informational) and the local checkpoint actually loaded.
        self.model_name = "kakaocorp/kanana-1.5-2.1b-instruct-2505"
        self.local_path = "./lily_llm_core/models/kanana-1.5-2.1b-instruct"
        self.display_name = "Kanana 1.5 2.1B Instruct"
        self.description = "Kakao의 Kanana 1.5 2.1B Instruct 모델"
        self.language = ["ko", "en"]
        self.model_size = "2.1B"

    def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Load (model, tokenizer) from the local checkpoint, CPU/float32 only."""
        logger.info(f"📥 {self.display_name} 모델 로드 중...")

        try:
            # Tokenizer from the local directory only (no Hub download).
            tokenizer = AutoTokenizer.from_pretrained(
                self.local_path,
                trust_remote_code=True,
                local_files_only=True
            )

            # Model pinned to CPU; low_cpu_mem_usage streams weights in.
            model = AutoModelForCausalLM.from_pretrained(
                self.local_path,
                torch_dtype=torch.float32,
                device_map="cpu",
                low_cpu_mem_usage=True,
                local_files_only=True
            )

            # Ensure pad/eos tokens exist so generation does not fail.
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
            if tokenizer.eos_token is None:
                tokenizer.eos_token = "</s>"

            logger.info(f"✅ {self.display_name} 모델 로드 성공!")
            return model, tokenizer

        except Exception as e:
            logger.error(f"❌ {self.display_name} 모델 로드 실패: {e}")
            raise

    def format_prompt(self, user_input: str) -> str:
        """Wrap the user turn in the ChatML (<|im_start|>/<|im_end|>) template."""
        return f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"

    def extract_response(self, full_text: str, formatted_prompt: str) -> str:
        """Return only the assistant turn from the generated text."""
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            if "<|im_end|>" in response:
                response = response.split("<|im_end|>")[0]
            return response.strip()
        return full_text.strip()

    def get_generation_config(self) -> Dict[str, Any]:
        """Default sampling parameters for this model."""
        return {
            "max_new_tokens": 512,
            "temperature": 0.7,
            "top_p": 0.9,
            "do_sample": True,
            "repetition_penalty": 1.1,
            "no_repeat_ngram_size": 3,
            "pad_token_id": None,  # filled in from the tokenizer
            "eos_token_id": None,  # filled in from the tokenizer
        }

    def get_model_info(self) -> Dict[str, Any]:
        """Static metadata describing this profile."""
        return {
            "model_name": self.model_name,
            "display_name": self.display_name,
            "description": self.description,
            "language": self.language,
            "model_size": self.model_size,
            "local_path": self.local_path
        }
lily_llm_api/models/kanana_1_5_v_3b_instruct.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana-1.5-v-3b-instruct ๋ชจ ๋ธ ํ”„๋กœํ•„ (๋‹จ์ˆœ ๋กœ๋”ฉ ์ตœ์ข…๋ณธ)
4
+ """
5
+ import sys
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer
10
+
11
+ logger = logging.getLogger(__name__)
12
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
13
+
14
class Kanana15V3bInstructProfile:
    """Profile for the Kanana-1.5-v-3b-instruct multimodal model.

    Resolves the checkpoint either from a local directory or from the
    Hugging Face Hub depending on the detected runtime environment, and
    carries the prompt-parsing helpers used to extract assistant replies.
    """

    def __init__(self):
        # Detect whether we run locally (dev machine) or on a server/Space.
        self.is_local = self._detect_local_environment()

        # Model path configuration.
        # NOTE(review): both branches set identical model_name/local_path;
        # only display_name differs — confirm this is intentional.
        if self.is_local:
            self.model_name = "gbrabbit/lily-math-model"  # HF repo id, used even locally
            self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
            self.display_name = "Kanana-1.5-v-3b-instruct (로컬)"
        else:
            self.model_name = "gbrabbit/lily-math-model"  # Hugging Face repo id
            self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
            self.display_name = "Kanana-1.5-v-3b-instruct (서버)"

        self.description = "카카오 멀티모달 모델 (3.6B) - Math RAG 특화"
        self.language = "ko"
        self.model_size = "3.6B"
        self.multimodal = True

    def _detect_local_environment(self) -> bool:
        """Heuristically decide whether this process runs in a dev environment."""
        import os

        # Any one of these markers (a .env file nearby, explicit env vars, or
        # the known Windows project path) flags a local run.
        local_indicators = [
            os.path.exists('.env'),
            os.path.exists('../.env'),
            os.path.exists('../../.env'),
            os.getenv('IS_LOCAL') == 'true',
            os.getenv('ENVIRONMENT') == 'local',
            os.getenv('DOCKER_ENV') == 'local',
            # Hard-coded Windows dev-machine path check.
            os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
        ]

        is_local = any(local_indicators)
        logger.info(f"🔍 환경 감지: {'로컬' if is_local else '서버'}")
        return is_local

    def _load_environment_variables(self):
        """Load configuration: .env file locally, system env vars on servers."""
        import os

        try:
            if self.is_local:
                # Local run: pull variables from the first .env file found.
                from dotenv import load_dotenv

                env_paths = [
                    '.env',
                    '../.env',
                    '../../.env',
                    'C:/Project/lily_generate_project/lily_generate_package/.env',
                ]

                env_loaded = False
                for env_path in env_paths:
                    if os.path.exists(env_path):
                        load_dotenv(env_path)
                        logger.info(f"✅ 환경변수 로드됨: {env_path}")
                        env_loaded = True
                        break

                if not env_loaded:
                    logger.warning("⚠️ .env 파일을 찾을 수 없습니다")
            else:
                # Server run: rely on process environment variables.
                logger.info("🌐 서버 환경변수 사용")

        except ImportError:
            # python-dotenv missing is non-fatal; continue without .env support.
            logger.warning("⚠️ python-dotenv가 설치되지 않음")
        except Exception as e:
            logger.error(f"❌ 환경변수 로드 실패: {e}")

    def load_model(self) -> Tuple[Any, Any]:
        """Load (model, tokenizer), preferring a local checkpoint over the Hub.

        Local checkpoints use the repo's custom ``KananaVForConditionalGeneration``
        class (imported from the checkpoint directory added to sys.path);
        Hub downloads fall back to ``AutoModelForCausalLM``.
        """
        logger.info(f"📥 {self.display_name} 모델 로드 중...")

        import os
        from pathlib import Path

        # Load environment configuration first (may set HF tokens etc.).
        self._load_environment_variables()

        try:
            # 1. Use the local checkpoint if the directory exists and is non-empty.
            local_model_path = Path(self.local_path)
            use_local = local_model_path.exists() and any(local_model_path.iterdir())

            if use_local:
                logger.info(f"🗂️ 로컬 모델 사용: {self.local_path}")
                model_path = self.local_path
                local_files_only = True

                # Make the checkpoint's custom modeling code importable.
                if self.local_path not in sys.path:
                    sys.path.insert(0, self.local_path)
            else:
                logger.info(f"🌐 Hugging Face Hub에서 다운로드: {self.model_name}")
                model_path = self.model_name
                local_files_only = False

            # Environment-specific knobs (currently logging only).
            if self.is_local:
                logger.info("🏠 로컬 환경 설정 적용")
            else:
                logger.info("☁️ 서버 환경 설정 적용")

            # 2. Load the tokenizer. Cache dir only applies to Hub downloads.
            tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                trust_remote_code=True,
                local_files_only=local_files_only,
                cache_dir="/app/cache/transformers" if not use_local else None
            )
            logger.info(f"✅ 토크나이저 로드 완료 ({tokenizer.__class__.__name__})")

            # 3. Load the model in float16 on the detected device.
            if use_local:
                # Local checkpoint: custom multimodal modeling class.
                from modeling import KananaVForConditionalGeneration
                model = KananaVForConditionalGeneration.from_pretrained(
                    model_path,
                    trust_remote_code=True,
                    torch_dtype=torch.float16,
                    local_files_only=True,
                    low_cpu_mem_usage=True,
                ).to(DEVICE)
            else:
                # Hub download: standard AutoModel path.
                from transformers import AutoModelForCausalLM
                model = AutoModelForCausalLM.from_pretrained(
                    model_path,
                    trust_remote_code=True,
                    torch_dtype=torch.float16,
                    cache_dir="/app/cache/transformers",
                    low_cpu_mem_usage=True,
                ).to(DEVICE)

            logger.info(f"✅ 모델 로드 완료 ({model.__class__.__name__})")
            return model, tokenizer

        except Exception as e:
            logger.error(f"❌ {self.display_name} 모델 로드 실패: {e}", exc_info=True)
            # NOTE(review): if the exception fires before use_local is assigned
            # (e.g. Path() failing), this reference raises NameError — confirm.
            if use_local and self.local_path in sys.path:
                sys.path.remove(self.local_path)
            raise

    def get_generation_config(self) -> Dict[str, Any]:
        # Generation defaults; max_new_tokens raised to 256 to leave room for
        # image descriptions.
        return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}

    def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
        """Extract the assistant reply from raw generated text.

        Tries, in order: removing the exact prompt prefix, splitting on known
        assistant tags, splitting on a bare "assistant" keyword, then stripping
        common template tokens; falls back to returning the full text.
        """
        logger.info(f"--- 응답 추출 시작 ---")
        logger.info(f"전체 생성 텍스트 (Raw): \n---\n{full_text}\n---")

        # If the prompt was provided, strip it from the output first.
        if formatted_prompt and formatted_prompt in full_text:
            response = full_text.replace(formatted_prompt, "").strip()
            logger.info(f"✅ 성공: 프롬프트 제거로 응답 추출")
            logger.info(f"추출된 응답: {response}")
            if response:  # only return non-empty results
                return response

        # Priority 1: split on the most specific assistant delimiter tags,
        # e.g. "<|start_header_id|>assistant<|end_header_id|>..." or
        # "<|im_start|>assistant...".
        assistant_tags = [
            "<|start_header_id|>assistant<|end_header_id|>",
            "<|im_start|>assistant",
            "assistant\n",
            "assistant:"
        ]
        for tag in assistant_tags:
            if tag in full_text:
                parts = full_text.split(tag)
                if len(parts) > 1:
                    response = parts[-1].strip()
                    # Extra cleanup: drop trailing end-of-turn tokens.
                    response = response.replace("<|im_end|>", "").strip()
                    logger.info(f"✅ 성공: '{tag}' 태그로 응답 추출")
                    logger.info(f"추출된 응답: {response}")
                    if response:  # only return non-empty results
                        return response

        # Priority 2: fall back to a bare "assistant" keyword split,
        # e.g. "... user 안녕하세요 assistant 안녕하세요 ...".
        if "assistant" in full_text:
            parts = full_text.split("assistant")
            if len(parts) > 1:
                response = parts[-1].strip()
                response = response.replace("<|im_end|>", "").strip()
                logger.info("✅ 성공: 'assistant' 키워드로 응답 추출")
                logger.info(f"추출된 응답: {response}")
                if response:  # only return non-empty results
                    return response

        # Priority 3: no prompt markers found — strip common template tokens.
        clean_text = full_text.strip()
        patterns_to_remove = [
            "<|im_start|>user\n",
            "<|im_end|>",
            "<image>",
            "user\n",
            "assistant\n"
        ]

        for pattern in patterns_to_remove:
            clean_text = clean_text.replace(pattern, "")

        clean_text = clean_text.strip()

        if clean_text and clean_text != full_text:
            logger.info("✅ 성공: 패턴 제거로 응답 정리")
            logger.info(f"정리된 응답: {clean_text}")
            return clean_text

        logger.warning("⚠️ 경고: 응답에서 assistant 부분을 찾지 못했습니다. 전체 텍스트를 반환합니다.")
        logger.info(f"최종 반환 텍스트: {full_text}")
        return full_text

    def get_model_info(self) -> Dict[str, Any]:
        """Static metadata describing this profile."""
        return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
lily_llm_api/models/kanana_nano_2_1b_instruct.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana Nano 2.1B Instruct ๋ชจ๋ธ ํ”„๋กœํ•„
4
+ """
5
+
6
+ import torch
7
+ from transformers import AutoTokenizer, AutoModelForCausalLM
8
+ from typing import Dict, Any, Tuple
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
class KananaNano21bInstructProfile:
    """Profile for Kakao's Kanana Nano 2.1B Instruct model (smallest variant).

    Loads a local checkpoint on CPU in float32 and uses the ChatML-style
    prompt template shared by the Kanana instruct family.
    """

    def __init__(self):
        self.model_name = "kakaocorp/kanana-nano-2.1b-instruct"
        self.local_path = "./lily_llm_core/models/kanana-nano-2.1b-instruct"
        self.display_name = "Kanana Nano 2.1B Instruct"
        self.description = "Kakao의 Kanana Nano 2.1B Instruct 모델 (가장 작은 모델)"
        self.language = ["ko", "en"]
        self.model_size = "2.1B"

    def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Load (model, tokenizer) from the local checkpoint, CPU/float32 only."""
        logger.info(f"📥 {self.display_name} 모델 로드 중...")

        try:
            tok = AutoTokenizer.from_pretrained(
                self.local_path,
                trust_remote_code=True,
                local_files_only=True,
            )

            mdl = AutoModelForCausalLM.from_pretrained(
                self.local_path,
                torch_dtype=torch.float32,
                device_map="cpu",
                low_cpu_mem_usage=True,
                local_files_only=True,
            )

            # Guarantee pad/eos tokens exist so generation does not fail.
            if tok.pad_token is None:
                tok.pad_token = tok.eos_token
            if tok.eos_token is None:
                tok.eos_token = "</s>"

            logger.info(f"✅ {self.display_name} 모델 로드 성공!")
            return mdl, tok

        except Exception as e:
            logger.error(f"❌ {self.display_name} 모델 로드 실패: {e}")
            raise

    def format_prompt(self, user_input: str) -> str:
        """Wrap the user turn in the ChatML (<|im_start|>/<|im_end|>) template."""
        return f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"

    def extract_response(self, full_text: str, formatted_prompt: str) -> str:
        """Return only the assistant turn from the generated text."""
        marker = "<|im_start|>assistant\n"
        if marker not in full_text:
            return full_text.strip()
        tail = full_text.split(marker)[-1]
        # split() returns the whole string when the token is absent, so this
        # is safe either way.
        return tail.split("<|im_end|>")[0].strip()

    def get_generation_config(self) -> Dict[str, Any]:
        """Default sampling parameters tuned for a small CPU footprint."""
        return {
            "max_new_tokens": 128,  # reduced from 512 to keep latency down
            "temperature": 0.7,
            "top_p": 0.9,
            "do_sample": True,
            "repetition_penalty": 1.1,
            "no_repeat_ngram_size": 3,
            "pad_token_id": None,  # filled in from the tokenizer
            "eos_token_id": None,  # filled in from the tokenizer
            "use_cache": True,  # reuse KV cache
            "return_dict_in_generate": False,  # save memory
        }

    def get_model_info(self) -> Dict[str, Any]:
        """Static metadata describing this profile."""
        return {
            "model_name": self.model_name,
            "display_name": self.display_name,
            "description": self.description,
            "language": self.language,
            "model_size": self.model_size,
            "local_path": self.local_path,
        }
lily_llm_api/models/mistral_7b_instruct.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Mistral-7B-Instruct-v0.2 ๋ชจ๋ธ ํ”„๋กœํ•„
4
+ mistralai/Mistral-7B-Instruct-v0.2 ๋ชจ๋ธ์šฉ
5
+ """
6
+
7
+ from typing import Dict, Any, Tuple
8
+ import torch
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM
10
+ import logging
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
class Mistral7bInstructProfile:
    """Profile for the locally stored Mistral-7B-Instruct-v0.2 checkpoint.

    Bundles model loading, prompt formatting, response extraction and
    default generation settings so the API layer can treat every model
    profile uniformly.
    """

    def __init__(self):
        # Descriptive metadata consumed by get_model_info()
        self.model_name = "mistralai/Mistral-7B-Instruct-v0.2"
        self.local_path = "./lily_llm_core/models/mistral-7B-Instruct-v0.2"
        self.display_name = "Mistral-7B-Instruct-v0.2"
        self.description = "Mistral AI์˜ 7B ํŒŒ๋ผ๋ฏธํ„ฐ ์ธ์ŠคํŠธ๋ŸญํŠธ ๋ชจ๋ธ"
        self.language = "en"
        self.model_size = "7B"

    def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
        """Load tokenizer and weights from ``self.local_path`` (bfloat16, CPU).

        Re-raises whatever ``from_pretrained`` raises, after logging the
        failure.
        """
        logger.info(f"📥 {self.display_name} 모델 로드 중...")
        try:
            tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            model = AutoModelForCausalLM.from_pretrained(
                self.local_path,
                trust_remote_code=True,
                local_files_only=True,  # offline: never hit the Hub
                torch_dtype=torch.bfloat16,
            )
            # Pin inference explicitly to CPU.
            model.to('cpu')

            logger.info(f"✅ {self.display_name} 모델 로드 성공!")
            return model, tokenizer
        except Exception as e:
            logger.error(f"❌ {self.display_name} 모델 로드 실패: {e}")
            raise

    def format_prompt(self, user_input: str) -> str:
        """Wrap ``user_input`` in Mistral's ``[INST]`` chat markup.

        NOTE(review): the literal ``<s>`` BOS token is baked into the
        prompt string — confirm the caller tokenizes with
        ``add_special_tokens=False``, otherwise the BOS is duplicated.
        """
        return f"<s>[INST] {user_input} [/INST]"

    def extract_response(self, full_text: str, formatted_prompt: str) -> str:
        """Strip the instruction markup and return the model's reply.

        Returns a fixed greeting when extraction yields an empty or
        degenerate (< 2 chars) result.
        """
        if "[/INST]" in full_text:
            answer = full_text.split("[/INST]")[-1].strip()
        elif formatted_prompt in full_text:
            answer = full_text.replace(formatted_prompt, "").strip()
        else:
            answer = full_text.strip()

        if not answer or len(answer.strip()) < 2:
            # Degenerate/empty generations get a safe canned reply.
            return "Hello! How can I help you today?"
        return answer

    def get_generation_config(self) -> Dict[str, Any]:
        """Default sampling parameters for generation."""
        return dict(
            max_new_tokens=128,
            temperature=0.7,
            do_sample=True,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.1,
            no_repeat_ngram_size=3,
            pad_token_id=None,  # resolved from the tokenizer at generate time
            eos_token_id=None,  # resolved from the tokenizer at generate time
        )

    def get_model_info(self) -> Dict[str, Any]:
        """Descriptive metadata for the model registry / API listing."""
        return {
            field: getattr(self, field)
            for field in (
                "model_name",
                "display_name",
                "description",
                "language",
                "model_size",
                "local_path",
            )
        }