koh1018 committed
Commit dd11498 · 1 Parent(s): 8b098af

Add llama-cpp-python wheel via Git LFS

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.whl filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -19,20 +19,16 @@ USER user
  ENV HF_HOME /app/huggingface_cache
  ENV PATH="/home/user/.local/bin:${PATH}"

- # 7. Copy the requirements.txt file.
+ # 7. Copy the requirements.txt file and the wheel file.
  COPY ./requirements.txt requirements.txt
+ COPY ./llama_cpp_python-0.3.11-cp39-cp39-linux_x86_64.whl .

- # 8. Set all the options needed to install llama-cpp-python "the smart way".
- ENV CMAKE_ARGS="-DLLAMA_CUBLAS=OFF -DLLAMA_HIPBLAS=OFF -DLLAMA_OPENBLAS=OFF -DLLAMA_CMAKE_BUILD_PARALLEL=ON"
- ENV FORCE_CMAKE=1
- ENV CC=gcc-11
- ENV CXX=g++-11
-
- # 9. Install the libraries. (Path fix complete!)
+ # 8. Install llama-cpp-python from the wheel file, then install the remaining libraries.
+ RUN pip install --no-cache-dir ./llama_cpp_python-0.3.11-cp39-cp39-linux_x86_64.whl
  RUN pip install --no-cache-dir --upgrade -r requirements.txt

- # 10. Copy all remaining source code into the working directory.
+ # 9. Copy all remaining source code into the working directory.
  COPY . /app

- # 11. Once everything is ready, run the server.
+ # 10. Once everything is ready, run the server.
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -13,12 +13,12 @@ app = FastAPI()
  # 2. Prepare to load the GGUF model
  # # Using TheBloke's SOLAR model as an example.
  # # 'repo_id' is the repository that hosts the model; 'filename' is the specific GGUF file within it.
- # model_repo_id = "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF"
- # model_filename = "solar-10.7b-instruct-v1.0.Q4_K_S.gguf"
+ model_repo_id = "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF"
+ model_filename = "solar-10.7b-instruct-v1.0.Q4_K_S.gguf"

- # Changed to the 7B Qwen 2.5 model for testing
- model_repo_id = "Triangle104/Qwen2.5-7B-Instruct-Q4_K_S-GGUF"
- model_filename = "qwen2.5-7b-instruct-q4_k_s.gguf"
+ # # Changed to the 7B Qwen 2.5 model for testing
+ # model_repo_id = "Triangle104/Qwen2.5-7B-Instruct-Q4_K_S-GGUF"
+ # model_filename = "qwen2.5-7b-instruct-q4_k_s.gguf"

  # Download the GGUF file from the Hugging Face Hub and get its local path.
  # This step runs only once, when the server starts.
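Per the comments in this hunk, app.py downloads the selected GGUF file from the Hugging Face Hub once at startup and then loads it from the local path. A minimal sketch of that pattern, assuming the usual hf_hub_download and Llama calls rather than the exact code in this repo (n_ctx is an illustrative value):

# Sketch only; the real app.py may differ beyond the lines shown in this diff.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_repo_id = "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF"
model_filename = "solar-10.7b-instruct-v1.0.Q4_K_S.gguf"

# Runs once at server startup: download (or reuse the cached) GGUF file and get its local path.
model_path = hf_hub_download(repo_id=model_repo_id, filename=model_filename)

# Load the quantized model from the local path; n_ctx is assumed, not taken from the repo.
llm = Llama(model_path=model_path, n_ctx=2048)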
llama_cpp_python-0.3.11-cp39-cp39-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b23acdcb9de8fd44ccb85b73ece0aa211f31717697d3b5c5f3d37acbee095a04
+ size 4176374
requirements.txt CHANGED
@@ -4,5 +4,4 @@ pydantic
  python-multipart
  torch
  sentencepiece
- llama-cpp-python
  huggingface-hub