Spaces:
Paused
Paused
Upload 6 files
Browse files- main.py +8 -2
- requirements.txt +2 -1
main.py
CHANGED
@@ -4,6 +4,7 @@ from txtai.embeddings import Embeddings
|
|
4 |
from txtai.pipeline import Extractor
|
5 |
from llama_cpp import Llama
|
6 |
|
|
|
7 |
|
8 |
# NOTE - we configure docs_url to serve the interactive Docs at the root path
|
9 |
# of the app. This way, we can use the docs as a landing page for the app on Spaces.
|
@@ -16,15 +17,20 @@ app = FastAPI(docs_url="/")
|
|
16 |
# Create extractor instance
|
17 |
#extractor = Extractor(embeddings, "google/flan-t5-base")
|
18 |
|
19 |
-
pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
|
20 |
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
@app.get("/generate")
|
23 |
def generate(text: str):
|
24 |
"""
|
25 |
llama2 q4 backend
|
26 |
"""
|
27 |
-
output =
|
28 |
return {"output": output[0]["generated_text"]}
|
29 |
|
30 |
|
|
|
4 |
from txtai.pipeline import Extractor
|
5 |
from llama_cpp import Llama
|
6 |
|
7 |
+
from huggingface_hub import hf_hub_download
|
8 |
|
9 |
# NOTE - we configure docs_url to serve the interactive Docs at the root path
|
10 |
# of the app. This way, we can use the docs as a landing page for the app on Spaces.
|
|
|
17 |
# Create extractor instance
#extractor = Extractor(embeddings, "google/flan-t5-base")

# pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")

# Fetch the 4-bit GGML quantized Llama-2 weights from the Hub once at startup.
# hf_hub_download caches the file locally and returns its filesystem path.
model_name_or_path = "TheBloke/Llama-2-7B-GGML"
model_basename = "llama-2-7b.ggmlv3.q4_0.bin"
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# Module-level llama.cpp model instance shared by all request handlers.
llm = Llama(model_path=model_path)
27 |
|
28 |
@app.get("/generate")
def generate(text: str):
    """
    llama2 q4 backend

    Run a llama.cpp completion on the prompt ``text`` and return the
    generated continuation as ``{"output": <completion text>}``.
    """
    output = llm(text)
    # BUG FIX: llama_cpp.Llama.__call__ returns an OpenAI-style completion
    # dict ({"choices": [{"text": ...}]}), not the transformers-pipeline
    # list-of-dicts shape — output[0]["generated_text"] would raise here.
    return {"output": output["choices"][0]["text"]}
35 |
|
36 |
|
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ fastapi==0.74.*
|
|
2 |
requests==2.27.*
|
3 |
uvicorn[standard]==0.17.*
|
4 |
sentencepiece==0.1.*
|
5 |
-
txtai==6.0.*
|
|
|
|
2 |
requests==2.27.*
|
3 |
uvicorn[standard]==0.17.*
|
4 |
sentencepiece==0.1.*
|
5 |
+
txtai==6.0.*
|
6 |
+
llama-cpp-python
|