DeepVen committed on
Commit
fa2ae65
1 Parent(s): 93bc725

Upload 6 files

Files changed (2)
  1. main.py +8 -2
  2. requirements.txt +2 -1
main.py CHANGED
@@ -4,6 +4,7 @@ from txtai.embeddings import Embeddings
 from txtai.pipeline import Extractor
 from llama_cpp import Llama
 
+from huggingface_hub import hf_hub_download
 
 # NOTE - we configure docs_url to serve the interactive Docs at the root path
 # of the app. This way, we can use the docs as a landing page for the app on Spaces.
@@ -16,15 +17,20 @@ app = FastAPI(docs_url="/")
 # Create extractor instance
 #extractor = Extractor(embeddings, "google/flan-t5-base")
 
-pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
+# pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
 
+model_name_or_path = "TheBloke/Llama-2-7B-GGML"
+model_basename = "llama-2-7b.ggmlv3.q4_0.bin"
+model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)
+
+llm = Llama(model_path=model_path)
 
 @app.get("/generate")
 def generate(text: str):
     """
     llama2 q4 backend
     """
-    output = pipe(text)
+    output = llm(text)
     return {"output": output[0]["generated_text"]}
 
 
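For reference, the change replaces the transformers-style pipeline with a two-step load: hf_hub_download fetches the GGML weights from TheBloke/Llama-2-7B-GGML, and llama_cpp.Llama loads them locally. One caveat: when the Llama object is called, llama-cpp-python returns an OpenAI-style completion dict, so the output[0]["generated_text"] indexing carried over from the pipeline version would likely fail at runtime. Below is a minimal sketch of the same route adapted to that return shape; the max_tokens value and the "choices"/"text" handling are assumptions for illustration, not part of the commit.

# Sketch only, not the committed code: wires the downloaded GGML weights
# into the FastAPI route and reads llama-cpp-python's completion dict.
from fastapi import FastAPI
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

app = FastAPI(docs_url="/")

model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGML",
    filename="llama-2-7b.ggmlv3.q4_0.bin",
)
llm = Llama(model_path=model_path)

@app.get("/generate")
def generate(text: str):
    # Calling the Llama object runs a text completion; max_tokens=64 is an
    # illustrative value, not something set in the commit.
    output = llm(text, max_tokens=64)
    # llama-cpp-python returns {"choices": [{"text": ...}, ...], ...}
    return {"output": output["choices"][0]["text"]}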
requirements.txt CHANGED
@@ -2,4 +2,5 @@ fastapi==0.74.*
 requests==2.27.*
 uvicorn[standard]==0.17.*
 sentencepiece==0.1.*
-txtai==6.0.*
+txtai==6.0.*
+llama-cpp-python
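requirements.txt now adds llama-cpp-python (unpinned) alongside the existing pinned txtai release. Once the dependencies are installed and the app is running (for example via uvicorn main:app), the /generate route can be exercised with the already-listed requests package; the host, port, and prompt below are illustrative assumptions, not part of the commit.

# Illustrative client call against a local run of the app; the URL assumes
# `uvicorn main:app --port 8000` on the same machine.
import requests

resp = requests.get(
    "http://localhost:8000/generate",
    params={"text": "Explain quantization in one sentence."},
)
print(resp.json()["output"])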