DeepVen committed
Commit 93bc725
1 Parent(s): 1af91d4

Upload 6 files


pointing to llama2

Files changed (1)
  1. main.py +13 -11
main.py CHANGED
@@ -2,25 +2,27 @@ from fastapi import FastAPI
 from transformers import pipeline
 from txtai.embeddings import Embeddings
 from txtai.pipeline import Extractor
+from llama_cpp import Llama
+
 
 # NOTE - we configure docs_url to serve the interactive Docs at the root path
 # of the app. This way, we can use the docs as a landing page for the app on Spaces.
 app = FastAPI(docs_url="/")
 
 # Create embeddings model with content support
-embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
-embeddings.load('index')
+# embeddings = Embeddings({"path": "sentence-transformers/all-MiniLM-L6-v2", "content": True})
+# embeddings.load('index')
 
 # Create extractor instance
-extractor = Extractor(embeddings, "google/flan-t5-base")
+#extractor = Extractor(embeddings, "google/flan-t5-base")
 
-pipe = pipeline("text2text-generation", model="google/flan-t5-large")
+pipe = pipeline(model="TheBloke/Llama-2-7B-GGML/llama-2-7b.ggmlv3.q4_0.bin")
 
 
 @app.get("/generate")
 def generate(text: str):
     """
-    deployed flan-t5-xxl model as backend
+    llama2 q4 backend
     """
     output = pipe(text)
     return {"output": output[0]["generated_text"]}
@@ -40,9 +42,9 @@ def search(query, question=None):
     return extractor([("answer", query, prompt(question), False)])[0][1]
 
 
-@app.get("/rag")
-def rag(question: str):
-    # question = "what is the document about?"
-    answer = search(question)
-    # print(question, answer)
-    return {answer}
+# @app.get("/rag")
+# def rag(question: str):
+#     # question = "what is the document about?"
+#     answer = search(question)
+#     # print(question, answer)
+#     return {answer}
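A note on the new pipe line: transformers' pipeline() cannot load a raw GGML checkpoint such as llama-2-7b.ggmlv3.q4_0.bin, so this call will raise rather than serve the model, and the newly added llama_cpp import is never used. Below is a minimal sketch of what the commit appears to be aiming for, using llama-cpp-python directly; the hf_hub_download step and the max_tokens value are assumptions, not part of this commit.

# Sketch only: serve the quantized Llama 2 GGML checkpoint with llama-cpp-python,
# since transformers' pipeline() cannot read a .ggmlv3.q4_0.bin file.
from fastapi import FastAPI
from huggingface_hub import hf_hub_download  # assumed helper, not in the commit
from llama_cpp import Llama

app = FastAPI(docs_url="/")

# Fetch the quantized weights from the Hub, then load them with llama.cpp.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGML",
    filename="llama-2-7b.ggmlv3.q4_0.bin",
)
llm = Llama(model_path=model_path)


@app.get("/generate")
def generate(text: str):
    """
    llama2 q4 backend
    """
    # Llama.__call__ returns an OpenAI-style completion dict.
    output = llm(text, max_tokens=256)  # max_tokens is an assumed value
    return {"output": output["choices"][0]["text"]}

Note that later llama-cpp-python releases dropped GGML in favor of GGUF, so this sketch assumes a version that still reads .ggmlv3 files.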