robinroy03 committed
Commit f23adce · 1 Parent(s): 6cff55d

modified to implement router

Files changed (2):
  1. app.py +31 -72
  2. utils.py +89 -0
app.py CHANGED
@@ -1,7 +1,7 @@
 """
 You call this ENDPOINT and it returns a JSON of this format:

-POST FORMAT:
+POST FORMAT: (/api/groq or /api/google or /api/ollama ...)
 {
     "query": "????",
     "llm": "llama70b-whatever",
@@ -12,88 +12,41 @@ POST FORMAT:
 RESPONSE FORMAT:
 {
     "response": "blabla",
-    "references": ["1", "2", "3", ...]
+    "references": "1, 2, 3"
 }
 """

 # TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)

-import ast
-import json
-
-import aiohttp
 from flask import Flask
 from flask import request
-import requests  # NOTE: AIOHTTP, THIS IS FOR RIGHT NOW ONLY

-app = Flask(__name__)
+from utils import embedding_output, db_output, groq_llm_output, ollama_llm_output

-def llm_output(question: str, db_knn: dict, llm: str, stream: bool) -> tuple[str, str]:
-    """
-    Returns output from the LLM using the given user question and retrieved context.
-    """
-
-    URL_LLM = 'https://robinroy03-fury-bot.hf.space'
-    # URL_LLM = 'http://localhost:11434'  # NOTE: FOR TESTING
-
-    context = ""
-    references = ""
-    for i in range(len(db_knn['matches'])):
-        data = db_knn['matches'][i]['metadata']['data']
-        context += (data + "\n")
-        data = ast.literal_eval(data)
-        references += ("<https://github.com/fury-gl/fury/tree/master/" + data['path'] + ">").replace("//home/robin/Desktop/l/fury", "")
-        if data.get("function_name"):
-            references += f"\tFunction Name: {data.get('function_name')}"
-        elif data.get("class_name"):
-            references += f"\tClass Name: {data.get('class_name')}"
-        elif data['type'] == 'rst':
-            references += f"\tDocumentation: {data['path'].split('/')[-1]}"
-        elif data['type'] == 'documentation_examples':
-            references += f"\tDocumentation: {data['path'].split('/')[-1]}"
-        references += "\n"
-
-    prompt = f"""
-    You are a senior developer. Answer the user's question based on the context provided.
-    Question: {question}
-    Context: {context}
-    """
-    obj = {
-        'model': llm,
-        'prompt': prompt,
-        'stream': stream
-    }
-    response = requests.post(URL_LLM + "/api/generate", json=obj)
-    response_json = json.loads(response.text)
-    return (response_json['choices'][0]['message']['content'], references)
-
-
-def embedding_output(message: str) -> list:
-    """
-    Returns embeddings for the given message.
-    rtype: list of embeddings. Length depends on the model.
-    """
-
-    URL_EMBEDDING = 'https://robinroy03-fury-embeddings-endpoint.hf.space'
-    response = requests.post(URL_EMBEDDING + "/embedding", json={"text": message})
-    response_json = json.loads(response.text)
-    return response_json['output']
-
-
-def db_output(embedding: list, knn: int) -> dict:
-    """
-    Returns the KNN results.
-    rtype: JSON
-    """
-
-    URL_DB = 'https://robinroy03-fury-db-endpoint.hf.space'
-    response = requests.post(URL_DB + "/query", json={"embeddings": embedding, "knn": knn})
-    response_json = json.loads(response.text)
-    return response_json
-
-
-@app.route("/api/generate", methods=['POST'])
-def completion():
+app = Flask(__name__)
+
+
+@app.route("/api/groq/generate", methods=['POST'])
+def groq_completion():
+    message = request.get_json()
+
+    query: str = message['query']
+    llm: str = message['llm']
+    knn: int = int(message['knn'])
+    stream: bool = bool(message['stream'])
+
+    embedding_data = embedding_output(query)
+    db_knn = db_output(embedding_data, knn)
+    output, references = groq_llm_output(query, db_knn, llm, stream)
+
+    return {
+        "response": output,
+        "references": references
+    }
+
+
+@app.route("/api/ollama/generate", methods=['POST'])
+def ollama_completion():
     message = request.get_json()

     query: str = message['query']
@@ -103,18 +56,24 @@ def completion():

     embedding_data = embedding_output(query)
     db_knn = db_output(embedding_data, knn)
-    output, references = llm_output(query, db_knn, llm, stream)
+    response_json, references = ollama_llm_output(query, db_knn, llm, stream)
+
+    if response_json.get("error"):
+        print(response_json)
+        return {
+            "response": "An error occurred, try again.",
+            "references": "No references"
+        }

     return {
-        "response": output,
+        "response": response_json['response'],
         "references": references
     }

-
 """
-curl -X POST http://localhost:8000/api/generate -H "Content-Type: application/json" -d '{
+curl -X POST http://localhost:8000/api/ollama/generate -H "Content-Type: application/json" -d '{
     "query": "How do I create a sphere in FURY?",
-    "llm": "llama3-70b-8192",
+    "llm": "phi3",
     "knn": "3",
     "stream": false
 }'
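
For reference, the per-provider routes keep the same request/response contract as before; only the path changes. A minimal Python client sketch against the documented format, assuming the Flask app is served locally on port 8000 (the ask helper and the backend/model values here are illustrative, not part of this repo):

import requests

BASE = "http://localhost:8000"  # assumed local deployment

def ask(backend: str, llm: str, query: str, knn: int = 3) -> dict:
    """POST a query to one of the per-provider routes: /api/<backend>/generate."""
    payload = {"query": query, "llm": llm, "knn": str(knn), "stream": False}
    response = requests.post(f"{BASE}/api/{backend}/generate", json=payload)
    return response.json()  # {"response": "...", "references": "..."}

result = ask("ollama", "phi3", "How do I create a sphere in FURY?")
print(result["response"])
print(result["references"])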
utils.py ADDED
@@ -0,0 +1,89 @@
+import requests
+import json
+import ast
+
+
+def prompt_generator(question: str, db_knn: dict) -> tuple[str, str, str]:
+    """
+    Builds the RAG prompt plus a human-readable references string from the KNN matches.
+    """
+    context = ""
+    references = ""
+    for i in range(len(db_knn['matches'])):
+        data = db_knn['matches'][i]['metadata']['data']
+        context += (data + "\n")
+        data = ast.literal_eval(data)
+        references += ("<https://github.com/fury-gl/fury/tree/master/" + data['path'] + ">").replace("//home/robin/Desktop/l/fury", "")
+        if data.get("function_name"):
+            references += f"\tFunction Name: {data.get('function_name')}"
+        elif data.get("class_name"):
+            references += f"\tClass Name: {data.get('class_name')}"
+        elif data['type'] == 'rst':
+            references += f"\tDocumentation: {data['path'].split('/')[-1]}"
+        elif data['type'] == 'documentation_examples':
+            references += f"\tDocumentation: {data['path'].split('/')[-1]}"
+        references += "\n"
+
+    prompt = f"""
+    You are a senior developer. Answer the user's question based on the context provided.
+    Question: {question}
+    Context: {context}
+    """
+    return prompt, context, references
+
+
+def groq_llm_output(question: str, db_knn: dict, llm: str, stream: bool) -> tuple[str, str]:
+    """
+    Returns output from the LLM using the given user question and retrieved context.
+    """
+
+    URL_LLM = 'https://robinroy03-fury-bot.hf.space'
+    prompt, context, references = prompt_generator(question, db_knn)
+    obj = {
+        'model': llm,
+        'prompt': prompt,
+        'stream': stream
+    }
+    response = requests.post(URL_LLM + "/api/generate", json=obj)
+    response_json = json.loads(response.text)
+    return (response_json['choices'][0]['message']['content'], references)
+
+
+def embedding_output(message: str) -> list:
+    """
+    Returns embeddings for the given message.
+    rtype: list of embeddings. Length depends on the model.
+    """
+
+    URL_EMBEDDING = 'https://robinroy03-fury-embeddings-endpoint.hf.space'
+    response = requests.post(URL_EMBEDDING + "/embedding", json={"text": message})
+    response_json = json.loads(response.text)
+    return response_json['output']
+
+
+def db_output(embedding: list, knn: int) -> dict:
+    """
+    Returns the KNN results.
+    rtype: JSON
+    """
+
+    URL_DB = 'https://robinroy03-fury-db-endpoint.hf.space'
+    response = requests.post(URL_DB + "/query", json={"embeddings": embedding, "knn": knn})
+    response_json = json.loads(response.text)
+    return response_json
+
+
+def ollama_llm_output(question: str, db_knn: dict, llm: str, stream: bool) -> tuple[dict, str]:
+    """
+    Returns the raw Ollama response JSON and the references string.
+    """
+    # URL_LLM = 'https://robinroy03-ollama-server-backend.hf.space'
+    URL_LLM = "http://localhost:11434"
+    prompt, context, references = prompt_generator(question, db_knn)
+    obj = {
+        "model": llm,
+        "prompt": prompt,  # the generated prompt carries the retrieved context, not just the bare question
+        "stream": stream
+    }
+    try:
+        response = requests.post(URL_LLM + "/api/generate", json=obj)
+    except Exception as e:
+        print(e)
+        return {"error": str(e)}, ""  # str(e) keeps the payload JSON-serializable; the pair matches the caller's unpacking
+
+    response_json = json.loads(response.text)
+    return response_json, references
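
The TODO at the top of app.py calls for a move to async FastAPI. A minimal sketch of what one route could look like under that plan, assuming httpx as the async HTTP client and reusing prompt_generator from utils.py; none of this exists in the repo yet:

# Hypothetical async port of the ollama route (see the TODO in app.py).
# Assumes: pip install fastapi httpx uvicorn
import httpx
from fastapi import FastAPI
from pydantic import BaseModel

from utils import prompt_generator

app = FastAPI()

class Query(BaseModel):
    query: str
    llm: str
    knn: int
    stream: bool = False

@app.post("/api/ollama/generate")
async def ollama_completion(q: Query):
    # timeout=None because LLM generation can exceed httpx's 5 s default
    async with httpx.AsyncClient(timeout=None) as client:
        # Same three-hop pipeline as the Flask version, but non-blocking.
        emb = (await client.post(
            "https://robinroy03-fury-embeddings-endpoint.hf.space/embedding",
            json={"text": q.query})).json()["output"]
        db_knn = (await client.post(
            "https://robinroy03-fury-db-endpoint.hf.space/query",
            json={"embeddings": emb, "knn": q.knn})).json()
        prompt, _, references = prompt_generator(q.query, db_knn)
        llm_json = (await client.post(
            "http://localhost:11434/api/generate",
            json={"model": q.llm, "prompt": prompt, "stream": q.stream})).json()
    return {"response": llm_json.get("response"), "references": references}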