Daniel Marques committed on
Commit
70e00d3
1 Parent(s): 36d73c0
Files changed (2) hide show
  1. constants.py +2 -2
  2. main.py +22 -20
constants.py CHANGED
@@ -32,12 +32,12 @@ CHROMA_SETTINGS = Settings(
32
  )
33
 
34
  # Context Window and Max New Tokens
35
- CONTEXT_WINDOW_SIZE = 1024
36
  MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)
37
 
38
  #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
39
 
40
- N_GPU_LAYERS = 50 # Llama-2-70B has 83 layers
41
  N_BATCH = 2048
42
 
43
  ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
 
32
  )
33
 
34
  # Context Window and Max New Tokens
35
+ CONTEXT_WINDOW_SIZE = 2048
36
  MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)
37
 
38
  #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
39
 
40
+ N_GPU_LAYERS = 83 # Llama-2-70B has 83 layers
41
  N_BATCH = 2048
42
 
43
  ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
main.py CHANGED
@@ -136,28 +136,33 @@ def delete_source_route(data: Delete):
136
  raise HTTPException(status_code=400, detail=print(f"The file {file_to_delete} does not exist."))
137
 
138
  @api_app.post('/predict')
139
- async def predict(data: Predict):
140
  global QA
141
- user_prompt = data.prompt
142
- if user_prompt:
143
- res = QA(user_prompt)
 
144
 
145
- answer, docs = res["result"], res["source_documents"]
146
 
147
- prompt_response_dict = {
148
- "Prompt": user_prompt,
149
- "Answer": answer,
150
- }
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
- prompt_response_dict["Sources"] = []
153
- for document in docs:
154
- prompt_response_dict["Sources"].append(
155
- (os.path.basename(str(document.metadata["source"])), str(document.page_content))
156
- )
157
 
158
- return {"response": prompt_response_dict}
159
- else:
160
- raise HTTPException(status_code=400, detail="Prompt Incorrect")
161
 
162
  @api_app.post("/save_document/")
163
  async def create_upload_file(file: UploadFile):
@@ -208,12 +213,9 @@ async def websocket_endpoint(websocket: WebSocket, client_id: int):
208
  try:
209
  while True:
210
  prompt = await websocket.receive_text()
211
-
212
  response = QA(inputs=prompt, return_only_outputs=True, tags=f'{client_id}', include_run_info=True)
213
-
214
  await websocket.send_text(f'{response}')
215
 
216
-
217
  except WebSocketDisconnect:
218
  print('disconnect')
219
  except RuntimeError as error:
 
136
  raise HTTPException(status_code=400, detail=print(f"The file {file_to_delete} does not exist."))
137
 
138
@api_app.post('/predict')
def predict(data: Predict):
    """Answer a user prompt with the global QA chain and report the sources used.

    Args:
        data: Request body; ``data.prompt`` holds the user's question.

    Returns:
        dict: ``{"response": {"Prompt": ..., "Answer": ..., "Sources": [...]}}``
        where ``Sources`` is a list of ``(filename, page_content)`` pairs.

    Raises:
        HTTPException: 400 when the prompt is empty/missing; 500 when the QA
            chain fails for any other reason.
    """
    global QA
    user_prompt = data.prompt
    if not user_prompt:
        # Empty prompt is a client error — raise it OUTSIDE the try below so it
        # is not swallowed and re-raised as a 500 by the blanket handler.
        raise HTTPException(status_code=400, detail="Prompt Incorrect")
    try:
        res = QA(user_prompt)
        answer, docs = res["result"], res["source_documents"]

        # (filename, excerpt) pairs so the client can cite where the answer came from.
        sources = [
            (os.path.basename(str(document.metadata["source"])), str(document.page_content))
            for document in docs
        ]

        prompt_response_dict = {
            "Prompt": user_prompt,
            "Answer": answer,
            "Sources": sources,
        }
        return {"response": prompt_response_dict}
    except HTTPException:
        # Never convert a deliberate 4xx into a generic 500.
        raise
    except Exception as e:
        # Surface unexpected QA/chain failures as a 500 with the cause chained.
        raise HTTPException(status_code=500, detail=f"Error occurred: {str(e)}") from e
164
 
 
 
 
 
 
165
 
 
 
 
166
 
167
  @api_app.post("/save_document/")
168
  async def create_upload_file(file: UploadFile):
 
213
  try:
214
  while True:
215
  prompt = await websocket.receive_text()
 
216
  response = QA(inputs=prompt, return_only_outputs=True, tags=f'{client_id}', include_run_info=True)
 
217
  await websocket.send_text(f'{response}')
218
 
 
219
  except WebSocketDisconnect:
220
  print('disconnect')
221
  except RuntimeError as error: