MitchelHsu committed on
Commit
be41d24
1 Parent(s): f8d7a54

Upload folder using huggingface_hub

Browse files
Files changed (12) hide show
  1. .gitignore +7 -0
  2. Dockerfile +9 -0
  3. Procfile +1 -0
  4. README.md +3 -8
  5. app/agent.py +46 -0
  6. app/app.py +66 -0
  7. app/config.py +40 -0
  8. app/models.py +17 -0
  9. app/ui.py +70 -0
  10. app/utils.py +28 -0
  11. main.py +45 -0
  12. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+
3
+ # PyCharm
4
+ .idea/
5
+
6
+ # test logs
7
+ test_logs/
Dockerfile ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-alpine
2
+
3
+ ARG KEY
4
+
5
+ WORKDIR /app
6
+ COPY . .
7
+ RUN pip3 install -r requirements.txt
8
+ ENV OPENAI_API_KEY=$KEY
9
+ CMD ["python3", "app/app.py"]
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn app.app:app
README.md CHANGED
@@ -1,12 +1,7 @@
1
  ---
2
- title: Cleric Agent Ui
3
- emoji: 🦀
4
- colorFrom: gray
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 4.27.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: cleric-agent-ui
3
+ app_file: app/ui.py
 
 
4
  sdk: gradio
5
  sdk_version: 4.27.0
 
 
6
  ---
7
+ # Doc-retrieve-agent
 
app/agent.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import ChatOpenAI
2
+ from config import examples, example_template, template_v2
3
+ from langchain_core.prompts import PromptTemplate, FewShotPromptTemplate
4
+
5
+
6
+ class Agent:
7
+ def __init__(self, model):
8
+ self.llm = ChatOpenAI(model=model)
9
+
10
+ # Example template
11
+ self.example_prompt = PromptTemplate(
12
+ template=example_template,
13
+ input_variables=['question', 'logs', 'answer']
14
+ )
15
+
16
+ # Few shot prompt template containing examples and instructions
17
+ self.prompt = FewShotPromptTemplate(
18
+ examples=examples,
19
+ example_prompt=self.example_prompt,
20
+ suffix=template_v2,
21
+ input_variables=['question', 'logs']
22
+ )
23
+
24
+ self.question = None
25
+ self.logs = None
26
+ self.response = None
27
+
28
+ def process_request(self, question, logs):
29
+ self.question = question
30
+ self.logs = logs
31
+
32
+ prompt_formatted = self.prompt.format(
33
+ question=question,
34
+ logs=logs
35
+ )
36
+
37
+ self.response = self.llm.predict(prompt_formatted)
38
+
39
+ def get_question(self):
40
+ return self.question
41
+
42
+ def get_logs(self):
43
+ return self.logs
44
+
45
+ def get_response_list(self):
46
+ return self.response.split('\n')
app/app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from agent import Agent
3
+ from config import MODEL
4
+ from flask import Flask, jsonify, request
5
+ from utils import read_documents, preprocess_logs
6
+ from models import GetQuestionAndFactsResponse, SubmitQuestionAndDocumentsResponse, SubmitQuestionAndDocumentRequest
7
+
8
+ app = Flask(__name__)
9
+ agent = Agent(model=MODEL)
10
+ processing = False
11
+ submitted_data = None
12
+
13
+
14
+ @app.route('/get_question_and_facts', methods=['GET'])
15
+ def get_response():
16
+ global submitted_data, processing, agent
17
+
18
+ print(submitted_data)
19
+
20
+ if not submitted_data:
21
+ response = GetQuestionAndFactsResponse(
22
+ question='',
23
+ facts=[],
24
+ status='No data found.'
25
+ )
26
+ return jsonify(response.dict()), 200
27
+
28
+ if processing:
29
+ response = GetQuestionAndFactsResponse(
30
+ question=submitted_data.question,
31
+ facts=[],
32
+ status='processing'
33
+ )
34
+ return jsonify(response.dict()), 200
35
+
36
+ response = GetQuestionAndFactsResponse(
37
+ question=submitted_data.question,
38
+ facts=agent.get_response_list(),
39
+ status='done'
40
+ )
41
+
42
+ return jsonify(response.dict()), 200
43
+
44
+
45
+ @app.route('/submit_question_and_documents', methods=['POST'])
46
+ def submit_question():
47
+ global submitted_data, processing, agent
48
+ processing = True
49
+ request_content = request.get_json()
50
+ submitted_data = SubmitQuestionAndDocumentRequest(**request_content)
51
+
52
+ logs = read_documents(submitted_data.urls)
53
+ processed_logs = preprocess_logs(logs)
54
+
55
+ agent.process_request(
56
+ question=submitted_data.question,
57
+ logs=processed_logs
58
+ )
59
+
60
+ processing = False
61
+ response = SubmitQuestionAndDocumentsResponse()
62
+ return jsonify(response.dict()), 200
63
+
64
+
65
+ if __name__ == '__main__':
66
+ app.run(host='0.0.0.0', port=8000, debug=True)
app/config.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ template = """
2
+ You are a call log summarization agent. Your job is to extract call summary bulletins only related to the question from a sequence of call logs. Please answer with bullets points only. Do not repeat facts in different bullets, and only response on facts with final decisions, avoid include previous decisions or decision process. Above are some examples of responses.\nGive response to the following question: {question}, according to the meeting logs:\n{logs}
3
+ """
4
+
5
+ template_v2 = """
6
+ You are a call log summarization agent. Your task is to process and extract relevant information from a set of call logs based on a single question provided.
7
+
8
+ Provide a concise list of facts extracted from the call logs that directly answer the question.
9
+ Avoid including the conversation process of the facts in your response.
10
+ Provide only the list, without any other text. Each bullet should only contain one single fact.
11
+ Please be mindful of updated facts/information/decisions and exclude any that have been canceled.
12
+ Above are some response examples.
13
+
14
+ Question: {question}
15
+ List of Call Logs:
16
+ {logs}
17
+ """
18
+
19
+ example_template = """Question: {question}
20
+ List of Call Logs:
21
+ {logs}
22
+ Answer:
23
+ {answer}"""
24
+
25
+ examples = [
26
+ {'logs': "00:00:10 - Alex: Let's choose our app's color scheme today.\n00:00:36 - Jordan: I suggest blue for a calm feel.\n00:00:51 - Casey: We need to make sure it's accessible to all users.",
27
+ 'question': 'What product design decisions did the team make?',
28
+ 'answer': "- The team will use blue for the color scheme of the app.\n- The team will make the app accessible to all users."},
29
+ {'logs': "1\n00:01:11,430 --> 00:01:40,520\n John: Hello, everybody. Let's start with the product design discussion. I think we should go with a modular design for our product. It will allow us to easily add or remove features as needed.\n\n2\n00:01:41,450 --> 00:01:49,190\nSara: I agree with John. A modular design will provide us with the flexibility we need. Also, I suggest we use a responsive design to ensure our product works well on all devices. Finally, I think we should use websockets to improve latency and provide real-time updates.\n\n3\n00:01:49,340 --> 00:01:50,040\nMike: Sounds good to me. I also propose we use a dark theme for the user interface. It's trendy and reduces eye strain for users. Let's hold off on the websockets for now since it's a little bit too much work.",
30
+ 'question': 'What are our product design decisions?',
31
+ 'answer': "- The team has decided to go with a modular design for the product.\n- The team has decided to use a responsive design to ensure the product works well on all devices.\n- The team has decided to use a dark theme for the user interface."},
32
+ {'logs': "1\n00:01:11,430 --> 00:01:40,520\nJohn: After giving it some more thought, I believe we should also consider a light theme option for the user interface. This will cater to users who prefer a brighter interface.\n\n2\n00:01:41,450 --> 00:01:49,190\nSara: That's a great idea, John. A light theme will provide an alternative to users who find the dark theme too intense.\n\n3\n00:01:49,340 --> 00:01:50,040\nMike: I'm on board with that.",
33
+ 'question': 'What are our product design decisions?',
34
+ 'answer': "- The team has decided to go with a modular design for the product.\n- The team has decided to use a responsive design to ensure the product works well on all devices.\n- The team has decided to provide both dark and light theme options for the user interface."},
35
+ {'logs': "1\n00:01:11,430 --> 00:01:40,520\nJohn: I've been thinking about our decision on the responsive design. While it's important to ensure our product works well on all devices, I think we should focus on desktop first. Our primary users will be using our product on desktops.\n\n2\n00:01:41,450 --> 00:01:49,190\nSara: I see your point, John. Focusing on desktop first will allow us to better cater to our primary users. I agree with this change.\n\n3\n00:01:49,340 --> 00:01:50,040\nMike: I agree as well. I also think the idea of using a modular design doesn't make sense. Let's not make that decision yet.",
36
+ 'question': 'What are our product design decisions?',
37
+ 'answer': "- The team has decided to focus on a desktop-first design\n- The team has decided to provide both dark and light theme options for the user interface."},
38
+ ]
39
+
40
+ MODEL = 'gpt-4'
app/models.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import Optional, List
3
+
4
+
5
+ class GetQuestionAndFactsResponse(BaseModel):
6
+ question: str
7
+ facts: Optional[List[str]]
8
+ status: str
9
+
10
+
11
+ class SubmitQuestionAndDocumentsResponse(BaseModel):
12
+ pass
13
+
14
+
15
+ class SubmitQuestionAndDocumentRequest(BaseModel):
16
+ question: str
17
+ urls: List[str]
app/ui.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import requests
3
+ import gradio as gr
4
+ from utils import get_url_list
5
+ from models import SubmitQuestionAndDocumentRequest, GetQuestionAndFactsResponse
6
+
7
+ base_url = 'https://cleric-agent-ad76f992e8d8.herokuapp.com/'
8
+
9
+
10
+ def fetch_facts(question, call_log_urls):
11
+ urls = get_url_list(call_log_urls)
12
+ payload = SubmitQuestionAndDocumentRequest(
13
+ question=question,
14
+ urls=urls
15
+ ).dict()
16
+ response = requests.post(f"{base_url}/submit_question_and_documents", json=payload)
17
+
18
+ start_time = time.time()
19
+ while True:
20
+ response = requests.get(f"{base_url}/get_question_and_facts")
21
+ if response.status_code != 200:
22
+ # st.error(f"Unexpected status code when getting question and facts: {response.status_code}")
23
+ return None
24
+ try:
25
+ data = GetQuestionAndFactsResponse(**response.json())
26
+ except ValueError as e:
27
+ # st.error(f"The response data does not match the expected schema: {str(e)}")
28
+ # st.write(response.json()) # Print the invalid data for debugging
29
+ return None
30
+
31
+ if data.status == "done":
32
+ break
33
+ elif time.time() - start_time > 300: # 5 minutes timeout
34
+ # st.error("Timeout: Facts not ready after 5 minutes")
35
+ return None
36
+ time.sleep(1)
37
+
38
+ return '\n'.join(data.facts)
39
+
40
+
41
+ with gr.Blocks() as demo:
42
+ gr.Markdown("""
43
+ # Cleric Call Logs Summarize Agent
44
+
45
+ Please place the URLs in the Call Logs URLs text box, separated by new line.
46
+ Place your question to this call logs, then submit!
47
+ """)
48
+ error_box = gr.Textbox(label="Error", visible=False)
49
+ with gr.Row(equal_height=True):
50
+ call_logs_box = gr.Textbox(label='Call Logs URLs', scale=2)
51
+ facts_box = gr.Textbox(label='Extracted Facts', scale=2)
52
+
53
+ question_box = gr.Textbox(label='Question')
54
+ submit_btn = gr.Button("Submit")
55
+
56
+ submit_btn.click(
57
+ fetch_facts,
58
+ inputs=[call_logs_box, question_box],
59
+ outputs=facts_box
60
+ )
61
+
62
+ # iface = gr.Interface(
63
+ # fn=fetch_facts,
64
+ # inputs=["text", "text"],
65
+ # outputs="text",
66
+ # allow_flagging="never",
67
+ # title="Cleric Call Logs Summarize Agent"
68
+ # )
69
+ demo.launch()
70
+
app/utils.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from typing import List
3
+
4
+
5
+ def load_logs(log_paths: List) -> str:
6
+ logs = ""
7
+ for i, path in enumerate(log_paths):
8
+ with open(path, 'r') as f:
9
+ logs += f"Log {i}:\n" + f.read() + '\n'
10
+
11
+ return logs
12
+
13
+
14
+ def get_url_list(call_log_urls: str) -> List[str]:
15
+ return call_log_urls.split('\n')
16
+
17
+
18
+ def read_documents(documents: List[str]) -> List[str]:
19
+ logs = []
20
+ for url in documents:
21
+ response = requests.get(url)
22
+ logs.append(response.text)
23
+
24
+ return logs
25
+
26
+
27
+ def preprocess_logs(logs: List[str]):
28
+ return '\n'.join(logs)
main.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from config import *
4
+ from pydantic import BaseModel
5
+ from langchain_openai import ChatOpenAI
6
+ from utils import load_logs, read_documents, preprocess_logs
7
+ from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
8
+
9
+ llm = ChatOpenAI(model=MODEL)
10
+
11
+ class SubmitQuestionAndDocumentsResponse(BaseModel):
12
+ pass # The response body doesn't matter for this endpoint
13
+
14
+
15
+ if __name__ == '__main__':
16
+ documents = [
17
+ "https://storage.googleapis.com/cleric-assignment-call-logs/call_log_20240314_104111.txt",
18
+ "https://storage.googleapis.com/cleric-assignment-call-logs/call_log_20240315_104111.txt",
19
+ "https://storage.googleapis.com/cleric-assignment-call-logs/call_log_20240316_104111.txt"
20
+ ]
21
+
22
+ logs = read_documents(documents)
23
+ processed_logs = preprocess_logs(logs)
24
+
25
+ example_prompt = PromptTemplate(
26
+ template=example_template,
27
+ input_variables=["question", "logs", "answer"]
28
+ )
29
+
30
+ prompt = FewShotPromptTemplate(
31
+ examples=examples,
32
+ example_prompt=example_prompt,
33
+ suffix=template,
34
+ input_variables=["question", "logs"]
35
+ )
36
+
37
+ prompt_formatted = prompt.format(
38
+ question='What product design decisions did the team make?',
39
+ logs=processed_logs
40
+ )
41
+
42
+ print(prompt_formatted)
43
+ # bullets = llm.predict(prompt_formatted)
44
+ # print(bullets)
45
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
Flask
gunicorn
gradio
requests
openai
langchain
langchain-openai
pydantic