MitchelHsu commited on
Commit
6774ab2
1 parent: c8cd6ca

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. .gitignore +4 -1
  2. Procfile +1 -0
  3. app/agent.py +60 -0
  4. app/app.py +107 -0
  5. app/config.py +47 -0
  6. app/ui.py +2 -2
.gitignore CHANGED
@@ -4,4 +4,7 @@ __pycache__/
4
  .idea/
5
 
6
  # test logs
7
- test_logs/
 
 
 
 
4
  .idea/
5
 
6
  # test logs
7
+ test_logs/
8
+
9
+ # Test file
10
+ main.py
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn app.app:app
app/agent.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils import preprocess_logs
2
+ from langchain_openai import ChatOpenAI
3
+ from config import examples, example_template, template_v2
4
+ from langchain_core.prompts import PromptTemplate, FewShotPromptTemplate
5
+
6
+
7
class Agent:
    """LLM agent that extracts fact summaries from call logs via few-shot prompting."""

    def __init__(self, model):
        """Build the chat model client and the few-shot prompt pipeline.

        Args:
            model: OpenAI chat model name forwarded to ChatOpenAI.
        """
        self.llm = ChatOpenAI(model=model)

        # Template used to render each few-shot example.
        self.example_prompt = PromptTemplate(
            template=example_template,
            input_variables=['question', 'logs', 'answer']
        )

        # Few-shot prompt: worked examples followed by the instruction template.
        self.prompt = FewShotPromptTemplate(
            examples=examples,
            example_prompt=self.example_prompt,
            suffix=template_v2,
            input_variables=['question', 'logs']
        )

        # State captured by the most recent summarize() call.
        self.question = None
        self.logs = None
        self.response = None

    def summarize(self, question, logs, retriever=None):
        """Run the LLM over the logs for `question`; result stored in self.response.

        Args:
            question: user question to extract facts for.
            logs: raw log text (preprocessed and used directly when no retriever).
            retriever: optional retriever; when given, retrieved chunks are used
                instead of the raw logs.
        """
        self.question = question
        if retriever:
            # BUG FIX: record the retrieved chunks in self.logs so get_logs()
            # reflects what was actually sent to the model (previously the
            # retriever branch left self.logs stale from an earlier call).
            self.logs = self.retrieve_logs(retriever)
        else:
            self.logs = preprocess_logs(logs)

        prompt_formatted = self.prompt.format(
            question=question,
            logs=self.logs
        )

        self.response = self.llm.predict(prompt_formatted)

    def retrieve_logs(self, retriever):
        """Retrieve log chunks relevant to self.question, highest node id first."""
        retriever_nodes = retriever.retrieve(self.question)
        # NOTE(review): descending node-id order presents later chunks first —
        # confirm this is the intended ordering for the prompt.
        retriever_nodes = sorted(retriever_nodes, key=lambda n: n.node_id, reverse=True)

        return '\n'.join([node.text for node in retriever_nodes])

    def get_question(self):
        """Return the question from the last summarize() call."""
        return self.question

    def get_logs(self):
        """Return the logs (preprocessed or retrieved) last sent to the model."""
        return self.logs

    def get_response_list(self):
        """Return the model response split into lines (one fact per bullet)."""
        return self.response.split('\n')
app/app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from agent import Agent
2
+ from flask import Flask, jsonify, request
3
+ from config import MODEL, CHUNK_SIZE, CHUNK_OVERLAP, RETRIEVE_TOP_K
4
+ from utils import read_documents, validate_request_logs, preprocess_logs
5
+ from models import GetQuestionAndFactsResponse, SubmitQuestionAndDocumentsResponse, SubmitQuestionAndDocumentRequest
6
+
7
+ from llama_index.core import VectorStoreIndex, Document
8
+ from llama_index.core.node_parser import SentenceSplitter
9
+ from llama_index.core.retrievers import VectorIndexRetriever
10
+
11
# Flask application and a single shared Agent instance, created at import time.
app = Flask(__name__)
agent = Agent(model=MODEL)
# Module-level request state shared by the two routes below.
# NOTE(review): mutable globals are not safe across multiple gunicorn
# workers/threads — confirm the deployment runs a single worker.
processing = False
submitted_data = None
16
+
17
@app.route('/get_question_and_facts', methods=['GET'])
def get_response():
    """Report the state of the last submission and, once done, its extracted facts."""
    global submitted_data, processing, agent

    # Nothing has been submitted yet.
    if not submitted_data:
        empty = GetQuestionAndFactsResponse(
            question='',
            facts=[],
            status='No data found, please submit data'
        )
        return jsonify(empty.dict()), 200

    # A submission exists but the agent is still working on it.
    if processing:
        pending = GetQuestionAndFactsResponse(
            question=submitted_data.question,
            facts=[],
            status='processing'
        )
        return jsonify(pending.dict()), 200

    # Finished: surface the agent's fact list.
    done = GetQuestionAndFactsResponse(
        question=submitted_data.question,
        facts=agent.get_response_list(),
        status='done'
    )
    return jsonify(done.dict()), 200
47
+
48
+
49
@app.route('/submit_question_and_documents', methods=['POST'])
def submit_question():
    """Validate a submission, load its documents, index them, and run the agent.

    Always responds 200 with a SubmitQuestionAndDocumentsResponse whose status
    describes success or the specific validation/read failure.
    """
    global submitted_data, processing, agent
    processing = True
    request_content = request.get_json()

    try:
        # Submit payload read and validation.
        try:
            submitted_data = SubmitQuestionAndDocumentRequest(**request_content)
        except ValueError as e:
            response = SubmitQuestionAndDocumentsResponse(status=f'Request payload does not match expected schema: {str(e)}')
            return jsonify(response.dict()), 200

        # Validate request URL formats.
        try:
            validate_request_logs(submitted_data.documents)
        except ValueError as e:
            # Respond with URL validation failed error.
            response = SubmitQuestionAndDocumentsResponse(status=f'URL validation failed: {e}')
            return jsonify(response.dict()), 200

        # Try loading documents.
        try:
            logs = read_documents(submitted_data.documents)
        except Exception as e:
            # Respond with URL read fail if URL read error.
            response = SubmitQuestionAndDocumentsResponse(status=f'URL read failed: {e}')
            return jsonify(response.dict()), 200

        # No usable content in any of the URLs.
        if len(logs) == 0:
            response = SubmitQuestionAndDocumentsResponse(status='No data found in the URLs')
            return jsonify(response.dict()), 200

        # Chunk the preprocessed logs for retrieval.
        documents = [Document(text=preprocess_logs(logs))]
        parser = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
        nodes = parser.get_nodes_from_documents(documents)

        # Assign sequential ids so Agent.retrieve_logs can order chunks.
        for i, node in enumerate(nodes):
            node.id_ = i

        index = VectorStoreIndex(nodes)
        retriever = VectorIndexRetriever(index, similarity_top_k=RETRIEVE_TOP_K)

        # Call agent to summarize logs.
        agent.summarize(
            question=submitted_data.question,
            logs=logs,
            retriever=retriever
        )

        response = SubmitQuestionAndDocumentsResponse(status='success')
        return jsonify(response.dict()), 200
    finally:
        # BUG FIX: previously only the success path reset `processing`, so any
        # early-return failure left GET /get_question_and_facts reporting
        # 'processing' forever. The finally clause guarantees the reset.
        processing = False
104
+
105
+
106
# Dev entry point; production serves via gunicorn (see Procfile).
if __name__ == '__main__':
    # NOTE(review): debug=True should not ship to production — confirm.
    app.run(host='0.0.0.0', port=8000, debug=True)
app/config.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# LangChain templates
# NOTE(review): `template` appears superseded by `template_v2` (agent.py
# imports only template_v2) — confirm no other importer before removing.
template = """
You are a call log summarization agent. Your job is to extract call summary bulletins only related to the question from a sequence of call logs. Please answer with bullets points only. Do not repeat facts in different bullets, and only response on facts with final decisions, avoid include previous decisions or decision process. Above are some examples of responses.\nGive response to the following question: {question}, according to the meeting logs:\n{logs}
"""

# Instruction suffix appended after the few-shot examples; expects
# {question} and {logs} placeholders (see Agent.prompt).
template_v2 = """
You are a call log fact extraction agent. Your task is to process and extract information from a set of call logs based on a single question provided.

Provide a concise list of facts extracted from the call logs that directly answer the question. Avoid including the conversation process of the facts in your response. Provide only the list, without any other text. Each bullet should only contain one single fact. Please be mindful of the order of the logs and the updated facts/information/decisions and exclude any that have been canceled, including any associated relations.
If the question seems irrelevant to the call logs provided, please just reply "The question seems irrelevant to the call logs provided.".
Above are some response examples.

Question: {question}
List of Call Logs:
{logs}
"""

# Layout used to render each few-shot example ({question}, {logs}, {answer}).
example_template = """Question: {question}
List of Call Logs:
{logs}
Answer:
{answer}"""

# Few-shot examples: each dict supplies logs, question and the expected
# bullet-list answer, demonstrating how superseded decisions are dropped.
examples = [
    {'logs': "00:00:10 - Alex: Let's choose our app's color scheme today.\n00:00:36 - Jordan: I suggest blue for a calm feel.\n00:00:51 - Casey: We need to make sure it's accessible to all users.",
     'question': 'What product design decisions did the team make?',
     'answer': "- The team will use blue for the color scheme of the app.\n- The team will make the app accessible to all users."},
    {'logs': "1\n00:01:11,430 --> 00:01:40,520\n John: Hello, everybody. Let's start with the product design discussion. I think we should go with a modular design for our product. It will allow us to easily add or remove features as needed.\n\n2\n00:01:41,450 --> 00:01:49,190\nSara: I agree with John. A modular design will provide us with the flexibility we need. Also, I suggest we use a responsive design to ensure our product works well on all devices. Finally, I think we should use websockets to improve latency and provide real-time updates.\n\n3\n00:01:49,340 --> 00:01:50,040\nMike: Sounds good to me. I also propose we use a dark theme for the user interface. It's trendy and reduces eye strain for users. Let's hold off on the websockets for now since it's a little bit too much work.",
     'question': 'What are our product design decisions?',
     'answer': "- The team has decided to go with a modular design for the product.\n- The team has decided to use a responsive design to ensure the product works well on all devices.\n- The team has decided to use a dark theme for the user interface."},
    {'logs': "1\n00:01:11,430 --> 00:01:40,520\nJohn: After giving it some more thought, I believe we should also consider a light theme option for the user interface. This will cater to users who prefer a brighter interface.\n\n2\n00:01:41,450 --> 00:01:49,190\nSara: That's a great idea, John. A light theme will provide an alternative to users who find the dark theme too intense.\n\n3\n00:01:49,340 --> 00:01:50,040\nMike: I'm on board with that.",
     'question': 'What are our product design decisions?',
     'answer': "- The team has decided to go with a modular design for the product.\n- The team has decided to use a responsive design to ensure the product works well on all devices.\n- The team has decided to provide both dark and light theme options for the user interface."},
    {'logs': "1\n00:01:11,430 --> 00:01:40,520\nJohn: I've been thinking about our decision on the responsive design. While it's important to ensure our product works well on all devices, I think we should focus on desktop first. Our primary users will be using our product on desktops.\n\n2\n00:01:41,450 --> 00:01:49,190\nSara: I see your point, John. Focusing on desktop first will allow us to better cater to our primary users. I agree with this change.\n\n3\n00:01:49,340 --> 00:01:50,040\nMike: I agree as well. I also think the idea of using a modular design doesn't make sense. Let's not make that decision yet.",
     'question': 'What are our product design decisions?',
     'answer': "- The team has decided to focus on a desktop-first design\n- The team has decided to provide both dark and light theme options for the user interface."},
]

# OPENAI MODEL
MODEL = 'gpt-4'

# LLamaIndex Configs
CHUNK_SIZE = 80  # For parser
CHUNK_OVERLAP = 20

# Number of chunks returned per retrieval query.
RETRIEVE_TOP_K = 10
app/ui.py CHANGED
@@ -1,11 +1,11 @@
 
1
  import time
2
  import requests
3
  import gradio as gr
4
  from utils import get_url_list
5
  from models import SubmitQuestionAndDocumentRequest, GetQuestionAndFactsResponse, SubmitQuestionAndDocumentsResponse
6
 
7
- base_url = 'https://cleric-agent-api-untxx3isja-uc.a.run.app'
8
- # base_url = 'http://localhost:8000'
9
 
10
 
11
  def fetch_facts(question, call_log_urls):
 
1
+ import os
2
  import time
3
  import requests
4
  import gradio as gr
5
  from utils import get_url_list
6
  from models import SubmitQuestionAndDocumentRequest, GetQuestionAndFactsResponse, SubmitQuestionAndDocumentsResponse
7
 
8
+ base_url = os.environ['CLERIC_API_ENDPOINT']
 
9
 
10
 
11
  def fetch_facts(question, call_log_urls):