lindsay-qu committed on
Commit
58974f8
1 Parent(s): e960d88

Upload 92 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app.py +40 -0
  2. core/__init__.py +9 -0
  3. core/__pycache__/__init__.cpython-311.pyc +0 -0
  4. core/__pycache__/__init__.cpython-38.pyc +0 -0
  5. core/__pycache__/__init__.cpython-39.pyc +0 -0
  6. core/chain/__init__.py +1 -0
  7. core/chain/__pycache__/__init__.cpython-311.pyc +0 -0
  8. core/chain/__pycache__/__init__.cpython-38.pyc +0 -0
  9. core/chain/__pycache__/__init__.cpython-39.pyc +0 -0
  10. core/chain/__pycache__/base_chain.cpython-311.pyc +0 -0
  11. core/chain/__pycache__/base_chain.cpython-38.pyc +0 -0
  12. core/chain/__pycache__/base_chain.cpython-39.pyc +0 -0
  13. core/chain/base_chain.py +10 -0
  14. core/chain/simple_chain.py +19 -0
  15. core/chatbot/__init__.py +2 -0
  16. core/chatbot/__pycache__/__init__.cpython-311.pyc +0 -0
  17. core/chatbot/__pycache__/__init__.cpython-39.pyc +0 -0
  18. core/chatbot/__pycache__/base_chatbot.cpython-311.pyc +0 -0
  19. core/chatbot/__pycache__/base_chatbot.cpython-39.pyc +0 -0
  20. core/chatbot/__pycache__/retrieval_chatbot.cpython-311.pyc +0 -0
  21. core/chatbot/__pycache__/retrieval_chatbot.cpython-39.pyc +0 -0
  22. core/chatbot/base_chatbot.py +12 -0
  23. core/chatbot/retrieval_chatbot.py +98 -0
  24. core/memory/__init__.py +2 -0
  25. core/memory/__pycache__/__init__.cpython-311.pyc +0 -0
  26. core/memory/__pycache__/__init__.cpython-39.pyc +0 -0
  27. core/memory/__pycache__/base_memory.cpython-311.pyc +0 -0
  28. core/memory/__pycache__/base_memory.cpython-39.pyc +0 -0
  29. core/memory/__pycache__/chat_memory.cpython-311.pyc +0 -0
  30. core/memory/__pycache__/chat_memory.cpython-39.pyc +0 -0
  31. core/memory/base_memory.py +18 -0
  32. core/memory/chat_memory.py +22 -0
  33. core/memory/plan_memory.py +38 -0
  34. core/planner/__init__.py +1 -0
  35. core/planner/__pycache__/__init__.cpython-311.pyc +0 -0
  36. core/planner/__pycache__/__init__.cpython-39.pyc +0 -0
  37. core/planner/__pycache__/base_planner.cpython-311.pyc +0 -0
  38. core/planner/__pycache__/base_planner.cpython-39.pyc +0 -0
  39. core/planner/base_planner.py +6 -0
  40. core/refiner/__init__.py +2 -0
  41. core/refiner/__pycache__/__init__.cpython-311.pyc +0 -0
  42. core/refiner/__pycache__/__init__.cpython-39.pyc +0 -0
  43. core/refiner/__pycache__/base_refiner.cpython-311.pyc +0 -0
  44. core/refiner/__pycache__/base_refiner.cpython-39.pyc +0 -0
  45. core/refiner/__pycache__/simple_refiner.cpython-311.pyc +0 -0
  46. core/refiner/__pycache__/simple_refiner.cpython-39.pyc +0 -0
  47. core/refiner/base_refiner.py +11 -0
  48. core/refiner/recursive_refiner.py +0 -0
  49. core/refiner/simple_refiner.py +23 -0
  50. core/retriever/__init__.py +3 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import core
import openai
import models
import time
import gradio as gr
import os

# Credentials are read eagerly so a missing variable fails at startup with a
# clear KeyError rather than mid-conversation.
# NOTE(review): these values are never passed to the `openai` module here
# (e.g. `openai.api_key = api_key`) — confirm the model layer reads the
# environment on its own.
api_key = os.environ["OPENAI_API_KEY"]
api_base = os.environ["OPENAI_API_BASE"]


def chatbot_initialize():
    """Build the retrieval chatbot backed by a Chroma store and a biomedical
    embedding model, and return it."""
    retriever = core.retriever.ChromaRetriever(
        pdf_dir="",
        collection_name="langchain",
        split_args={"size": 2048, "overlap": 10},
        embed_model=models.BiomedModel(),
    )
    return core.chatbot.RetrievalChatbot(retriever=retriever)


def respond(query, history, image):
    """gr.ChatInterface callback: stream the chatbot's answer.

    The second positional argument supplied by ChatInterface is the chat
    history (previously misnamed `additional_inputs`); `image` comes from
    the extra gr.Image input. Yields growing prefixes of the answer to
    simulate typing.
    """
    # `Chatbot` is a module-level name; `global` is only needed for writes,
    # so the original `global Chatbot` statements were no-ops and are gone.
    response = Chatbot.response(query, image)
    for i in range(len(response)):
        time.sleep(0.01)
        yield response[: i + 1]


if __name__ == "__main__":
    Chatbot = chatbot_initialize()

    demo = gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            gr.Image(type="filepath"),
        ],
    )
    demo.queue().launch()
core/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from .chain import *
2
+ from .chatbot import *
3
+ from .memory import *
4
+ from .planner import *
5
+ from .refiner import *
6
+ from .retriever import *
7
+
8
+ from models import *
9
+ from prompts import *
core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (387 Bytes). View file
 
core/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (291 Bytes). View file
 
core/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (291 Bytes). View file
 
core/chain/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .base_chain import BaseChain
core/chain/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (213 Bytes). View file
 
core/chain/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (181 Bytes). View file
 
core/chain/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (200 Bytes). View file
 
core/chain/__pycache__/base_chain.cpython-311.pyc ADDED
Binary file (892 Bytes). View file
 
core/chain/__pycache__/base_chain.cpython-38.pyc ADDED
Binary file (731 Bytes). View file
 
core/chain/__pycache__/base_chain.cpython-39.pyc ADDED
Binary file (750 Bytes). View file
 
core/chain/base_chain.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
class BaseChain:
    """Abstract interface for an executable chain of steps.

    Concrete chains decide how items are stored and how execution flows
    from one item to the next.
    """

    def __init__(self, chain: list):
        """Create a chain from an initial list of items."""
        raise NotImplementedError

    def append(self, item: str):
        """Add one item to the end of the chain."""
        raise NotImplementedError

    def execute(self):
        """Run the chain from start to finish."""
        raise NotImplementedError
core/chain/simple_chain.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Fix: the import must be package-relative; `from base_chain import ...`
# would raise ModuleNotFoundError when `core.chain` is imported as a package.
from .base_chain import BaseChain

# Draft implementation, kept commented out until the execution semantics of
# chain items are decided.
# class SimpleChain(BaseChain):
#     def __init__(self, chain: list[str]):
#         self.chain = chain if chain else []

#     def append(self, item: str):
#         self.chain.append(item)

#     def execute(self):
#         for item in self.chain:
#             pass
#             #todo: execute item
#             # item --> result
#             item.execute(param=param)
#             # result --> next item

#         return result
core/chatbot/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .base_chatbot import BaseChatbot
2
+ from .retrieval_chatbot import RetrievalChatbot
core/chatbot/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (293 Bytes). View file
 
core/chatbot/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (264 Bytes). View file
 
core/chatbot/__pycache__/base_chatbot.cpython-311.pyc ADDED
Binary file (1 kB). View file
 
core/chatbot/__pycache__/base_chatbot.cpython-39.pyc ADDED
Binary file (784 Bytes). View file
 
core/chatbot/__pycache__/retrieval_chatbot.cpython-311.pyc ADDED
Binary file (5.38 kB). View file
 
core/chatbot/__pycache__/retrieval_chatbot.cpython-39.pyc ADDED
Binary file (3.38 kB). View file
 
core/chatbot/base_chatbot.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import BaseModel
2
+ from ..memory import BaseMemory
3
class BaseChatbot:
    """Minimal chatbot contract: a language model plus a conversation memory.

    Concrete chatbots implement `respond` to turn a user message into a
    reply string.
    """

    def __init__(self,
                 model: BaseModel,
                 memory: BaseMemory
                 ) -> None:
        # Collaborators are injected so concrete chatbots can swap backends.
        self.model = model
        self.memory = memory

    def respond(self, message: str) -> str:
        """Return the assistant reply for `message` (must be overridden)."""
        raise NotImplementedError
core/chatbot/retrieval_chatbot.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base_chatbot import BaseChatbot
2
+ from ..memory import BaseMemory, ChatMemory
3
+ from ..retriever import BaseRetriever, ChromaRetriever, FaissRetriever
4
+ from ..refiner import BaseRefiner, SimpleRefiner
5
+ from models import BaseModel, GPT4Model
6
+ from prompts import DecomposePrompt, QAPrompt, SummaryPrompt, ReferencePrompt
7
+ import ast
8
+ from utils.image_encoder import encode_image
9
+
10
+ # QA_PROMPT = "\
11
+ # You are a Question-Answering Chatbot. \
12
+ # Given some references and a question, please answer the question according to the references. \
13
+ # If you find the references insufficient, you can answer the question according to your own knowledge. \
14
+ # ONLY output the answer. \
15
+ # "
16
+ # QUESTION_PROMPT = "\
17
+ # You are a Question Refiner. \
18
+ # Given a question, you need to break it down to several subquestions and output a list of string: [\"<subquestion1>\", \"<subquestion2>\", ...]. \
19
+ # MAKE SURE there are no vague concepts in each subquestion that require reference to other subquestions, such as determiners, pronominal and so on. \
20
+ # If the question cannot be broken down, you need to rephrase it in 3 ways and output a list of string: [\"<rephrase1>\", \"<rephrase2>\", \"<rephrase3>\"]. \
21
+ # ONLY output the list of subquestions or rephrases. \
22
+ # "
23
+ # SUMMARY_PROMPT = "\
24
+ # You are a Summary Refiner. \
25
+ # Given a question and several answers to it, you need to organize and summarize the answers to form one coherent answer to the question. \
26
+ # ONLY output the summarized answer. \
27
+ # "
28
+ # REFERENCE_PROMPT = "\
29
+ # You are a Reference Refiner. \
30
+ # Given paragraphs extract from a paper, you need to remove the unnecessary and messy symbols to make it more readable. \
31
+ # But keep the original expression and sentences as much as possible. \
32
+ # ONLY output the refined paragraphs. \
33
+ # "
34
class RetrievalChatbot(BaseChatbot):
    """Retrieval-augmented chatbot.

    Answering pipeline:
      1. decompose the query into subquestions,
      2. retrieve references and answer each subquestion,
      3. retrieve references for the original query,
      4. summarize everything into one final answer.

    Every collaborator defaults to a GPT-4-backed implementation so the
    chatbot works with zero configuration.
    """

    def __init__(self,
                 model: BaseModel = None,
                 memory: BaseMemory = None,
                 retriever: BaseRetriever = None,
                 decomposer: BaseRefiner = None,
                 answerer: BaseRefiner = None,
                 summarizer: BaseRefiner = None,
                 ) -> None:
        self.model = model if model \
            else GPT4Model()
        self.memory = memory if memory \
            else ChatMemory(sys_prompt=SummaryPrompt.content)
        self.retriever = retriever if retriever \
            else ChromaRetriever(pdf_dir="papers_all",
                                 collection_name="pdfs",
                                 split_args={"size": 2048, "overlap": 10},
                                 embed_model=GPT4Model())
        self.decomposer = decomposer if decomposer \
            else SimpleRefiner(model=GPT4Model(), sys_prompt=DecomposePrompt.content)
        self.answerer = answerer if answerer \
            else SimpleRefiner(model=GPT4Model(), sys_prompt=QAPrompt.content)
        self.summarizer = summarizer if summarizer \
            else SimpleRefiner(model=GPT4Model(), sys_prompt=SummaryPrompt.content)

    def response(self, message: str, image_path=None) -> str:
        """Answer `message` (optionally grounded on an image file) and record
        the exchange in memory.

        Returns the summarized final answer string.
        """
        print("Query: {message}".format(message=message))
        question = self.decomposer.refine(message, image_path)
        print(question)
        try:
            # The decomposer is prompted to emit a Python list literal of
            # subquestion strings.
            sub_questions = ast.literal_eval(question)
        except (ValueError, SyntaxError):
            # Robustness fix: if the model returns free text instead of a
            # list literal, treat the query as a single subquestion rather
            # than crashing.
            sub_questions = [message]
        print("Decomposed your query into subquestions: {sub_questions}".format(sub_questions=sub_questions))
        references = ""
        for sub_question in sub_questions:
            print("="*20)
            print(f"Subquestion: {sub_question}")

            print(f"Retrieving pdf papers for references...\n")
            # Each subquestion's context also includes the subanswers
            # accumulated so far (deliberate: later subquestions may build
            # on earlier answers).
            sub_retrieve_reference = references
            sub_retrieve = self.retriever.retrieve(sub_question)
            for ref in sub_retrieve:
                sub_retrieve_reference += "Related research: {ref}\n".format(ref=ref)
            sub_answerer_context = "Sub Question References: {sub_retrieve_reference}\nQuestion: {question}\n".format(sub_retrieve_reference=sub_retrieve_reference, question=sub_question)
            sub_answer = self.answerer.refine(sub_answerer_context, image_path)

            print(f"Subanswer: {sub_answer}")

            references += "Subquestion: {sub_question}\nSubanswer: {sub_answer}\n".format(sub_question=sub_question, sub_answer=sub_answer)

        # References for the query as a whole, on top of the subanswers.
        refs = self.retriever.retrieve(message)
        for ref in refs:
            references += "Related research for the user query: {ref}\n".format(ref=ref)

        summarizer_context = "Question References: {references}\nQuestion: {message}\n".format(references=references, message=message)
        answer = self.summarizer.refine(summarizer_context, image_path)

        # TODO: memory management (truncation/summarization of long history).
        # Bug fix: only attach an image part when an image was actually
        # supplied — encode_image(None) would fail for text-only queries.
        user_content = [{"type": "text", "text": f"{message}"}]
        if image_path:
            user_content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}})
        self.memory.append([{"role": "user", "content": user_content},
                            {"role": "assistant", "content": answer}])
        print("="*20)
        # The trailing .format() on this f-string was a no-op; dropped.
        print(f"Final answer: {answer}")
        return answer
core/memory/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .base_memory import BaseMemory
2
+ from .chat_memory import ChatMemory
core/memory/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (278 Bytes). View file
 
core/memory/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (249 Bytes). View file
 
core/memory/__pycache__/base_memory.cpython-311.pyc ADDED
Binary file (1.31 kB). View file
 
core/memory/__pycache__/base_memory.cpython-39.pyc ADDED
Binary file (1.12 kB). View file
 
core/memory/__pycache__/chat_memory.cpython-311.pyc ADDED
Binary file (1.66 kB). View file
 
core/memory/__pycache__/chat_memory.cpython-39.pyc ADDED
Binary file (1.26 kB). View file
 
core/memory/base_memory.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class BaseMemory:
    """Abstract conversation/plan memory interface.

    Concrete memories choose the message representation and how (or
    whether) it is persisted.
    """

    def __init__(self) -> None:
        raise NotImplementedError

    def append(self, message: str) -> None:
        """Record a new message."""
        raise NotImplementedError

    def pop(self) -> None:
        """Discard the most recently recorded message."""
        raise NotImplementedError

    def clear(self) -> None:
        """Reset the memory to its initial state."""
        raise NotImplementedError

    def load(self) -> None:
        """Restore memory contents from persistent storage."""
        raise NotImplementedError

    def save(self) -> None:
        """Write memory contents to persistent storage."""
        raise NotImplementedError
core/memory/chat_memory.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base_memory import BaseMemory
2
+
3
class ChatMemory(BaseMemory):
    """Linear chat transcript, optionally seeded with a system prompt."""

    def __init__(self, sys_prompt = None) -> None:
        self.sys_prompt = sys_prompt
        # Seed the transcript with a system message only when a prompt
        # was actually given.
        self.messages = [{"role": "system", "content": sys_prompt}] if sys_prompt else []

    def append(self, message: list) -> None:
        """Extend the transcript with a list of message dicts."""
        # assert
        self.messages += message

    def pop(self) -> None:
        """Drop the most recent message."""
        self.messages.pop()

    def clear(self) -> None:
        """Reset the transcript to its initial state.

        Bug fix: this used to unconditionally re-insert a system message,
        so a memory created without a prompt ended up with
        {"role": "system", "content": None} after clear().
        """
        self.messages = [{"role": "system", "content": self.sys_prompt}] if self.sys_prompt else []

    def load(self) -> None:
        # Persistence is not implemented yet.
        pass

    def save(self) -> None:
        # Persistence is not implemented yet.
        pass
core/memory/plan_memory.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base_memory import BaseMemory
2
+
3
+ from dataclasses import dataclass
4
+
5
@dataclass
class Task:
    """A named unit of work tracked in a plan.

    Bug fix: the original combined @dataclass with a hand-written
    __init__ and declared no fields, so the generated __eq__ compared
    empty field tuples and ALL Task instances were equal. Declaring the
    fields lets @dataclass generate __init__/__repr__/__eq__ correctly
    while keeping the Task(name, description) call signature.
    """
    name: str
    description: str


class TaskChain:
    """An ordered list of tasks with a human-readable string form."""

    def __init__(self, tasks: list):
        self.tasks = tasks

    def append(self, task: Task):
        """Add one task to the end of the chain."""
        self.tasks.append(task)

    def clear(self):
        """Drop every task."""
        self.tasks = []

    def __str__(self):
        # One "name: description" line per task.
        return "\n".join(f"{task.name}: {task.description}" for task in self.tasks)
20
+
21
class PlanMemory(BaseMemory):
    """Memory that tracks raw messages alongside a derived task chain."""

    def __init__(self, initial_message, initial_task) -> None:
        # Falsy arguments (None / empty) fall back to empty containers.
        self.messages = initial_message or []
        self.tasks = TaskChain(initial_task or [])

    def append(self, message: str) -> None:
        """Record `message` and register it as a task."""
        self.messages.append(message)
        # TODO: parse the message into structured tasks; for now every
        # message becomes one generic "Task" entry.
        self.tasks.append(Task("Task", message))

    def clear(self) -> None:
        # NOTE(review): only the messages are cleared — self.tasks keeps
        # its contents. Confirm whether the task chain should reset too.
        self.messages = []

    def load(self) -> None:
        # Persistence is not implemented yet.
        pass

    def save(self) -> None:
        # Persistence is not implemented yet.
        pass
core/planner/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .base_planner import BasePlanner
core/planner/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (219 Bytes). View file
 
core/planner/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (206 Bytes). View file
 
core/planner/__pycache__/base_planner.cpython-311.pyc ADDED
Binary file (767 Bytes). View file
 
core/planner/__pycache__/base_planner.cpython-39.pyc ADDED
Binary file (625 Bytes). View file
 
core/planner/base_planner.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+
2
class BasePlanner:
    """Abstract planner: turns a user message into an ordered list of steps."""

    def __init__(self):
        raise NotImplementedError

    def plan(self, message: str) -> list[str]:
        """Break `message` into a sequence of sub-task strings."""
        raise NotImplementedError
core/refiner/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .base_refiner import BaseRefiner
2
+ from .simple_refiner import SimpleRefiner
core/refiner/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (287 Bytes). View file
 
core/refiner/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (258 Bytes). View file
 
core/refiner/__pycache__/base_refiner.cpython-311.pyc ADDED
Binary file (938 Bytes). View file
 
core/refiner/__pycache__/base_refiner.cpython-39.pyc ADDED
Binary file (738 Bytes). View file
 
core/refiner/__pycache__/simple_refiner.cpython-311.pyc ADDED
Binary file (1.64 kB). View file
 
core/refiner/__pycache__/simple_refiner.cpython-39.pyc ADDED
Binary file (895 Bytes). View file
 
core/refiner/base_refiner.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import BaseModel
2
class BaseRefiner:
    """Abstract text refiner: a system prompt paired with a model backend."""

    def __init__(self,
                 sys_prompt: str,
                 model: BaseModel,
                 ) -> None:
        # Stored as-is; concrete refiners build the chat context from these.
        self.sys_prompt = sys_prompt
        self.model = model

    def refine(self, message: str) -> str:
        """Return a refined version of `message` (must be overridden)."""
        raise NotImplementedError
core/refiner/recursive_refiner.py ADDED
File without changes
core/refiner/simple_refiner.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import BaseModel
2
+ from .base_refiner import BaseRefiner
3
+ from utils.image_encoder import encode_image
4
class SimpleRefiner(BaseRefiner):
    """Single-pass refiner: one system prompt, one model call."""

    def __init__(self,
                 sys_prompt: str,
                 model: BaseModel,
                 ) -> None:
        # Idiom fix: use super() instead of naming the base class directly.
        super().__init__(sys_prompt=sys_prompt, model=model)

    def refine(self, message: str, image_path=None) -> str:
        """Send `message` (optionally with an image file) to the model and
        return its reply.
        """
        # Build the user content once and attach the image only when
        # provided, instead of duplicating the whole context literal in
        # both branches.
        user_content = [{"type": "text", "text": f"{message}"}]
        if image_path:
            user_content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}})
        context = [{"role": "system", "content": self.sys_prompt},
                   {"role": "user", "content": user_content}]
        response = self.model.respond(context)

        return response
core/retriever/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .base_retriever import BaseRetriever
2
+ from .chroma_retriever import ChromaRetriever
3
+ from .faiss_retriever import FaissRetriever