lindsay-qu committed on
Commit
58974f8
1 Parent(s): e960d88

Upload 92 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app.py +40 -0
  2. core/__init__.py +9 -0
  3. core/__pycache__/__init__.cpython-311.pyc +0 -0
  4. core/__pycache__/__init__.cpython-38.pyc +0 -0
  5. core/__pycache__/__init__.cpython-39.pyc +0 -0
  6. core/chain/__init__.py +1 -0
  7. core/chain/__pycache__/__init__.cpython-311.pyc +0 -0
  8. core/chain/__pycache__/__init__.cpython-38.pyc +0 -0
  9. core/chain/__pycache__/__init__.cpython-39.pyc +0 -0
  10. core/chain/__pycache__/base_chain.cpython-311.pyc +0 -0
  11. core/chain/__pycache__/base_chain.cpython-38.pyc +0 -0
  12. core/chain/__pycache__/base_chain.cpython-39.pyc +0 -0
  13. core/chain/base_chain.py +10 -0
  14. core/chain/simple_chain.py +19 -0
  15. core/chatbot/__init__.py +2 -0
  16. core/chatbot/__pycache__/__init__.cpython-311.pyc +0 -0
  17. core/chatbot/__pycache__/__init__.cpython-39.pyc +0 -0
  18. core/chatbot/__pycache__/base_chatbot.cpython-311.pyc +0 -0
  19. core/chatbot/__pycache__/base_chatbot.cpython-39.pyc +0 -0
  20. core/chatbot/__pycache__/retrieval_chatbot.cpython-311.pyc +0 -0
  21. core/chatbot/__pycache__/retrieval_chatbot.cpython-39.pyc +0 -0
  22. core/chatbot/base_chatbot.py +12 -0
  23. core/chatbot/retrieval_chatbot.py +98 -0
  24. core/memory/__init__.py +2 -0
  25. core/memory/__pycache__/__init__.cpython-311.pyc +0 -0
  26. core/memory/__pycache__/__init__.cpython-39.pyc +0 -0
  27. core/memory/__pycache__/base_memory.cpython-311.pyc +0 -0
  28. core/memory/__pycache__/base_memory.cpython-39.pyc +0 -0
  29. core/memory/__pycache__/chat_memory.cpython-311.pyc +0 -0
  30. core/memory/__pycache__/chat_memory.cpython-39.pyc +0 -0
  31. core/memory/base_memory.py +18 -0
  32. core/memory/chat_memory.py +22 -0
  33. core/memory/plan_memory.py +38 -0
  34. core/planner/__init__.py +1 -0
  35. core/planner/__pycache__/__init__.cpython-311.pyc +0 -0
  36. core/planner/__pycache__/__init__.cpython-39.pyc +0 -0
  37. core/planner/__pycache__/base_planner.cpython-311.pyc +0 -0
  38. core/planner/__pycache__/base_planner.cpython-39.pyc +0 -0
  39. core/planner/base_planner.py +6 -0
  40. core/refiner/__init__.py +2 -0
  41. core/refiner/__pycache__/__init__.cpython-311.pyc +0 -0
  42. core/refiner/__pycache__/__init__.cpython-39.pyc +0 -0
  43. core/refiner/__pycache__/base_refiner.cpython-311.pyc +0 -0
  44. core/refiner/__pycache__/base_refiner.cpython-39.pyc +0 -0
  45. core/refiner/__pycache__/simple_refiner.cpython-311.pyc +0 -0
  46. core/refiner/__pycache__/simple_refiner.cpython-39.pyc +0 -0
  47. core/refiner/base_refiner.py +11 -0
  48. core/refiner/recursive_refiner.py +0 -0
  49. core/refiner/simple_refiner.py +23 -0
  50. core/retriever/__init__.py +3 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import core
import openai
import models
import time
import gradio as gr
import os

# Credentials are read eagerly so a missing variable fails at startup with a
# clear KeyError rather than mid-conversation.
# NOTE(review): these values are never passed to the `openai` module here
# (e.g. `openai.api_key = api_key`) — confirm the model layer reads the
# environment on its own.
api_key = os.environ["OPENAI_API_KEY"]
api_base = os.environ["OPENAI_API_BASE"]


def chatbot_initialize():
    """Build the retrieval chatbot backed by a Chroma store and a biomedical
    embedding model, and return it."""
    retriever = core.retriever.ChromaRetriever(
        pdf_dir="",
        collection_name="langchain",
        split_args={"size": 2048, "overlap": 10},
        embed_model=models.BiomedModel(),
    )
    return core.chatbot.RetrievalChatbot(retriever=retriever)


def respond(query, history, image):
    """gr.ChatInterface callback: stream the chatbot's answer.

    The second positional argument supplied by ChatInterface is the chat
    history (previously misnamed `additional_inputs`); `image` comes from
    the extra gr.Image input. Yields growing prefixes of the answer to
    simulate typing.
    """
    # `Chatbot` is a module-level name; `global` is only needed for writes,
    # so the original `global Chatbot` statements were no-ops and are gone.
    response = Chatbot.response(query, image)
    for i in range(len(response)):
        time.sleep(0.01)
        yield response[: i + 1]


if __name__ == "__main__":
    Chatbot = chatbot_initialize()

    demo = gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            gr.Image(type="filepath"),
        ],
    )
    demo.queue().launch()
core/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from .chain import *
2
+ from .chatbot import *
3
+ from .memory import *
4
+ from .planner import *
5
+ from .refiner import *
6
+ from .retriever import *
7
+
8
+ from models import *
9
+ from prompts import *
core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (387 Bytes). View file
 
core/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (291 Bytes). View file
 
core/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (291 Bytes). View file
 
core/chain/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .base_chain import BaseChain
core/chain/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (213 Bytes). View file
 
core/chain/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (181 Bytes). View file
 
core/chain/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (200 Bytes). View file
 
core/chain/__pycache__/base_chain.cpython-311.pyc ADDED
Binary file (892 Bytes). View file
 
core/chain/__pycache__/base_chain.cpython-38.pyc ADDED
Binary file (731 Bytes). View file
 
core/chain/__pycache__/base_chain.cpython-39.pyc ADDED
Binary file (750 Bytes). View file
 
core/chain/base_chain.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
class BaseChain:
    """Abstract interface for an executable chain of steps.

    Concrete chains decide how items are stored and how execution flows
    from one item to the next.
    """

    def __init__(self, chain: list):
        """Create a chain from an initial list of items."""
        raise NotImplementedError

    def append(self, item: str):
        """Add one item to the end of the chain."""
        raise NotImplementedError

    def execute(self):
        """Run the chain from start to finish."""
        raise NotImplementedError
core/chain/simple_chain.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Fix: the import must be package-relative; `from base_chain import ...`
# would raise ModuleNotFoundError when `core.chain` is imported as a package.
from .base_chain import BaseChain

# Draft implementation, kept commented out until the execution semantics of
# chain items are decided.
# class SimpleChain(BaseChain):
#     def __init__(self, chain: list[str]):
#         self.chain = chain if chain else []

#     def append(self, item: str):
#         self.chain.append(item)

#     def execute(self):
#         for item in self.chain:
#             pass
#             #todo: execute item
#             # item --> result
#             item.execute(param=param)
#             # result --> next item

#         return result
core/chatbot/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .base_chatbot import BaseChatbot
2
+ from .retrieval_chatbot import RetrievalChatbot
core/chatbot/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (293 Bytes). View file
 
core/chatbot/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (264 Bytes). View file
 
core/chatbot/__pycache__/base_chatbot.cpython-311.pyc ADDED
Binary file (1 kB). View file
 
core/chatbot/__pycache__/base_chatbot.cpython-39.pyc ADDED
Binary file (784 Bytes). View file
 
core/chatbot/__pycache__/retrieval_chatbot.cpython-311.pyc ADDED
Binary file (5.38 kB). View file
 
core/chatbot/__pycache__/retrieval_chatbot.cpython-39.pyc ADDED
Binary file (3.38 kB). View file
 
core/chatbot/base_chatbot.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import BaseModel
2
+ from ..memory import BaseMemory
3
class BaseChatbot:
    """Minimal chatbot contract: a language model plus a conversation memory.

    Concrete chatbots implement `respond` to turn a user message into a
    reply string.
    """

    def __init__(self,
                 model: BaseModel,
                 memory: BaseMemory
                 ) -> None:
        # Collaborators are injected so concrete chatbots can swap backends.
        self.model = model
        self.memory = memory

    def respond(self, message: str) -> str:
        """Return the assistant reply for `message` (must be overridden)."""
        raise NotImplementedError
core/chatbot/retrieval_chatbot.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base_chatbot import BaseChatbot
2
+ from ..memory import BaseMemory, ChatMemory
3
+ from ..retriever import BaseRetriever, ChromaRetriever, FaissRetriever
4
+ from ..refiner import BaseRefiner, SimpleRefiner
5
+ from models import BaseModel, GPT4Model
6
+ from prompts import DecomposePrompt, QAPrompt, SummaryPrompt, ReferencePrompt
7
+ import ast
8
+ from utils.image_encoder import encode_image
9
+
10
+ # QA_PROMPT = "\
11
+ # You are a Question-Answering Chatbot. \
12
+ # Given some references and a question, please answer the question according to the references. \
13
+ # If you find the references insufficient, you can answer the question according to your own knowledge. \
14
+ # ONLY output the answer. \
15
+ # "
16
+ # QUESTION_PROMPT = "\
17
+ # You are a Question Refiner. \
18
+ # Given a question, you need to break it down to several subquestions and output a list of string: [\"<subquestion1>\", \"<subquestion2>\", ...]. \
19
+ # MAKE SURE there are no vague concepts in each subquestion that require reference to other subquestions, such as determiners, pronominal and so on. \
20
+ # If the question cannot be broken down, you need to rephrase it in 3 ways and output a list of string: [\"<rephrase1>\", \"<rephrase2>\", \"<rephrase3>\"]. \
21
+ # ONLY output the list of subquestions or rephrases. \
22
+ # "
23
+ # SUMMARY_PROMPT = "\
24
+ # You are a Summary Refiner. \
25
+ # Given a question and several answers to it, you need to organize and summarize the answers to form one coherent answer to the question. \
26
+ # ONLY output the summarized answer. \
27
+ # "
28
+ # REFERENCE_PROMPT = "\
29
+ # You are a Reference Refiner. \
30
+ # Given paragraphs extract from a paper, you need to remove the unnecessary and messy symbols to make it more readable. \
31
+ # But keep the original expression and sentences as much as possible. \
32
+ # ONLY output the refined paragraphs. \
33
+ # "
34
class RetrievalChatbot(BaseChatbot):
    """Retrieval-augmented chatbot.

    Answering pipeline:
      1. decompose the query into subquestions,
      2. retrieve references and answer each subquestion,
      3. retrieve references for the original query,
      4. summarize everything into one final answer.

    Every collaborator defaults to a GPT-4-backed implementation so the
    chatbot works with zero configuration.
    """

    def __init__(self,
                 model: BaseModel = None,
                 memory: BaseMemory = None,
                 retriever: BaseRetriever = None,
                 decomposer: BaseRefiner = None,
                 answerer: BaseRefiner = None,
                 summarizer: BaseRefiner = None,
                 ) -> None:
        self.model = model if model \
            else GPT4Model()
        self.memory = memory if memory \
            else ChatMemory(sys_prompt=SummaryPrompt.content)
        self.retriever = retriever if retriever \
            else ChromaRetriever(pdf_dir="papers_all",
                                 collection_name="pdfs",
                                 split_args={"size": 2048, "overlap": 10},
                                 embed_model=GPT4Model())
        self.decomposer = decomposer if decomposer \
            else SimpleRefiner(model=GPT4Model(), sys_prompt=DecomposePrompt.content)
        self.answerer = answerer if answerer \
            else SimpleRefiner(model=GPT4Model(), sys_prompt=QAPrompt.content)
        self.summarizer = summarizer if summarizer \
            else SimpleRefiner(model=GPT4Model(), sys_prompt=SummaryPrompt.content)

    def response(self, message: str, image_path=None) -> str:
        """Answer `message` (optionally grounded on an image file) and record
        the exchange in memory.

        Returns the summarized final answer string.
        """
        print("Query: {message}".format(message=message))
        question = self.decomposer.refine(message, image_path)
        print(question)
        try:
            # The decomposer is prompted to emit a Python list literal of
            # subquestion strings.
            sub_questions = ast.literal_eval(question)
        except (ValueError, SyntaxError):
            # Robustness fix: if the model returns free text instead of a
            # list literal, treat the query as a single subquestion rather
            # than crashing.
            sub_questions = [message]
        print("Decomposed your query into subquestions: {sub_questions}".format(sub_questions=sub_questions))
        references = ""
        for sub_question in sub_questions:
            print("="*20)
            print(f"Subquestion: {sub_question}")

            print(f"Retrieving pdf papers for references...\n")
            # Each subquestion's context also includes the subanswers
            # accumulated so far (deliberate: later subquestions may build
            # on earlier answers).
            sub_retrieve_reference = references
            sub_retrieve = self.retriever.retrieve(sub_question)
            for ref in sub_retrieve:
                sub_retrieve_reference += "Related research: {ref}\n".format(ref=ref)
            sub_answerer_context = "Sub Question References: {sub_retrieve_reference}\nQuestion: {question}\n".format(sub_retrieve_reference=sub_retrieve_reference, question=sub_question)
            sub_answer = self.answerer.refine(sub_answerer_context, image_path)

            print(f"Subanswer: {sub_answer}")

            references += "Subquestion: {sub_question}\nSubanswer: {sub_answer}\n".format(sub_question=sub_question, sub_answer=sub_answer)

        # References for the query as a whole, on top of the subanswers.
        refs = self.retriever.retrieve(message)
        for ref in refs:
            references += "Related research for the user query: {ref}\n".format(ref=ref)

        summarizer_context = "Question References: {references}\nQuestion: {message}\n".format(references=references, message=message)
        answer = self.summarizer.refine(summarizer_context, image_path)

        # TODO: memory management (truncation/summarization of long history).
        # Bug fix: only attach an image part when an image was actually
        # supplied — encode_image(None) would fail for text-only queries.
        user_content = [{"type": "text", "text": f"{message}"}]
        if image_path:
            user_content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}})
        self.memory.append([{"role": "user", "content": user_content},
                            {"role": "assistant", "content": answer}])
        print("="*20)
        # The trailing .format() on this f-string was a no-op; dropped.
        print(f"Final answer: {answer}")
        return answer
core/memory/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .base_memory import BaseMemory
2
+ from .chat_memory import ChatMemory
core/memory/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (278 Bytes). View file
 
core/memory/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (249 Bytes). View file
 
core/memory/__pycache__/base_memory.cpython-311.pyc ADDED
Binary file (1.31 kB). View file
 
core/memory/__pycache__/base_memory.cpython-39.pyc ADDED
Binary file (1.12 kB). View file
 
core/memory/__pycache__/chat_memory.cpython-311.pyc ADDED
Binary file (1.66 kB). View file
 
core/memory/__pycache__/chat_memory.cpython-39.pyc ADDED
Binary file (1.26 kB). View file
 
core/memory/base_memory.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class BaseMemory:
    """Abstract conversation/plan memory interface.

    Concrete memories choose the message representation and how (or
    whether) it is persisted.
    """

    def __init__(self) -> None:
        raise NotImplementedError

    def append(self, message: str) -> None:
        """Record a new message."""
        raise NotImplementedError

    def pop(self) -> None:
        """Discard the most recently recorded message."""
        raise NotImplementedError

    def clear(self) -> None:
        """Reset the memory to its initial state."""
        raise NotImplementedError

    def load(self) -> None:
        """Restore memory contents from persistent storage."""
        raise NotImplementedError

    def save(self) -> None:
        """Write memory contents to persistent storage."""
        raise NotImplementedError
core/memory/chat_memory.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base_memory import BaseMemory
2
+
3
class ChatMemory(BaseMemory):
    """Linear chat transcript, optionally seeded with a system prompt."""

    def __init__(self, sys_prompt = None) -> None:
        self.sys_prompt = sys_prompt
        # Seed the transcript with a system message only when a prompt
        # was actually given.
        self.messages = [{"role": "system", "content": sys_prompt}] if sys_prompt else []

    def append(self, message: list) -> None:
        """Extend the transcript with a list of message dicts."""
        # assert
        self.messages += message

    def pop(self) -> None:
        """Drop the most recent message."""
        self.messages.pop()

    def clear(self) -> None:
        """Reset the transcript to its initial state.

        Bug fix: this used to unconditionally re-insert a system message,
        so a memory created without a prompt ended up with
        {"role": "system", "content": None} after clear().
        """
        self.messages = [{"role": "system", "content": self.sys_prompt}] if self.sys_prompt else []

    def load(self) -> None:
        # Persistence is not implemented yet.
        pass

    def save(self) -> None:
        # Persistence is not implemented yet.
        pass
core/memory/plan_memory.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base_memory import BaseMemory
2
+
3
+ from dataclasses import dataclass
4
+
5
@dataclass
class Task:
    """A named unit of work tracked in a plan.

    Bug fix: the original combined @dataclass with a hand-written
    __init__ and declared no fields, so the generated __eq__ compared
    empty field tuples and ALL Task instances were equal. Declaring the
    fields lets @dataclass generate __init__/__repr__/__eq__ correctly
    while keeping the Task(name, description) call signature.
    """
    name: str
    description: str


class TaskChain:
    """An ordered list of tasks with a human-readable string form."""

    def __init__(self, tasks: list):
        self.tasks = tasks

    def append(self, task: Task):
        """Add one task to the end of the chain."""
        self.tasks.append(task)

    def clear(self):
        """Drop every task."""
        self.tasks = []

    def __str__(self):
        # One "name: description" line per task.
        return "\n".join(f"{task.name}: {task.description}" for task in self.tasks)
20
+
21
class PlanMemory(BaseMemory):
    """Memory that tracks raw messages alongside a derived task chain."""

    def __init__(self, initial_message, initial_task) -> None:
        # Falsy arguments (None / empty) fall back to empty containers.
        self.messages = initial_message or []
        self.tasks = TaskChain(initial_task or [])

    def append(self, message: str) -> None:
        """Record `message` and register it as a task."""
        self.messages.append(message)
        # TODO: parse the message into structured tasks; for now every
        # message becomes one generic "Task" entry.
        self.tasks.append(Task("Task", message))

    def clear(self) -> None:
        # NOTE(review): only the messages are cleared — self.tasks keeps
        # its contents. Confirm whether the task chain should reset too.
        self.messages = []

    def load(self) -> None:
        # Persistence is not implemented yet.
        pass

    def save(self) -> None:
        # Persistence is not implemented yet.
        pass
core/planner/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .base_planner import BasePlanner
core/planner/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (219 Bytes). View file
 
core/planner/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (206 Bytes). View file
 
core/planner/__pycache__/base_planner.cpython-311.pyc ADDED
Binary file (767 Bytes). View file
 
core/planner/__pycache__/base_planner.cpython-39.pyc ADDED
Binary file (625 Bytes). View file
 
core/planner/base_planner.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+
2
class BasePlanner:
    """Abstract planner: turns a user message into an ordered list of steps."""

    def __init__(self):
        raise NotImplementedError

    def plan(self, message: str) -> list[str]:
        """Break `message` into a sequence of sub-task strings."""
        raise NotImplementedError
core/refiner/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .base_refiner import BaseRefiner
2
+ from .simple_refiner import SimpleRefiner
core/refiner/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (287 Bytes). View file
 
core/refiner/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (258 Bytes). View file
 
core/refiner/__pycache__/base_refiner.cpython-311.pyc ADDED
Binary file (938 Bytes). View file
 
core/refiner/__pycache__/base_refiner.cpython-39.pyc ADDED
Binary file (738 Bytes). View file
 
core/refiner/__pycache__/simple_refiner.cpython-311.pyc ADDED
Binary file (1.64 kB). View file
 
core/refiner/__pycache__/simple_refiner.cpython-39.pyc ADDED
Binary file (895 Bytes). View file
 
core/refiner/base_refiner.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import BaseModel
2
class BaseRefiner:
    """Abstract text refiner: a system prompt paired with a model backend."""

    def __init__(self,
                 sys_prompt: str,
                 model: BaseModel,
                 ) -> None:
        # Stored as-is; concrete refiners build the chat context from these.
        self.sys_prompt = sys_prompt
        self.model = model

    def refine(self, message: str) -> str:
        """Return a refined version of `message` (must be overridden)."""
        raise NotImplementedError
core/refiner/recursive_refiner.py ADDED
File without changes
core/refiner/simple_refiner.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import BaseModel
2
+ from .base_refiner import BaseRefiner
3
+ from utils.image_encoder import encode_image
4
class SimpleRefiner(BaseRefiner):
    """Single-pass refiner: one system prompt, one model call."""

    def __init__(self,
                 sys_prompt: str,
                 model: BaseModel,
                 ) -> None:
        # Idiom fix: use super() instead of naming the base class directly.
        super().__init__(sys_prompt=sys_prompt, model=model)

    def refine(self, message: str, image_path=None) -> str:
        """Send `message` (optionally with an image file) to the model and
        return its reply.
        """
        # Build the user content once and attach the image only when
        # provided, instead of duplicating the whole context literal in
        # both branches.
        user_content = [{"type": "text", "text": f"{message}"}]
        if image_path:
            user_content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path)}"}})
        context = [{"role": "system", "content": self.sys_prompt},
                   {"role": "user", "content": user_content}]
        response = self.model.respond(context)

        return response
core/retriever/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .base_retriever import BaseRetriever
2
+ from .chroma_retriever import ChromaRetriever
3
+ from .faiss_retriever import FaissRetriever