diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..e738080ea3337950d8af57b784e0f02b023f25dc --- /dev/null +++ b/app.py @@ -0,0 +1,58 @@ +import core +import openai +import models +import time +import gradio as gr +import os +import asyncio +import time + +api_key = os.environ["OPENAI_API_KEY"] +api_base = os.environ["OPENAI_API_BASE"] + +# dddd +# def embed(texts: list): +# return openai.Embedding.create(input=texts, model="text-embedding-ada-002")["data"]["embedding"] + +def chatbot_initialize(): + retriever = core.retriever.ChromaRetriever(pdf_dir="", + collection_name="pdfs_1000", + split_args={"size": 2048, "overlap": 10}, #embedding_model="text-embedding-ada-002" + embed_model=models.BiomedModel() + ) + Chatbot = core.chatbot.RetrievalChatbot(retriever=retriever) + return Chatbot + +async def respond(query, chat_history, img_path_list, chat_history_string): + time1 = time.time() + global Chatbot + result = await Chatbot.response(query, image_paths=img_path_list) + response = result["answer"] + logs = result["logs"] + titles_set = result["titles"] + titles = "\n".join(list(titles_set)) + chat_history.append((query, response)) + if img_path_list is None: + chat_history_string += "Query: " + query + "\nImage: None" + "\nResponse: " + response + "\n\n\n" + else: + chat_history_string += "Query: " + query + "\nImages: " + "\n".join([path.name for path in img_path_list]) + "\nResponse: " + response + "\n\n\n" + time2 = time.time() + print(f"Total: {time2-time1}") + return "", chat_history, chat_history_string + +if __name__ == "__main__": + global Chatbot + Chatbot=chatbot_initialize() + + with gr.Blocks() as demo: + with gr.Row(): + with gr.Column(scale=2): + chatbot = gr.Chatbot() + msg = gr.Textbox(label="Query", show_label=True) + imgs = gr.File(file_count='multiple', file_types=['image'], type="filepath", label='Upload Images') + clear = gr.ClearButton([msg, chatbot]) + with gr.Column(scale=1): + # titles = 
gr.Textbox(label="Referenced Article Titles", show_label=True, show_copy_button=True, interactive=False) + history = gr.Textbox(label="Copy Chat History", show_label=True, show_copy_button=True, interactive=False, max_lines=5) + msg.submit(respond, inputs=[msg, chatbot, imgs, history], outputs=[msg, chatbot, history]) + demo.queue().launch() \ No newline at end of file diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..347dfc3c72b31691e231cdbaf5b46ecc11326205 --- /dev/null +++ b/core/__init__.py @@ -0,0 +1,9 @@ +from .chain import * +from .chatbot import * +from .memory import * +from .planner import * +from .refiner import * +from .retriever import * + +from models import * +from prompts import * \ No newline at end of file diff --git a/core/__pycache__/__init__.cpython-311.pyc b/core/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3dd7b386ceb2e0dc180c437857fd2cbc0ba87b64 Binary files /dev/null and b/core/__pycache__/__init__.cpython-311.pyc differ diff --git a/core/__pycache__/__init__.cpython-38.pyc b/core/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..da17b5058feac423343d3309826e3fda3f93bce4 Binary files /dev/null and b/core/__pycache__/__init__.cpython-38.pyc differ diff --git a/core/__pycache__/__init__.cpython-39.pyc b/core/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2b6431111673759b3b27bb1779b648ccd70d7bc3 Binary files /dev/null and b/core/__pycache__/__init__.cpython-39.pyc differ diff --git a/core/chain/__init__.py b/core/chain/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bc6da7ab7bd5f49b924b302bde1c53ed83e8d949 --- /dev/null +++ b/core/chain/__init__.py @@ -0,0 +1 @@ +from .base_chain import BaseChain \ No newline at end of file diff --git 
a/core/chain/__pycache__/__init__.cpython-311.pyc b/core/chain/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..750baf36f5d16f527f906a81ad56a1c459e5fd78 Binary files /dev/null and b/core/chain/__pycache__/__init__.cpython-311.pyc differ diff --git a/core/chain/__pycache__/__init__.cpython-38.pyc b/core/chain/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0460aeaaabd1a99eaa83c20491a55ab75c339730 Binary files /dev/null and b/core/chain/__pycache__/__init__.cpython-38.pyc differ diff --git a/core/chain/__pycache__/__init__.cpython-39.pyc b/core/chain/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08350a44cf86cc21971ad333feee95084d294aa2 Binary files /dev/null and b/core/chain/__pycache__/__init__.cpython-39.pyc differ diff --git a/core/chain/__pycache__/base_chain.cpython-311.pyc b/core/chain/__pycache__/base_chain.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa1fdc52935d02667f46671373b11d16fb047079 Binary files /dev/null and b/core/chain/__pycache__/base_chain.cpython-311.pyc differ diff --git a/core/chain/__pycache__/base_chain.cpython-38.pyc b/core/chain/__pycache__/base_chain.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..77b4895af6a650b0527022a643cd833090bfa758 Binary files /dev/null and b/core/chain/__pycache__/base_chain.cpython-38.pyc differ diff --git a/core/chain/__pycache__/base_chain.cpython-39.pyc b/core/chain/__pycache__/base_chain.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eec8ee19a4f54be86341497be2f6233b4dfd9e1f Binary files /dev/null and b/core/chain/__pycache__/base_chain.cpython-39.pyc differ diff --git a/core/chain/base_chain.py b/core/chain/base_chain.py new file mode 100644 index 0000000000000000000000000000000000000000..7418a764fd66dbd73679617c9e4692a77d79d004 
--- /dev/null +++ b/core/chain/base_chain.py @@ -0,0 +1,10 @@ + +class BaseChain: + def __init__(self, chain: list): + raise NotImplementedError + + def append(self, item: str): + raise NotImplementedError + + def execute(self): + raise NotImplementedError \ No newline at end of file diff --git a/core/chain/simple_chain.py b/core/chain/simple_chain.py new file mode 100644 index 0000000000000000000000000000000000000000..99fa70e7d2570aea122bea2c780f2554d9ca1f89 --- /dev/null +++ b/core/chain/simple_chain.py @@ -0,0 +1,19 @@ +from base_chain import BaseChain + +# class SimpleChain(BaseChain): +# def __init__(self, chain: list[str]): +# self.chain = chain if chain else [] + +# def append(self, item: str): +# self.chain.append(item) + +# def execute(self): +# # raise NotImplementedError +# for item in self.chain: +# pass +# #todo: execute item +# # item --> result +# item.execute(param=param) +# # result --> next item + +# return result \ No newline at end of file diff --git a/core/chatbot/__init__.py b/core/chatbot/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..21040f53bc9ba7e350c87d6e7da188d07bc1acd5 --- /dev/null +++ b/core/chatbot/__init__.py @@ -0,0 +1,2 @@ +from .base_chatbot import BaseChatbot +from .retrieval_chatbot import RetrievalChatbot \ No newline at end of file diff --git a/core/chatbot/__pycache__/__init__.cpython-311.pyc b/core/chatbot/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0468d6b36032744d70f35e0115abcd8c4e2bb740 Binary files /dev/null and b/core/chatbot/__pycache__/__init__.cpython-311.pyc differ diff --git a/core/chatbot/__pycache__/__init__.cpython-39.pyc b/core/chatbot/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..827568778a7a388763142643b5c08d80bab874b0 Binary files /dev/null and b/core/chatbot/__pycache__/__init__.cpython-39.pyc differ diff --git 
a/core/chatbot/__pycache__/base_chatbot.cpython-311.pyc b/core/chatbot/__pycache__/base_chatbot.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41c54f20a4161c163f18a50d6d679d68350ac967 Binary files /dev/null and b/core/chatbot/__pycache__/base_chatbot.cpython-311.pyc differ diff --git a/core/chatbot/__pycache__/base_chatbot.cpython-39.pyc b/core/chatbot/__pycache__/base_chatbot.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa41bb3af0312b4d955b890718f976f930b58b72 Binary files /dev/null and b/core/chatbot/__pycache__/base_chatbot.cpython-39.pyc differ diff --git a/core/chatbot/__pycache__/retrieval_chatbot.cpython-311.pyc b/core/chatbot/__pycache__/retrieval_chatbot.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b096701464152d58575add4d47b71731d256b584 Binary files /dev/null and b/core/chatbot/__pycache__/retrieval_chatbot.cpython-311.pyc differ diff --git a/core/chatbot/__pycache__/retrieval_chatbot.cpython-39.pyc b/core/chatbot/__pycache__/retrieval_chatbot.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6e16175e9d8aca403f6235aa0beaccc704a38aa Binary files /dev/null and b/core/chatbot/__pycache__/retrieval_chatbot.cpython-39.pyc differ diff --git a/core/chatbot/base_chatbot.py b/core/chatbot/base_chatbot.py new file mode 100644 index 0000000000000000000000000000000000000000..17bf2a237b53009668ad1b1dc28914ae67a4aca9 --- /dev/null +++ b/core/chatbot/base_chatbot.py @@ -0,0 +1,12 @@ +from models import BaseModel +from ..memory import BaseMemory +class BaseChatbot: + def __init__(self, + model: BaseModel, + memory: BaseMemory + ) -> None: + self.model = model + self.memory = memory + + def respond(self, message: str) -> str: + raise NotImplementedError \ No newline at end of file diff --git a/core/chatbot/retrieval_chatbot.py b/core/chatbot/retrieval_chatbot.py new file mode 100644 index 
0000000000000000000000000000000000000000..55612a170eb2e92057a78202334648c874e57215 --- /dev/null +++ b/core/chatbot/retrieval_chatbot.py @@ -0,0 +1,69 @@ +from .base_chatbot import BaseChatbot +from ..memory import BaseMemory, ChatMemory +from ..retriever import BaseRetriever, ChromaRetriever, FaissRetriever +from ..refiner import BaseRefiner, SimpleRefiner +from models import BaseModel, GPT4Model +from prompts import DecomposePrompt, QAPrompt, SummaryPrompt, ReferencePrompt +import ast +from utils.image_encoder import encode_image +import asyncio +import time + +class RetrievalChatbot(BaseChatbot): + def __init__(self, + model: BaseModel = None, + memory: BaseMemory = None, + retriever: BaseRetriever = None, + decomposer: BaseRefiner = None, + answerer: BaseRefiner = None, + summarizer: BaseRefiner = None, + ) -> None: + self.model = model if model \ + else GPT4Model() + self.memory = memory if memory \ + else ChatMemory(sys_prompt=SummaryPrompt.content) + self.retriever = retriever if retriever \ + else ChromaRetriever(pdf_dir="papers_all", + collection_name="pdfs", + split_args={"size": 2048, "overlap": 10}, + embed_model=GPT4Model()) + self.decomposer = decomposer if decomposer \ + else SimpleRefiner(model=GPT4Model(), sys_prompt=DecomposePrompt.content) + self.answerer = answerer if answerer \ + else SimpleRefiner(model=GPT4Model(), sys_prompt=QAPrompt.content) + self.summarizer = summarizer if summarizer \ + else SimpleRefiner(model=GPT4Model(), sys_prompt=SummaryPrompt.content) + + async def response(self, message: str, image_paths=None, return_logs=False) -> str: + time1 = time.time() + print("Query: {message}".format(message=message)) + retrieved_reference="" + time_s = time.time() + results = self.retriever.retrieve(message) + refs, titles = results + for ref in refs: + retrieved_reference += "Related research: {ref}\n".format(ref=ref) + answerer_context = "Sub Question References: {retrieved_reference}\nQuestion: 
{message}\n".format(retrieved_reference=retrieved_reference, message=message) + answer = self.answerer.refine(answerer_context, self.memory, image_paths) + time_e = time.time() + + #todo 记忆管理 + if image_paths is None: + self.memory.append([{"role": "user", "content": [ + {"type": "text", "text": f"{message}"}, + ]}, {"role": "assistant", "content": answer}]) + else: + if not isinstance(image_paths, list): + image_paths = [image_paths] + memory_user = [{"type": "text", "text": f"{message}"},] + for image_path in image_paths: + memory_user.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path.name)}"}},) + self.memory.append([{"role": "user", "content": memory_user}, {"role": "assistant", "content": answer}]) + print("="*20) + print(f"Final answer: {answer}".format(answer=answer)) + + return { + "answer": answer, + "titles": set(titles), + "logs": "" + } \ No newline at end of file diff --git a/core/memory/__init__.py b/core/memory/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..74844cb1535dedba85b3a316c65f52e754966808 --- /dev/null +++ b/core/memory/__init__.py @@ -0,0 +1,2 @@ +from .base_memory import BaseMemory +from .chat_memory import ChatMemory diff --git a/core/memory/__pycache__/__init__.cpython-311.pyc b/core/memory/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9886513457362608a02c72b812d30dfe6e919dd9 Binary files /dev/null and b/core/memory/__pycache__/__init__.cpython-311.pyc differ diff --git a/core/memory/__pycache__/__init__.cpython-39.pyc b/core/memory/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8bcd2e0a6b82db4d182a5f02a930e15970a118c Binary files /dev/null and b/core/memory/__pycache__/__init__.cpython-39.pyc differ diff --git a/core/memory/__pycache__/base_memory.cpython-311.pyc b/core/memory/__pycache__/base_memory.cpython-311.pyc new file mode 100644 
index 0000000000000000000000000000000000000000..2881a9b78404f78314e20fe572762a30091bb1b3 Binary files /dev/null and b/core/memory/__pycache__/base_memory.cpython-311.pyc differ diff --git a/core/memory/__pycache__/base_memory.cpython-39.pyc b/core/memory/__pycache__/base_memory.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6cd01196b6eb41a1aea849c708b9a349941c83d Binary files /dev/null and b/core/memory/__pycache__/base_memory.cpython-39.pyc differ diff --git a/core/memory/__pycache__/chat_memory.cpython-311.pyc b/core/memory/__pycache__/chat_memory.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b64595e3fd6fc8d5a1950b3b2f6b747dec60eecc Binary files /dev/null and b/core/memory/__pycache__/chat_memory.cpython-311.pyc differ diff --git a/core/memory/__pycache__/chat_memory.cpython-39.pyc b/core/memory/__pycache__/chat_memory.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..455f324ce725b6fe537eeed92da6ad914715df51 Binary files /dev/null and b/core/memory/__pycache__/chat_memory.cpython-39.pyc differ diff --git a/core/memory/base_memory.py b/core/memory/base_memory.py new file mode 100644 index 0000000000000000000000000000000000000000..0f023bc2320b6ada4077f10b3d19af32be336a4c --- /dev/null +++ b/core/memory/base_memory.py @@ -0,0 +1,18 @@ +class BaseMemory: + def __init__(self) -> None: + raise NotImplementedError + + def append(self, message: str) -> None: + raise NotImplementedError + + def pop(self) -> None: + raise NotImplementedError + + def clear(self) -> None: + raise NotImplementedError + + def load(self) -> None: + raise NotImplementedError + + def save(self) -> None: + raise NotImplementedError \ No newline at end of file diff --git a/core/memory/chat_memory.py b/core/memory/chat_memory.py new file mode 100644 index 0000000000000000000000000000000000000000..38b8c3888193dcd840b8363ad966e672c9daff18 --- /dev/null +++ b/core/memory/chat_memory.py @@ -0,0 
+1,22 @@ +from .base_memory import BaseMemory + +class ChatMemory(BaseMemory): + def __init__(self, sys_prompt = None) -> None: + self.sys_prompt = sys_prompt + self.messages = [{"role": "system", "content": sys_prompt}] if sys_prompt else [] + + def append(self, message: list) -> None: + # assert + self.messages += message + + def pop(self) -> None: + self.messages.pop() + + def clear(self) -> None: + self.messages = [{"role": "system", "content": self.sys_prompt}] + + def load(self) -> None: + pass + + def save(self) -> None: + pass \ No newline at end of file diff --git a/core/memory/plan_memory.py b/core/memory/plan_memory.py new file mode 100644 index 0000000000000000000000000000000000000000..2aabe67acae05c5bc15daeabba649a5523a9e450 --- /dev/null +++ b/core/memory/plan_memory.py @@ -0,0 +1,38 @@ +from .base_memory import BaseMemory + +from dataclasses import dataclass + +@dataclass +class Task: + def __init__(self, name: str, description: str): + self.name = name + self.description = description + +class TaskChain: + def __init__(self, tasks: list): + self.tasks = tasks + def append(self, task: Task): + self.tasks.append(task) + def clear(self): + self.tasks = [] + def __str__(self): + return "\n".join([f"{task.name}: {task.description}" for task in self.tasks]) + +class PlanMemory(BaseMemory): + def __init__(self, initial_message, initial_task) -> None: + self.messages = initial_message if initial_message else [] + self.tasks = TaskChain(initial_task) if initial_task else TaskChain([]) + + def append(self, message: str) -> None: + self.messages.append(message) + #todo: parse message for tasks & add to task chain + self.tasks.append(Task("Task", message)) + + def clear(self) -> None: + self.messages = [] + + def load(self) -> None: + pass + + def save(self) -> None: + pass \ No newline at end of file diff --git a/core/planner/__init__.py b/core/planner/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..5805b53e3ed8b2881a2b1b997a24451d5be894c8 --- /dev/null +++ b/core/planner/__init__.py @@ -0,0 +1 @@ +from .base_planner import BasePlanner \ No newline at end of file diff --git a/core/planner/__pycache__/__init__.cpython-311.pyc b/core/planner/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..138438f45a0bc5b2df25729a8295958abc89f080 Binary files /dev/null and b/core/planner/__pycache__/__init__.cpython-311.pyc differ diff --git a/core/planner/__pycache__/__init__.cpython-39.pyc b/core/planner/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2396dceaef457d5b048392980de3cc69a154ecab Binary files /dev/null and b/core/planner/__pycache__/__init__.cpython-39.pyc differ diff --git a/core/planner/__pycache__/base_planner.cpython-311.pyc b/core/planner/__pycache__/base_planner.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c7a42440f54bbb31f7624aa8fbfdded159e8304 Binary files /dev/null and b/core/planner/__pycache__/base_planner.cpython-311.pyc differ diff --git a/core/planner/__pycache__/base_planner.cpython-39.pyc b/core/planner/__pycache__/base_planner.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17717a7905521be26e6d779a817d03ce4a3d3f88 Binary files /dev/null and b/core/planner/__pycache__/base_planner.cpython-39.pyc differ diff --git a/core/planner/base_planner.py b/core/planner/base_planner.py new file mode 100644 index 0000000000000000000000000000000000000000..36a9112a52b3704e143fe9004d3388766750e7dd --- /dev/null +++ b/core/planner/base_planner.py @@ -0,0 +1,6 @@ + +class BasePlanner: + def __init__(self): + raise NotImplementedError + def plan(self, message: str) -> list[str]: + raise NotImplementedError \ No newline at end of file diff --git a/core/refiner/__init__.py b/core/refiner/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..f463f4b34411a6cbb8b763769a048bc6ccb47496 --- /dev/null +++ b/core/refiner/__init__.py @@ -0,0 +1,2 @@ +from .base_refiner import BaseRefiner +from .simple_refiner import SimpleRefiner \ No newline at end of file diff --git a/core/refiner/__pycache__/__init__.cpython-311.pyc b/core/refiner/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fbdfdd13e6c4f3a14daa169d7cb5f4f7a534766 Binary files /dev/null and b/core/refiner/__pycache__/__init__.cpython-311.pyc differ diff --git a/core/refiner/__pycache__/__init__.cpython-39.pyc b/core/refiner/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a1f9990d4317002ba7e3f3c34b2b3a665337299a Binary files /dev/null and b/core/refiner/__pycache__/__init__.cpython-39.pyc differ diff --git a/core/refiner/__pycache__/base_refiner.cpython-311.pyc b/core/refiner/__pycache__/base_refiner.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..50ca1967046ac138f6b500950ace89cf7311cf0b Binary files /dev/null and b/core/refiner/__pycache__/base_refiner.cpython-311.pyc differ diff --git a/core/refiner/__pycache__/base_refiner.cpython-39.pyc b/core/refiner/__pycache__/base_refiner.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c6e1c3d8b62f957520cf493360959dd5da3fa4e Binary files /dev/null and b/core/refiner/__pycache__/base_refiner.cpython-39.pyc differ diff --git a/core/refiner/__pycache__/simple_refiner.cpython-311.pyc b/core/refiner/__pycache__/simple_refiner.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffb35077578ad1cb86a16946f9b6c75b10a8917b Binary files /dev/null and b/core/refiner/__pycache__/simple_refiner.cpython-311.pyc differ diff --git a/core/refiner/__pycache__/simple_refiner.cpython-39.pyc b/core/refiner/__pycache__/simple_refiner.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..d5daa58d9a640b7d1d7b7c31d46314a788747ff1 Binary files /dev/null and b/core/refiner/__pycache__/simple_refiner.cpython-39.pyc differ diff --git a/core/refiner/base_refiner.py b/core/refiner/base_refiner.py new file mode 100644 index 0000000000000000000000000000000000000000..2ef11db0ef1ee62f9de4232cebbf88bfc40bff0f --- /dev/null +++ b/core/refiner/base_refiner.py @@ -0,0 +1,11 @@ +from models import BaseModel +class BaseRefiner: + def __init__(self, + sys_prompt: str, + model: BaseModel, + ) -> None: + self.sys_prompt = sys_prompt + self.model = model + + def refine(self, message: str) -> str: + raise NotImplementedError \ No newline at end of file diff --git a/core/refiner/recursive_refiner.py b/core/refiner/recursive_refiner.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/core/refiner/simple_refiner.py b/core/refiner/simple_refiner.py new file mode 100644 index 0000000000000000000000000000000000000000..931e82adc1f6d68c0fbefc0897e9442aa20271cb --- /dev/null +++ b/core/refiner/simple_refiner.py @@ -0,0 +1,50 @@ +from models import BaseModel +from .base_refiner import BaseRefiner +from utils.image_encoder import encode_image +import asyncio + +class SimpleRefiner(BaseRefiner): + def __init__(self, + sys_prompt: str, + model: BaseModel, + ) -> None: + BaseRefiner.__init__(self, sys_prompt=sys_prompt, model=model) + + async def refine_async(self, message: str, memory, image_paths=None) -> str: + if memory is None: + memory = [] + else: + memory = memory.messages[1:] + + user_context = [{"role": "user", "content": [ + {"type": "text", "text": f"{message}"},]}] + if image_paths: + if not isinstance(image_paths, list): + image_paths = [image_paths] + for image_path in image_paths: + user_context[0]["content"].append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path.name)}"}}) + context = [{"role": "system", 
"content": self.sys_prompt}] + memory + user_context + + respond_task = asyncio.create_task(self.model.respond_async(context)) + await respond_task + response = respond_task.result() + return response + + def refine(self, message: str, memory, image_paths=None) -> str: + if memory is None: + memory = [] + else: + memory = memory.messages[1:] + + user_context = [{"role": "user", "content": [ + {"type": "text", "text": f"{message}"},]}] + if image_paths: + if not isinstance(image_paths, list): + image_paths = [image_paths] + for image_path in image_paths: + user_context[0]["content"].append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encode_image(image_path.name)}"}}) + context = [{"role": "system", "content": self.sys_prompt}] + memory + user_context + + response = self.model.respond(context) + + return response \ No newline at end of file diff --git a/core/retriever/__init__.py b/core/retriever/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..67622890a05fedb481f44f9bdb22336b05a97647 --- /dev/null +++ b/core/retriever/__init__.py @@ -0,0 +1,3 @@ +from .base_retriever import BaseRetriever +from .chroma_retriever import ChromaRetriever +from .faiss_retriever import FaissRetriever \ No newline at end of file diff --git a/core/retriever/__pycache__/__init__.cpython-311.pyc b/core/retriever/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30ef38914952e5d6a2febf82573d1c680a179781 Binary files /dev/null and b/core/retriever/__pycache__/__init__.cpython-311.pyc differ diff --git a/core/retriever/__pycache__/__init__.cpython-39.pyc b/core/retriever/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..75bef3f19b89d3a56ccada92008042054cfd317c Binary files /dev/null and b/core/retriever/__pycache__/__init__.cpython-39.pyc differ diff --git a/core/retriever/__pycache__/base_retriever.cpython-311.pyc 
b/core/retriever/__pycache__/base_retriever.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a1cf8d3ede0b565eee41f9b5d958f10db850aaa1 Binary files /dev/null and b/core/retriever/__pycache__/base_retriever.cpython-311.pyc differ diff --git a/core/retriever/__pycache__/base_retriever.cpython-39.pyc b/core/retriever/__pycache__/base_retriever.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..553f0923845bfb47a1c08e0568c19808f3a483ae Binary files /dev/null and b/core/retriever/__pycache__/base_retriever.cpython-39.pyc differ diff --git a/core/retriever/__pycache__/chroma_retriever.cpython-311.pyc b/core/retriever/__pycache__/chroma_retriever.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5bf2ea6f205394b4603816826d23a54f25234dd Binary files /dev/null and b/core/retriever/__pycache__/chroma_retriever.cpython-311.pyc differ diff --git a/core/retriever/__pycache__/chroma_retriever.cpython-39.pyc b/core/retriever/__pycache__/chroma_retriever.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1357d9a2ee7eaeea48738ff58d1c34a8aaabb3b7 Binary files /dev/null and b/core/retriever/__pycache__/chroma_retriever.cpython-39.pyc differ diff --git a/core/retriever/__pycache__/faiss_retriever.cpython-311.pyc b/core/retriever/__pycache__/faiss_retriever.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..22a09dfeef93a3e6cbea48b0a6ad5154d90025a4 Binary files /dev/null and b/core/retriever/__pycache__/faiss_retriever.cpython-311.pyc differ diff --git a/core/retriever/__pycache__/faiss_retriever.cpython-39.pyc b/core/retriever/__pycache__/faiss_retriever.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e57c768dd7db2ab572aaa971153a03ee255c3470 Binary files /dev/null and b/core/retriever/__pycache__/faiss_retriever.cpython-39.pyc differ diff --git a/core/retriever/base_retriever.py 
b/core/retriever/base_retriever.py new file mode 100644 index 0000000000000000000000000000000000000000..b0eeaa1521ce8149ac6fa7e100a8d941cdbd4580 --- /dev/null +++ b/core/retriever/base_retriever.py @@ -0,0 +1,7 @@ + +class BaseRetriever: + def __init__(self): + raise NotImplementedError + + def retrieve(self, query: str) -> list: + raise NotImplementedError \ No newline at end of file diff --git a/core/retriever/chroma_retriever.py b/core/retriever/chroma_retriever.py new file mode 100644 index 0000000000000000000000000000000000000000..b20589ea7c74adb6aa7848406f287d7a3df7b1f8 --- /dev/null +++ b/core/retriever/chroma_retriever.py @@ -0,0 +1,89 @@ +from .base_retriever import BaseRetriever +from models import BaseModel + +from langchain.document_loaders import PyMuPDFLoader, DirectoryLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter +from chromadb import PersistentClient +import os + + +class ChromaRetriever(BaseRetriever): + def __init__(self, + pdf_dir: str, + collection_name: str, + split_args: dict, + embed_model: BaseModel = None, + refine_model: BaseModel = None,): + ''' + pdf_dir: directory containing pdfs for vector database + collection_name: name of collection to be used (if collection exists, it will be loaded, otherwise it will be created) + split_args: dictionary of arguments for text splitter ("size": size of chunks, "overlap": overlap between chunks) + embed_model: model to embed text chunks (if not provided, will use chroma's default embeddings) + example: + from models import GPT4Model + dir = "papers" + retriever = ChromaRetriever(dir, "pdfs", {"size": 2048, "overlap": 10}, embed_model=GPT4Model() + ''' + self.embed_model = embed_model + + + if not os.path.exists("persist"): + os.mkdir("persist") + client = PersistentClient(path="persist") + print(client.list_collections()) + + try: + collection = client.get_collection(name=collection_name) + except: + print("Creating new collection...") + print("Loading pdf papers into the 
vector database... ") + pdf_loader = DirectoryLoader(pdf_dir, loader_cls=PyMuPDFLoader) + docs = pdf_loader.load() + + text_splitter = RecursiveCharacterTextSplitter(chunk_size=split_args["size"], chunk_overlap=split_args["overlap"]) + split_docs = text_splitter.split_documents(docs) + texts = [doc.page_content for doc in split_docs] + + # TODO + titles = [doc.metadata["title"] for doc in split_docs] + + collection = client.create_collection(name=collection_name) + if embed_model is not None: + embeddings = embed_model.embedding(texts) + collection.add( + embeddings=embeddings, + documents=texts, + ids=[str(i+1) for i in range(len(texts))], + metadatas=[{"title": title} for title in titles] + ) + else: + collection.add( + documents=texts, + ids=[str(i+1) for i in range(len(texts))], + metadatas=[{"title": title} for title in titles] + ) + + self.collection = collection + print("Papers Loaded.") + + + def retrieve(self, query: str, k: int = 5) -> list: + ''' + query: text string used to query the vector database + k: number of text chunks to return + returns: list of retrieved text chunks + example: + retriever.retrieve("how do sex chromosomes in rhesus monkeys influence proteome?", k=10) + ''' + + if self.embed_model is not None: + results = self.collection.query( + query_embeddings=self.embed_model.embedding([query]), + n_results=k, + ) + else: + results = self.collection.query( + query_texts=[query], + n_results=k, + ) + return results['documents'][0], [result["title"] for result in results['metadatas'][0]] diff --git a/core/retriever/faiss_retriever.py b/core/retriever/faiss_retriever.py new file mode 100644 index 0000000000000000000000000000000000000000..33616f54a7da31da1439438ba955da15227d62e6 --- /dev/null +++ b/core/retriever/faiss_retriever.py @@ -0,0 +1,8 @@ +from .base_retriever import BaseRetriever + +class FaissRetriever(BaseRetriever): + def __init__(self): + self.vector_base = None + + def retrieve(self, query: str) -> list: + pass \ No newline at end 
of file diff --git a/core/retriever/web_retirever.py b/core/retriever/web_retirever.py new file mode 100644 index 0000000000000000000000000000000000000000..14cc5ec9fb8b4653210d98726f1b05f4d89db9b5 --- /dev/null +++ b/core/retriever/web_retirever.py @@ -0,0 +1,8 @@ +from .base_retriever import BaseRetriever + +class WebRetriever(BaseRetriever): + def __init__(self): + pass + + def retrieve(self, query: str) -> list: + pass \ No newline at end of file diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8614beaade72f19c047db7a53b0376c814d7298b --- /dev/null +++ b/models/__init__.py @@ -0,0 +1,3 @@ +from .base_model import BaseModel +from .gpt4_model import GPT4Model +from .biomed_model import BiomedModel \ No newline at end of file diff --git a/models/__pycache__/__init__.cpython-311.pyc b/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae884506b86b838a7f4ada1559a26ac846e66abb Binary files /dev/null and b/models/__pycache__/__init__.cpython-311.pyc differ diff --git a/models/__pycache__/__init__.cpython-39.pyc b/models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..651c6b04b5fe947222c6c12f085fa589557f921e Binary files /dev/null and b/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/__pycache__/base_model.cpython-311.pyc b/models/__pycache__/base_model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0930ed99de9bed2443c8477034f9d191c9cd61d8 Binary files /dev/null and b/models/__pycache__/base_model.cpython-311.pyc differ diff --git a/models/__pycache__/base_model.cpython-39.pyc b/models/__pycache__/base_model.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a4f7d2149678ff2294a8c9dade10eb4a69593cd3 Binary files /dev/null and b/models/__pycache__/base_model.cpython-39.pyc differ diff --git 
a/models/__pycache__/biomed_model.cpython-311.pyc b/models/__pycache__/biomed_model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83e2ea0c337b2e59ab3fbc323f9f386e3e405beb Binary files /dev/null and b/models/__pycache__/biomed_model.cpython-311.pyc differ diff --git a/models/__pycache__/gpt4_model.cpython-311.pyc b/models/__pycache__/gpt4_model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5681dc1195de02ebc605a1938c7de4459a17b838 Binary files /dev/null and b/models/__pycache__/gpt4_model.cpython-311.pyc differ diff --git a/models/__pycache__/gpt4_model.cpython-39.pyc b/models/__pycache__/gpt4_model.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a8a2fbfecf12d4d877c854f4f97c496d6d510e56 Binary files /dev/null and b/models/__pycache__/gpt4_model.cpython-39.pyc differ diff --git a/models/base_model.py b/models/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..e2bfaea31714efcdb26f26c2bfd112eaa2948e5a --- /dev/null +++ b/models/base_model.py @@ -0,0 +1,10 @@ + +class BaseModel: + def __init__(self) -> None: + raise NotImplementedError + + def respond(self, messages: str) -> str: + raise NotImplementedError + + def embedding(self, message: str) -> str: + raise NotImplementedError \ No newline at end of file diff --git a/models/biomed_model.py b/models/biomed_model.py new file mode 100644 index 0000000000000000000000000000000000000000..ce39c7fbc53c5b12df99614a8a47d4b24b7d8ce0 --- /dev/null +++ b/models/biomed_model.py @@ -0,0 +1,31 @@ +from .base_model import BaseModel +import openai +from tqdm import tqdm +from sentence_transformers import SentenceTransformer +class BiomedModel(BaseModel): + def __init__(self, + generation_model="gpt-4", + embedding_model="pritamdeka/S-PubMedBert-MS-MARCO", + temperature=0, + ) -> None: + self.generation_model = generation_model + self.embedding_model = SentenceTransformer(embedding_model) + 
from .base_model import BaseModel

import openai
from openai import AsyncOpenAI, OpenAI
from tqdm import tqdm
import asyncio
import os


class GPT4Model(BaseModel):
    """Chat-completion + embedding backend built on the OpenAI v1 client.

    Credentials are read from the ``OPENAI_API_KEY`` / ``OPENAI_API_BASE``
    environment variables at call time, so they can be rotated without
    re-constructing the model.
    """

    def __init__(self,
                 generation_model="gpt-4-vision-preview",
                 embedding_model="text-embedding-ada-002",
                 temperature=0,
                 ) -> None:
        # Model names and sampling temperature are plain attributes; no
        # network activity happens until respond()/embedding() is called.
        self.generation_model = generation_model
        self.embedding_model = embedding_model
        self.temperature = temperature

    def _client_kwargs(self) -> dict:
        """Shared credential kwargs for both sync and async clients."""
        return {
            "api_key": os.environ["OPENAI_API_KEY"],
            "base_url": os.environ["OPENAI_API_BASE"],
        }

    async def respond_async(self, messages: list[dict]) -> str:
        """Asynchronously request one chat completion for *messages*.

        Args:
            messages: OpenAI chat-format message dicts.

        Returns:
            The assistant message content of the first choice.
        """
        client = AsyncOpenAI(**self._client_kwargs())
        output = await client.chat.completions.create(
            messages=messages,
            model=self.generation_model,
            temperature=self.temperature,
            max_tokens=1000,
        )
        return output.choices[0].message.content

    def respond(self, messages: list[dict]) -> str:
        """Synchronously request one chat completion, retrying once.

        The original version duplicated the whole call inside two nested
        bare ``except:`` clauses, which also swallowed KeyboardInterrupt /
        SystemExit. This retries the identical request once on API errors
        only, then falls back to the same sentinel answer.

        Returns:
            The assistant message content, or ``"No answer provided."``
            if both attempts fail.
        """
        client = OpenAI(**self._client_kwargs())
        for _attempt in range(2):
            try:
                return client.chat.completions.create(
                    messages=messages,
                    model=self.generation_model,
                    temperature=self.temperature,
                    max_tokens=1000,
                ).choices[0].message.content
            except openai.OpenAIError:
                # Transient API failure — retry once, then give up.
                continue
        return "No answer provided."

    def embedding(self, texts: list[str]) -> list[list[float]]:
        """Embed *texts* with the configured embedding model.

        Requests are batched 2048 inputs at a time (the API's per-request
        input limit for this endpoint).

        Returns:
            One embedding vector per input text, in input order. (The
            original annotation said ``list[float]``; a list of vectors is
            returned.)
        """
        client = OpenAI(**self._client_kwargs())
        data = []
        for lower in range(0, len(texts), 2048):
            upper = min(lower + 2048, len(texts))
            data += client.embeddings.create(
                input=texts[lower:upper],
                model=self.embedding_model,
            ).data
        return [d.embedding for d in data]
from dataclasses import dataclass


@dataclass
class BasePrompt:
    """A named system prompt: a short description plus the prompt text.

    Fields carry empty-string defaults so that subclasses (and the base
    itself) can be instantiated with no arguments. In the original, the
    subclasses assigned ``description``/``content`` as *unannotated* class
    attributes, which dataclasses do not treat as defaults — so e.g.
    ``DecomposePrompt()`` raised TypeError for the missing base fields.
    Annotated defaults below fix that while keeping class-attribute access
    (``DecomposePrompt.content``) working as before.
    """
    description: str = ""
    content: str = ""


@dataclass
class DecomposePrompt(BasePrompt):
    description: str = "Question decomposition"
    content: str = (
        "You are a Question Decomposer. "
        "Given a question and an image input if any, your task is to break it "
        "down into multiple subquestions and provide a list of strings: "
        "['', '', ...]. "
        # The original concatenation was missing a space here
        # ("subquestions.ENSURE"); fixed.
        "Output a maximum of five subquestions. "
        "ENSURE each subquestion is a complete question that avoids vague "
        "concepts requiring reference to other subquestions, such as "
        "determiners and pronouns. "
        "ONLY output the list of subquestions. "
    )


@dataclass
class SummaryPrompt(BasePrompt):
    description: str = "Answer Summarization"
    content: str = (
        "You are an Answer Summarizer. "
        "Given a primary question, your task is to generate a summarized "
        "answer by distilling and organizing information from several "
        "subquestion-subanswer pairs and related researches of the primary "
        "question. "
        "ENSURE the answer is concise and coherent. "
        "ONLY output the final summarized answer. "
    )


@dataclass
class QAPrompt(BasePrompt):
    description: str = "Question Answering"
    content: str = (
        "You are a Question-Answerer. "
        "Given a question, your task is to answer it according to the "
        "references. "
        "If you find the references insufficient, you can answer the "
        "question according to your own knowledge. "
        "Provide a comprehensive and detailed answer. "
        "Please limit your answer to within 300 words if possible. "
        "ONLY output the answer. "
    )


@dataclass
class ReferencePrompt(BasePrompt):
    description: str = "Reference Refinement"
    content: str = (
        "You are a Reference Refiner. "
        "Given paragraphs extracted from a paper, your task is to remove the "
        "unnecessary and messy symbols to make it more readable. "
        "But keep the original expression and sentences as much as possible. "
        "ONLY output the refined paragraphs. "
    )


@dataclass
class ReversePrompt(BasePrompt):
    # Placeholder — reverse chain-of-thought prompt not written yet.
    description: str = "Reverse Chain of Thought"
    content: str = ""
+Pillow==10.1.0 +posthog==3.1.0 +protobuf==4.25.1 +pulsar-client==3.3.0 +pyasn1==0.5.1 +pyasn1-modules==0.3.0 +pydantic==2.5.2 +pydantic_core==2.14.5 +pydub==0.25.1 +PyMuPDF==1.23.6 +PyMuPDFb==1.23.6 +pyparsing==3.1.1 +PyPika==0.48.9 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-multipart==0.0.6 +pytz==2023.3.post1 +PyYAML==6.0.1 +referencing==0.32.0 +regex==2023.10.3 +requests==2.31.0 +requests-oauthlib==1.3.1 +rpds-py==0.15.2 +rsa==4.9 +semantic-version==2.10.0 +sentence-transformers==2.2.2 +six==1.16.0 +sniffio==1.3.0 +SQLAlchemy==2.0.23 +starlette==0.27.0 +sympy==1.12 +tenacity==8.2.3 +tiktoken==0.3.3 +tokenizers==0.15.0 +toolz==0.12.0 +tqdm==4.66.1 +typer==0.9.0 +typing-inspect==0.9.0 +typing_extensions==4.9.0 +tzdata==2023.3 +urllib3==1.26.18 +uvicorn==0.24.0.post1 +uvloop==0.19.0 +watchfiles==0.21.0 +websocket-client==1.7.0 +websockets==11.0.3 +wrapt==1.16.0 +yarl==1.9.4 +zipp==3.17.0 diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tools/base_tool.py b/tools/base_tool.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..093e901e544695cf4083340d6789e4d718b2809f --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,2 @@ +from .image_encoder import * +from .pdf_reader import * \ No newline at end of file diff --git a/utils/__pycache__/__init__.cpython-311.pyc b/utils/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d26e98ad2fe821c16a30fab4394876598e27a424 Binary files /dev/null and b/utils/__pycache__/__init__.cpython-311.pyc differ diff --git a/utils/__pycache__/image_encoder.cpython-311.pyc b/utils/__pycache__/image_encoder.cpython-311.pyc new file mode 100644 index 
import base64


def encode_image(image_path):
    """Return the base64 text encoding of the file at *image_path*.

    A falsy path ("" or None) yields the sentinel string
    "No image inputs", representing a query with no attached image.
    """
    if not image_path:
        return "No image inputs"
    with open(image_path, "rb") as image_file:
        raw = image_file.read()
    return base64.b64encode(raw).decode("utf-8")