# NOTE(review): the lines that were here were web file-viewer scrape residue
# (HF Spaces chrome: "Running" badges, file size, commit hashes, a
# line-number gutter) — not Python source. Converted to this comment so the
# module parses.
from llama_index.core import PromptTemplate
from llama_index.core import get_response_synthesizer
from llama_index.core import load_index_from_storage
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core.vector_stores import SimpleVectorStore
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
# Root directory of the persisted LlamaIndex stores (GPT-4o uses the root;
# Ollama models use per-model subfolders — see load_index()).
STORE_DIR = "openworm.ai_store"
# Metadata key under which each source node records its originating document.
SOURCE_DOCUMENT = "source document"
# Identifier for the OpenAI GPT-4o backend; any other model is expected to be
# an Ollama spec of the form "Ollama:<model_name>".
LLM_GPT4o = "GPT4o"
def print_(text):
    """Write *text* (stringified, newline-terminated) to standard output."""
    message = str(text)
    print(message)
def load_index(model):
    """Reload a previously persisted LlamaIndex index for *model*.

    Parameters
    ----------
    model : str
        Either ``LLM_GPT4o`` or an Ollama spec of the form "Ollama:<name>".

    Returns
    -------
    The index reloaded from the on-disk storage context under ``STORE_DIR``
    (plus a per-model subfolder for Ollama models).
    """
    # Compare by value (!=), not identity ("is not"): string identity only
    # works by CPython interning luck and is not a reliable equality test.
    ollama_model = model.replace("Ollama:", "") if model != LLM_GPT4o else None
    print_("Creating a storage context for %s" % model)
    # ":" appears in Ollama tags (e.g. "llama3:8b") but is not filesystem-safe.
    store_subfolder = (
        "" if ollama_model is None else "/%s" % ollama_model.replace(":", "_")
    )
    persist_dir = STORE_DIR + store_subfolder
    storage_context = StorageContext.from_defaults(
        docstore=SimpleDocumentStore.from_persist_dir(persist_dir=persist_dir),
        vector_store=SimpleVectorStore.from_persist_dir(persist_dir=persist_dir),
        index_store=SimpleIndexStore.from_persist_dir(persist_dir=persist_dir),
    )
    print_("Reloading index for %s" % model)
    index_reloaded = load_index_from_storage(storage_context)
    return index_reloaded
def get_query_engine(index_reloaded, model, similarity_top_k=4):
    """Build a retrieval-augmented query engine over *index_reloaded*.

    Parameters
    ----------
    index_reloaded :
        Index previously returned by ``load_index()``.
    model : str
        ``LLM_GPT4o`` for the OpenAI backend, or "Ollama:<name>" for a local
        Ollama model.
    similarity_top_k : int, optional
        Number of most-similar chunks retrieved per query (default 4).

    Returns
    -------
    A query engine combining the retriever with the QA/refine prompt
    templates below.
    """
    # Compare by value (!=), not identity ("is not"): string identity only
    # works by CPython interning luck and is not a reliable equality test.
    ollama_model = model.replace("Ollama:", "") if model != LLM_GPT4o else None

    print_("Creating query engine for %s" % model)

    # Prompts based on:
    # https://docs.llamaindex.ai/en/stable/examples/customization/prompts/completion_prompts/
    text_qa_template_str = (
        "Context information is"
        " below.\n---------------------\n{context_str}\n---------------------\nUsing"
        " both the context information and also using your own knowledge, answer"
        " the question: {query_str}\nIf the context isn't helpful, you can also"
        " answer the question on your own.\n"
    )
    text_qa_template = PromptTemplate(text_qa_template_str)

    refine_template_str = (
        "The original question is as follows: {query_str}\nWe have provided an"
        " existing answer: {existing_answer}\nWe have the opportunity to refine"
        " the existing answer (only if needed) with some more context"
        " below.\n------------\n{context_msg}\n------------\nUsing both the new"
        " context and your own knowledge, update or repeat the existing answer.\n"
    )
    refine_template = PromptTemplate(refine_template_str)

    # Create a query engine for the index.
    if ollama_model is not None:
        # Local Ollama backend: the same model serves generation and embedding.
        llm = Ollama(model=ollama_model)
        ollama_embedding = OllamaEmbedding(
            model_name=ollama_model,
        )
        query_engine = index_reloaded.as_query_engine(
            llm=llm,
            text_qa_template=text_qa_template,
            refine_template=refine_template,
            embed_model=ollama_embedding,
        )
        query_engine.retriever.similarity_top_k = similarity_top_k
    else:  # use OpenAI...
        # Configure retriever explicitly.
        retriever = VectorIndexRetriever(
            index=index_reloaded,
            similarity_top_k=similarity_top_k,
        )
        # Configure response synthesizer with the same prompt templates.
        response_synthesizer = get_response_synthesizer(
            response_mode="refine",
            text_qa_template=text_qa_template,
            refine_template=refine_template,
        )
        query_engine = RetrieverQueryEngine(
            retriever=retriever,
            response_synthesizer=response_synthesizer,
        )
    return query_engine
# Module-level setup: select the backing LLM and build the shared query
# engine used by process_query() below. Runs at import time (loads the
# persisted index from disk).
llm_ver = LLM_GPT4o
index_reloaded = load_index(llm_ver)
query_engine = get_query_engine(index_reloaded, llm_ver)
def process_query(query, model=llm_ver):
    """Run *query* through the module-level query engine and print a report.

    Parameters
    ----------
    query : str
        Natural-language question to answer.
    model : str, optional
        Model label used in the printed report (default: the module's
        ``llm_ver``). Note the engine itself is the module-level one.

    Returns
    -------
    tuple
        ``(response_text, metadata)`` — the (cleaned) answer text and the
        response's metadata dict.
    """
    response = query_engine.query(query)
    response_text = str(response)

    # Strip a <think>...</think> reasoning block if present ("give deepseek a
    # fighting chance"). Guard on the closing tag too: the original
    # index("</think>") would raise ValueError on a truncated response.
    if "<think>" in response_text and "</think>" in response_text:
        start = response_text.index("<think>")
        end = response_text.index("</think>") + len("</think>")
        response_text = response_text[:start] + response_text[end:]

    metadata = response.metadata

    cutoff = 0.2  # minimum similarity score for sources after the first one

    files_used = []
    seen_docs = set()
    for sn in response.source_nodes:
        # Use the shared SOURCE_DOCUMENT constant, not a repeated literal.
        sd = sn.metadata[SOURCE_DOCUMENT]
        if "et_al_" in sd:
            sd = sd.replace("WormAtlas Handbook:", "Paper: ")
        # Deduplicate on the raw document name: entries in files_used carry a
        # "(score: ...)" suffix, so testing membership against them (as the
        # original did) could never detect a duplicate.
        if sd not in seen_docs:
            if len(files_used) == 0 or sn.score >= cutoff:
                seen_docs.add(sd)
                files_used.append(f"{sd} (score: {sn.score})")

    file_info = ",\n    ".join(files_used)

    print_(f"""
===============================================================================
QUERY: {query}
MODEL: {model}
-------------------------------------------------------------------------------
RESPONSE: {response_text}
SOURCES:
    {file_info}
===============================================================================
""")
    return response_text, metadata
def run_query(query):
    """Answer *query* with GPT-4o and append a deduplicated source list.

    Parameters
    ----------
    query : str
        Natural-language question to answer.

    Returns
    -------
    str
        The response text followed by a "SOURCES OF ANSWER" section derived
        from the response metadata.
    """
    # Use the shared constant rather than the repeated "GPT4o" literal.
    response_text, metadata = process_query(query, LLM_GPT4o)

    files_used = []
    for v in metadata.values():
        if SOURCE_DOCUMENT not in v:
            continue
        sd = v[SOURCE_DOCUMENT]
        # Normalise the paper name BEFORE the duplicate check: the original
        # tested the raw name against a list of already-normalised entries,
        # so renamed papers could be appended twice.
        if "et_al_" in sd:
            sd = sd.replace("WormAtlas Handbook:", "Paper: ")
        if sd not in files_used:
            files_used.append(sd)

    srcs = "\n- ".join(files_used)

    answer = f"""{response_text}
SOURCES OF ANSWER:
- {srcs}"""
    return answer
if __name__ == "__main__":
    # Smoke-test the pipeline with a couple of example questions.
    print("Running queries")

    example_queries = [
        "What are the main types of neurons and muscles in the C. elegans pharynx?",
        "Tell me about the egg laying apparatus",
    ]

    for q in example_queries:
        print("-------------------")
        print(f"Q: {q}")
        print(f"A: {run_query(q)}")