caNanoLibrarian / src /app_utils /application_utils.py
ruiheesi
Added Application Files
12ac14b
import sql_utils as su
import gpt_utils as gu
import time
def database_structure_strings():
NanoEntDes_structure = "{table:NanoEntDes,attributes:" + \
"[ID(Primary),NanoEntity,Description]}"
NanoEntCom_structure = "{table:NanoEntCom,attributes:" + \
"[ID(Primary),NanoEntity,Composition,CompositionType," + \
"MolecularWeight,PubChemID]}"
FuncEntDes_structure = "{table: FuncEntDes, attributes:" + \
"[ID(Primary),FunctionEntity,FunctionEntityType," + \
"Description,ActivationMethod,pubChemID," + \
"MolarMass,MolarMassUnit]}"
FuncEntFunction_structure = "{table: FuncEntFunction," + \
"attributes:[ID(Primary),FunctionEntiry,Function," + \
"FunctionDescription]}"
ChemAsso_structure = "{table: ChemAsso, attributes:" + \
"[ID(Primary),AssociationType,BondType,Description," + \
"dataId,ComposingElementNameA,ComposingElementNameB," + \
"CompositiontypeB,CompositiontypeA,DomainElementNameB," + \
"DomainElementNameA,DomainAssociationId,ComposingElemetIdB," + \
"ComposingElemetIdA,ComposingElementTypeA,EntityDisplayNameB," + \
"ComposingElementTypeB,EntityDisplayNameA,AttachmentId}"
GeneralInfo_structure = "{table:GeneralInfo,attributes:" + \
"[ID(Primary),sampleName,createdYear,createdMonth]}"
SampleKeywords_structure = "{table:SampleKeyWords,attributes:" + \
"[ID(Primary),sampleName,SampleKeyWord]}"
PublicationInfo_structure = "{table:PublicationInfo," + \
"attributes:[ID(Primary),PMID,year,title,author," + \
"journal,publicationCategories,description]}"
PublicationKeyWords_structure = "{table:PublicationKeyWords," + \
"attributes:[ID(Primary),sampleName,SampleKeyWord]}"
CharacterizationInfo_structure = "{table:CharacterizationInfo," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,Protocol, " + \
"DesignDescription,AnalysisAndConclusion]}"
CharExpConfig_structure = "{table:CharExpConfig," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,ExpConfigTechnique, " + \
"ExpConfigInstruments,ExpConfigDescription]}"
CharResultDescriptions_structure = "{table:CharResultDescriptions," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,CharResultDescription]}"
CharResultKeywords_structure = "{table:CharResultKeywords," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,CharResultKeyword]}"
CharResultTables_structure = "{table:CharResultTables," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,CharTable]}"
note1 = "NanoEntCom should not be used to count unique " + \
"NanoEntity\ncount composition should only use composition table"
note2 = "ALL tables shoud join on ID. Table NanoEntDes and " + \
"NanoEntCom share key NanoEntity, " + \
"FuncEntDes and FuncEntFunction share FunctionEntity key, " + \
"ChemAsso does not have other common keys with other tables."
note3 = "Strictly reference to table name and columns in this context. "
Overall_structure = [
"\n",
NanoEntDes_structure,
NanoEntCom_structure,
FuncEntDes_structure,
FuncEntFunction_structure,
ChemAsso_structure,
GeneralInfo_structure,
SampleKeywords_structure,
PublicationInfo_structure,
PublicationKeyWords_structure,
CharacterizationInfo_structure,
CharExpConfig_structure,
CharResultDescriptions_structure,
CharResultKeywords_structure,
CharResultTables_structure,
note1,
note2,
note3
]
Overall_structure_string = "\n".join(Overall_structure)
return Overall_structure_string
def custom_query(
question,
connection,
GPT4=True,
print_prompt=False,
print_token=False,
print_query=False,
print_time=False
):
stucture = database_structure_strings()
context_token_count = gu.num_tokens_from_string(
stucture
)
prompt = su.sql_prompt(
question,
stucture
)
prompt_token_count = gu.num_tokens_from_string(prompt)
if print_prompt:
print(prompt)
if print_token:
print(f"\nContext token count: {context_token_count}")
print(f"Prompt token count: {prompt_token_count}")
start_time = time.time()
if GPT4:
query = gu.quick_ask(prompt, model_num=0)
else:
query = gu.quick_ask(prompt, model_num=1)
if print_query:
print("\n============= The Query is: ===============\n")
print(query)
print("\n===========================================\n")
result_df = su.submit_querry(query, connection)
# End the timer
end_time = time.time()
# Calculate the execution time
execution_time = end_time - start_time
# Print the execution time
if print_time:
print("\nExecution Time:", execution_time, "seconds")
return result_df, query