Spaces:
Sleeping
Sleeping
File size: 5,107 Bytes
12ac14b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import sql_utils as su
import gpt_utils as gu
import time
def database_structure_strings():
NanoEntDes_structure = "{table:NanoEntDes,attributes:" + \
"[ID(Primary),NanoEntity,Description]}"
NanoEntCom_structure = "{table:NanoEntCom,attributes:" + \
"[ID(Primary),NanoEntity,Composition,CompositionType," + \
"MolecularWeight,PubChemID]}"
FuncEntDes_structure = "{table: FuncEntDes, attributes:" + \
"[ID(Primary),FunctionEntity,FunctionEntityType," + \
"Description,ActivationMethod,pubChemID," + \
"MolarMass,MolarMassUnit]}"
FuncEntFunction_structure = "{table: FuncEntFunction," + \
"attributes:[ID(Primary),FunctionEntiry,Function," + \
"FunctionDescription]}"
ChemAsso_structure = "{table: ChemAsso, attributes:" + \
"[ID(Primary),AssociationType,BondType,Description," + \
"dataId,ComposingElementNameA,ComposingElementNameB," + \
"CompositiontypeB,CompositiontypeA,DomainElementNameB," + \
"DomainElementNameA,DomainAssociationId,ComposingElemetIdB," + \
"ComposingElemetIdA,ComposingElementTypeA,EntityDisplayNameB," + \
"ComposingElementTypeB,EntityDisplayNameA,AttachmentId}"
GeneralInfo_structure = "{table:GeneralInfo,attributes:" + \
"[ID(Primary),sampleName,createdYear,createdMonth]}"
SampleKeywords_structure = "{table:SampleKeyWords,attributes:" + \
"[ID(Primary),sampleName,SampleKeyWord]}"
PublicationInfo_structure = "{table:PublicationInfo," + \
"attributes:[ID(Primary),PMID,year,title,author," + \
"journal,publicationCategories,description]}"
PublicationKeyWords_structure = "{table:PublicationKeyWords," + \
"attributes:[ID(Primary),sampleName,SampleKeyWord]}"
CharacterizationInfo_structure = "{table:CharacterizationInfo," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,Protocol, " + \
"DesignDescription,AnalysisAndConclusion]}"
CharExpConfig_structure = "{table:CharExpConfig," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,ExpConfigTechnique, " + \
"ExpConfigInstruments,ExpConfigDescription]}"
CharResultDescriptions_structure = "{table:CharResultDescriptions," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,CharResultDescription]}"
CharResultKeywords_structure = "{table:CharResultKeywords," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,CharResultKeyword]}"
CharResultTables_structure = "{table:CharResultTables," + \
"attributes:[ID(Primary),CharType,CharName," + \
"AssayType,CharTable]}"
note1 = "NanoEntCom should not be used to count unique " + \
"NanoEntity\ncount composition should only use composition table"
note2 = "ALL tables shoud join on ID. Table NanoEntDes and " + \
"NanoEntCom share key NanoEntity, " + \
"FuncEntDes and FuncEntFunction share FunctionEntity key, " + \
"ChemAsso does not have other common keys with other tables."
note3 = "Strictly reference to table name and columns in this context. "
Overall_structure = [
"\n",
NanoEntDes_structure,
NanoEntCom_structure,
FuncEntDes_structure,
FuncEntFunction_structure,
ChemAsso_structure,
GeneralInfo_structure,
SampleKeywords_structure,
PublicationInfo_structure,
PublicationKeyWords_structure,
CharacterizationInfo_structure,
CharExpConfig_structure,
CharResultDescriptions_structure,
CharResultKeywords_structure,
CharResultTables_structure,
note1,
note2,
note3
]
Overall_structure_string = "\n".join(Overall_structure)
return Overall_structure_string
def custom_query(
question,
connection,
GPT4=True,
print_prompt=False,
print_token=False,
print_query=False,
print_time=False
):
stucture = database_structure_strings()
context_token_count = gu.num_tokens_from_string(
stucture
)
prompt = su.sql_prompt(
question,
stucture
)
prompt_token_count = gu.num_tokens_from_string(prompt)
if print_prompt:
print(prompt)
if print_token:
print(f"\nContext token count: {context_token_count}")
print(f"Prompt token count: {prompt_token_count}")
start_time = time.time()
if GPT4:
query = gu.quick_ask(prompt, model_num=0)
else:
query = gu.quick_ask(prompt, model_num=1)
if print_query:
print("\n============= The Query is: ===============\n")
print(query)
print("\n===========================================\n")
result_df = su.submit_querry(query, connection)
# End the timer
end_time = time.time()
# Calculate the execution time
execution_time = end_time - start_time
# Print the execution time
if print_time:
print("\nExecution Time:", execution_time, "seconds")
return result_df, query
|