|
from huggingface_hub import InferenceClient |
|
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings |
|
from langchain_community.vectorstores import Chroma |
|
from transformers import pipeline |
|
from sentence_transformers.cross_encoder import CrossEncoder |
|
import re |
|
import os |
|
|
|
def setupDB(domain, hasLLM):
    """Create the per-session parameter dictionary consumed by process_query.

    Loads the cross-encoder used to detect "write an ECO script" requests and
    opens the Chroma stores for *domain* and for the "insts" collection.

    Args:
        domain: Key understood by nandGetChroma (the code below special-cases
            'en' for PDF stores).
        hasLLM: Stored verbatim under 'hasLLM'; process_query uses it to
            decide whether to call the LLM.

    Returns:
        dict with keys 'history', 'disnum', 'domain', 'crossmodel',
        'insts_db', 'support_db', 'pdf_dbs' and 'hasLLM'.
    """
    # Two slots: process_query stores the last query in [0] and the last LLM
    # answer in [1].
    history = ["", ""]
    crossmodel = CrossEncoder("cross-encoder/stsb-distilroberta-base")
    # The returned tables are not used in this function.
    _models, _all_state = nandState()
    support_db = nandGetChroma(domain)
    insts_db = nandGetChroma("insts")

    pdf_dbs = []
    if domain == 'en':
        # The list of PDF collections is currently empty, so no PDF store is
        # opened.
        pdfs = []
        pdf_dbs.extend(nandGetChroma(onepdf) for onepdf in pdfs)

    return {
        'history': history,
        'disnum': 10,
        'domain': domain,
        'crossmodel': crossmodel,
        'insts_db': insts_db,
        'support_db': support_db,
        'pdf_dbs': pdf_dbs,
        'hasLLM': hasLLM,
    }
|
def remapScore(domain, inscore):
    """Map a raw search score onto an integer display score.

    With x = 1 - inscore the result is int((a*x*x + b*x) * 100). The
    coefficients are (-0.2, 1.2) for domain 'ch' and (-1.2, 2.2) for every
    other domain; in both cases a + b == 1, so an inscore of 1 maps to 0 and
    an inscore of 0 maps to roughly 100.

    Args:
        domain: 'ch' selects the flatter curve; anything else the steeper one.
        inscore: Numeric score (presumably a vector distance where smaller
            means closer - confirm against the caller).

    Returns:
        The remapped score truncated to int.
    """
    a, b = (-0.2, 1.2) if domain == 'ch' else (-1.2, 2.2)
    xin = 1 - inscore
    y = a * xin * xin + b * xin
    return int(y * 100)
|
|
|
def process_query(iniquery, para):
    """Answer one user query from the vector stores and, optionally, the LLM.

    Steps:
      1. Strip "<br>" from the query and translate it with toEn.
      2. Score the query against "write a perl ECO script" with the
         cross-encoder; above the threshold, collect the API files that
         correspond to the closest instruction documents.
      3. Search the support store (and any PDF stores), sort by score
         ascending and format the hits into a numbered listing.
      4. If para['hasLLM'] is truthy, build a prompt from the collected
         context, call llmGenerate and record the exchange in para['history'].

    Args:
        iniquery: Raw query text.
        para: Dictionary built by setupDB.

    Returns:
        (itisscript, text): itisscript is 1 when the query was classified as
        a script request, else 0. text is the LLM output when para['hasLLM']
        is truthy, otherwise the formatted retrieval listing.

    Raises:
        OSError: from os.path.getsize / GetContent when a mapped API file
            does not exist.
    """
    query = re.sub("<br>", "", iniquery)
    ch2en, query = toEn(query)
    if ch2en:
        print(f"Received from connected users : {query}")
    else:
        print(f"Received from connected users : {query}", end='')
    disnum = para['disnum']
    domain = para['domain']
    history = para['history']
    crossmodel = para['crossmodel']
    insts_db = para['insts_db']
    support_db = para['support_db']
    pdf_dbs = para['pdf_dbs']
    hasLLM = para['hasLLM']
    ret = ""

    # --- Step 2: is the user asking for an ECO script? ---
    needScriptScores = crossmodel.predict([["write a perl ECO script", query]])
    print(f"THE QUERY SCORE for creating eco script: score={needScriptScores[0]}")
    allapis = []
    threshold = 0.45
    itisscript = 0
    if needScriptScores[0] > threshold:
        itisscript = 1
        print(f"THE QUERY REQUIRES CREATING AN ECO SCRIPT score={needScriptScores[0]} > {threshold}")
        accu = 0  # running total of API file sizes, reported in the log only
        for instdoc, instscore in insts_db.similarity_search_with_score(query, k=10):
            instname = instdoc.metadata['source']
            # Map the instruction file onto its API file: swap the leading
            # "insts" for "src_en" and drop every ".<digits>" component.
            otherfile = re.sub("^insts", "src_en", instname)
            otherfile = re.sub(r"\.\d+", "", otherfile)
            if otherfile not in allapis:
                allapis.append(otherfile)
                modfile = otherfile.replace("\\", "/")
                apisize = os.path.getsize(modfile)
                accu += apisize
                print(f"INST: {instname} SCORE: {instscore} API-size: {apisize} Accu: {accu}")

    # --- Step 3: retrieve supporting documents ---
    results = [[doc, dist] for doc, dist in support_db.similarity_search_with_score(query, k=8)]
    for onepdfdb in pdf_dbs:
        for doc, dist in onepdfdb.similarity_search_with_score(query, k=8):
            # PDF hits get +0.2 added, which moves them later in the
            # ascending sort below.
            results.append([doc, dist + 0.2])
    results.sort(key=lambda x: x[1])

    index = 1
    for doc, rawscore in results:
        source = doc.metadata['source']
        path = source
        if path in allapis:
            # Its full text is added to the LLM context from the API file.
            print(f"dont use path={path}, it's in instruction list")
            continue
        is_pdf = re.search(r"\.pdf$", source)
        prefix = "Help:"
        if re.search(r"api\.", source):
            prefix = "API:"
        elif re.search(r"man\.", source):
            prefix = "Manual:"
        elif is_pdf:
            prefix = "PDF:"
        score = remapScore(domain, rawscore)
        retcont = doc.page_content
        if is_pdf:
            page = doc.metadata['page'] + 1
            subpage = doc.metadata['subpage']
            retcont += f"\n<a target='_blank' href='/AI/{path}#page={page}'>PDF{page} {subpage}</a>\n"
        ret += f"Return {index} ({score}) {prefix} {retcont}\n"
        # The size check runs after appending, so the listing may exceed
        # 6000 characters by up to one entry.
        if len(ret) > 6000:
            break
        index += 1
        if index > disnum:
            break

    if not hasLLM:
        return itisscript, ret

    # --- Step 4: build the prompt and ask the LLM ---
    # NOTE(review): the prompt literals below contain spelling mistakes
    # ("abreviation", "Gats", "abbrevation", "creat"). They are kept
    # byte-identical because changing them changes what the model receives.
    context = "Context information is below\n---------------------\n"
    if allapis:
        context += scriptExamples()
        for oneapi in allapis:
            modfile = oneapi.replace("\\", "/")
            cont = GetContent(modfile)
            cont = re.sub("</h3>", " API Detail:", cont)
            cont = re.sub('<.*?>', '', cont)  # strip remaining HTML tags
            cont = re.sub('Examples:.*', '', cont, flags=re.DOTALL)
            context += cont
    else:
        context += "GOF is abreviation of Gats On the Fly, it is netlist process platform.\n"
        context += "ECO is abbrevation of engineering change order.\n"
        context += "LEC is abbrevation of logic equivalence checking.\n"
        context += "Netlist ECO is to change netlist incrementally by tool or manually.\n"
        context += "Automatic ECO is to use GOF ECO to do functional netlist ECO automatically.\n"

    context += ret
    prompt = f"{context}\n"
    prompt += "------------------------------------------\n"
    if allapis:
        prompt += "Given the context information and not prior knowledge, creat a Perl ECO script by following the format and sequence in the script examples provided above.\n"
    else:
        prompt += "Given the context information and not prior knowledge, answer the query.\n"
    prompt += f"Query: {query}\n"

    llmout = llmGenerate(prompt)
    # Remember the exchange in the two history slots.
    history[0] = query
    history[1] = llmout

    outlen = len(llmout)
    prolen = len(prompt)
    print(f"Prompt len: {prolen} LLMOUT len: {outlen} itisscript: {itisscript}")
    return itisscript, llmout
|
|
|
def toEn(intxt):
    """Translate *intxt* to English when it contains Chinese characters.

    Text without any character in U+4E00..U+9FFF is returned untouched.
    Otherwise it is run through the "Helsinki-NLP/opus-mt-zh-en" translation
    pipeline and a fixed list of substitutions is applied to the result
    (presumably recurring mistranslations of EDA terms - e.g. "web" where
    "netlist" is meant).

    The pipeline is created on first use and cached on the function object,
    so the model is not reloaded for every query.

    Args:
        intxt: Query text.

    Returns:
        (1, translated_text) when a translation happened, else (0, intxt).
    """
    if not re.search(r'[\u4e00-\u9fff]', intxt):
        return 0, intxt

    translator = getattr(toEn, "_translator", None)
    if translator is None:
        translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-zh-en")
        toEn._translator = translator

    out_text = translator(intxt, max_length=500)[0]['translation_text']
    # Applied in this order; "web-based" must be handled before bare "web".
    fixups = (
        ("ECO foot", "ECO Script"),
        ("web-based", "netlist"),
        (r"\bweb\b", "netlist"),
        (r"\bwebsheet\b", "netlist"),
        (r"\bweblists?\b", "netlist"),
    )
    for wrong, right in fixups:
        out_text = re.sub(wrong, right, out_text)
    print(f"AFTER RESULT: {out_text}")
    return 1, out_text
|
|
|
|
|
|
|
def nandGetChroma(domain):
    """Open the persisted Chroma store registered for *domain*.

    Looks up the store directory and embedding-model key for *domain* in the
    tables returned by nandState, then builds the matching
    SentenceTransformerEmbeddings and Chroma objects.

    Args:
        domain: A key of the state table returned by nandState.

    Returns:
        The Chroma instance bound to that domain's persist directory.

    Raises:
        KeyError: if *domain* is not in the state table.
    """
    model_names, states = nandState()
    chdb = states[domain]['chroma']
    print(f"domain: {domain} has chroma dir {chdb}")
    model_name = model_names[states[domain]['model']]
    embedder = SentenceTransformerEmbeddings(model_name=model_name)
    return Chroma(persist_directory=chdb, embedding_function=embedder)
|
def nandState():
    """Return the embedding-model table and the per-collection state table.

    Returns:
        (models, allState):
        models maps a short key ('em', 'en', 'ch') to a sentence-transformer
        model name. allState maps a collection name to a dict with keys
        'cstate', 'pstate', 'dir', 'json', 'chroma', 'model' and 'chunk'.
        It holds 'insts', 'en', 'ch' and twelve PDF collections
        'pdf_0em' .. 'pdf_11em'. Fresh dictionaries are built on every call.
    """
    def _entry(src_dir, json_tag, chroma_tag, model, chunk):
        # One state record; key order matches the documented layout.
        return {
            'cstate': {},
            'pstate': {},
            'dir': src_dir,
            'json': f"filestatus.{json_tag}.json",
            'chroma': f"chroma_db_{chroma_tag}",
            'model': model,
            'chunk': chunk,
        }

    models = {
        'em': "all-MiniLM-L6-v2",
        'en': "all-mpnet-base-v2",
        'ch': "shibing624/text2vec-base-chinese-sentence",
    }

    allState = {
        'insts': _entry('insts', 'insts', 'insts', 'en', 0),
        'en': _entry('src_en', 'english', 'en', 'en', 0),
        'ch': _entry('src_ch', 'chinese', 'ch', 'ch', 1),
    }
    for ind in range(12):
        name = f"pdf_{ind}em"
        allState[name] = _entry(f"pdf_sub{ind}", name, name, 'em', 1)
    return models, allState
|
def formatPrompt(message, history):
    """Build the prompt that asks the LLM to merge a follow-up into one query.

    Args:
        message: The current user query.
        history: Sequence whose item 0 is the previous query and item 1 the
            previous answer (the layout process_query writes).

    Returns:
        *message* unchanged when there is no previous query (empty history or
        empty first slot); otherwise a multi-line prompt ending in
        "New query:".
    """
    if not history or not history[0]:
        return message
    # Tolerate a history that only carries the previous query.
    previous_answer = history[1] if len(history) > 1 else ""
    # Each field goes on its own line so the model can tell them apart.
    return (
        "Create a new query based on previous query/answer paire and current query:\n"
        f"Previous query: {history[0]}\n"
        f"Previous answer: {previous_answer}\n"
        f"Current query: {message}\n"
        "New query:"
    )
|
|
|
def llmNewQuery(prompt, history):
    """Ask the LLM to rewrite *prompt* in the light of the previous exchange.

    Args:
        prompt: The current user query.
        history: Previous query/answer pair, passed through to formatPrompt.

    Returns:
        Whatever llmGenerate returns for the prompt built by formatPrompt.
    """
    return llmGenerate(formatPrompt(prompt, history))
|
|
|
def llmGenerate(prompt, temperature=0.001, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0):
    """Generate text for *prompt* with the hosted Mistral-7B-Instruct model.

    A new InferenceClient for "mistralai/Mistral-7B-Instruct-v0.2" is created
    on each call; the response is streamed and the token texts concatenated.

    Args:
        prompt: Full prompt text.
        temperature: Sampling temperature, forwarded as given.
        max_new_tokens: Generation length limit, forwarded as given.
        top_p: Nucleus-sampling value; converted with float() before use.
        repetition_penalty: Forwarded as given.

    Returns:
        The generated text (the prompt is not echoed: return_full_text=False).
    """
    generate_kwargs = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": float(top_p),
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        "seed": 42,  # fixed seed is sent with every request
    }
    llmclient = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
    stream = llmclient.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    return "".join(response.token.text for response in stream)
|
|
|
|
|
def thoseRemove():
    """Return a new single-item list containing the string "redundant"."""
    return ["redundant"]
|
|
|
def GetContent(file):
    """Return the entire text content of the file at path *file*.

    The file is opened in text mode with the platform's default encoding.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open(file) as handle:
        return handle.read()
|
|
|
def scriptExamples():
    """Return the block of example Perl ECO scripts used as LLM context.

    The text holds three complete example scripts (manual, automatic and
    automatic metal-only ECO), five shorter variations described relative to
    the third script, and a ninth complete manual-ECO script. process_query
    places it in front of the API documentation when the query asks for an
    ECO script.
    """
    return """
#The first ECO scipt example for manual ECO:
use strict;
setup_eco("eco_example");
read_library("tsmc.5nm.lib");
read_design("-imp", "implementation.gv");
set_top("topmod");
change_pin("u_abc/state_reg_0_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_1_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_2_/D", "INVX1", "", "-");
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
#End of the manual ECO script example

#The second ECO script example for automatic ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");
#read_svf("-imp", "implementation.svf.txt");
read_design("-ref", "reference.gv");
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
# Preserve DFT Test Logic
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
fix_design();
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example


#The third ECO script example is for automatic metal only ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");
#read_svf("-imp", "implementation.svf.txt");
read_design("-ref", "reference.gv");# Read in Reference Netlist
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
read_lef("tsmc.lef"); # Read LEF
read_def("topmod.def"); # Read Design Exchange Format file
fix_design(); # Must run before get_spare_cells and map_spare_cells
get_spare_cells("*/*_SPARE*");
map_spare_cells();
report_eco(); # ECO report
check_design();# Check if the ECO causes any issue, like floating
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
write_perl("eco_result.pl");# Write out result in Perl script
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example

#The four ECO script example is the same as the third ECO script, except fix_design
# list_file option to load in the ECO points list file converted from RTL-to-RTL LEC result
fix_design("-list_file", "the_eco_points.txt");

#The 5th ECO script example is the same as the 3rd ECO script, except fix_design
# Enable flatten mode ECO. The default mode is hierarchical. The flatten mode is for small fix but the changes go across
# module boundaries
fix_design("-flatten");

#The 6th ECO script is similar to the third ECO script, but it dumps formality help file after LEC
run_lec(); # Run GOF LEC to generate Formality help files
write_compare_points("compare_points.report");
write_formality_help_files("fm_dir/formality_help"); # formality_help files are generated in fm_dir folder

#The 7th ECO script is similar to the third ECO script, but it uses gate array spare cells
fix_design(); # Must run before get_spare_cells and map_spare_cells
# Enable Gate Array Spare Cells Metal Only ECO Flow, map_spare_cells will map to Gate Array Cells only
get_spare_cells("-gate_array", "G*", "-gate_array_filler", "GFILL*|GDCAP*");
map_spare_cells();

#The 8th ECO script is similar to the third ECO script, but it uses only deleted gates or freed up gates in ECO as spare cells
fix_design(); # Must run before get_spare_cells and map_spare_cells
get_spare_cells("-addfreed");
map_spare_cells();

#The 9th ECO script is manual ECO, find all memory hierarchically and tie the pin TEST_SHIFT of memory to net "TEST_EN"
use strict;
setup_eco("eco_example");
read_library("tsmc.3nm.lib");
read_design("-imp", "from_backend.gv");
set_top("topmod");
# Get all memories hierarchically, instance naming, "U_HMEM*"
my @mems = get_cells("-hier", "U_HMEM*");
foreach my $mem (@mems){
change_pin("$mem/TEST_SHIFT", "TEST_EN");
}
report_eco(); # ECO report
check_design();
write_verilog("mem_eco.v");

"""
|
|