Spaces:

gofeco
/

support

Sleeping

App Files Files Community

support / nand.py

gofeco

Update nand.py

fcbf96e verified 8 months ago

raw

history blame

14.3 kB

	from huggingface_hub import InferenceClient
	from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
	from langchain_community.vectorstores import Chroma
	from transformers import pipeline
	from sentence_transformers.cross_encoder import CrossEncoder
	import re
	import os

	def setupDB(domain, hasLLM):
	    """Build the parameter dictionary that ``process_query`` consumes.

	    Loads the cross-encoder and opens the Chroma stores for ``domain`` and
	    for the ``"insts"`` collection.

	    Args:
	        domain: Key into the state table returned by ``nandState``
	            (for example ``'en'`` or ``'ch'``).
	        hasLLM: Stored unchanged under ``'hasLLM'``.

	    Returns:
	        dict with the keys ``history``, ``disnum``, ``domain``,
	        ``crossmodel``, ``insts_db``, ``support_db``, ``pdf_dbs`` and
	        ``hasLLM``.
	    """
	    crossmodel = CrossEncoder("cross-encoder/stsb-distilroberta-base")
	    support_db = nandGetChroma(domain)
	    insts_db = nandGetChroma("insts")

	    # PDF collections are currently switched off: the name list is empty.
	    # Names used previously: "pdf_0em", "pdf_1em", "pdf_2em", "pdf_3em", "pdf_4em".
	    pdf_names = []
	    pdf_dbs = []
	    if domain == 'en':
	        pdf_dbs = [nandGetChroma(name) for name in pdf_names]

	    return {
	        # Two slots: [previous query, previous LLM answer].
	        'history': ["", ""],
	        # Maximum number of retrieved entries shown per query.
	        'disnum': 10,
	        'domain': domain,
	        'crossmodel': crossmodel,
	        'insts_db': insts_db,
	        'support_db': support_db,
	        'pdf_dbs': pdf_dbs,
	        'hasLLM': hasLLM,
	    }
	def remapScore(domain, inscore):
	    """Convert a raw retrieval score into an integer display score.

	    The score is inverted (``1 - inscore``) and passed through the quadratic
	    ``a*x*x + b*x``. Domain ``'ch'`` uses ``a=-0.2, b=1.2``; every other
	    domain uses ``a=-1.2, b=2.2``. The result is scaled by 100 and truncated
	    with ``int``.

	    Args:
	        domain: Domain name selecting the coefficient pair.
	        inscore: Raw numeric score.

	    Returns:
	        The remapped score as an ``int``.
	    """
	    coeff_a, coeff_b = (-0.2, 1.2) if domain == 'ch' else (-1.2, 2.2)
	    flipped = 1 - inscore
	    curve = coeff_a * flipped * flipped + coeff_b * flipped
	    return int(curve * 100)

	def process_query(iniquery, para):
	    """Retrieve context for a user query and optionally ask the LLM to answer.

	    Steps:
	      1. Strip ``<br>`` from the query and run it through ``toEn``.
	      2. Score the query against "write a perl ECO script" with the
	         cross-encoder. Above the 0.45 threshold the query is treated as a
	         script request and the API files matching the top instruction hits
	         are collected.
	      3. Search the support store (plus any PDF stores) and format the hits
	         as numbered "Return N (score) Prefix content" entries.
	      4. If ``para['hasLLM']`` is truthy, build a prompt from that context,
	         call ``llmGenerate`` and return its output; otherwise return the
	         formatted hits.

	    Args:
	        iniquery: Raw query text.
	        para: Parameter dictionary with the keys built by ``setupDB``.

	    Returns:
	        ``(itisscript, text)``: ``itisscript`` is 1 for a script request and
	        0 otherwise; ``text`` is the LLM output or the formatted hits.

	    Side effects:
	        Prints progress information. When the LLM is used, overwrites
	        ``para['history'][0]`` and ``[1]`` with the query and the answer.
	    """
	    query = re.sub("<br>", "", iniquery)
	    ch2en, query = toEn(query)
	    if ch2en:
	        print(f"Received from connected users : {query}")
	    else:
	        print(f"Received from connected users : {query}", end='')

	    disnum = para['disnum']
	    domain = para['domain']
	    history = para['history']
	    crossmodel = para['crossmodel']
	    insts_db = para['insts_db']
	    support_db = para['support_db']
	    pdf_dbs = para['pdf_dbs']
	    hasLLM = para['hasLLM']
	    ret = ""

	    script_score = crossmodel.predict([["write a perl ECO script", query]])[0]
	    print(f"THE QUERY SCORE for creating eco script: score={script_score}")
	    allapis = []
	    threshold = 0.45
	    itisscript = 0
	    if script_score > threshold:
	        itisscript = 1
	        print(f"THE QUERY REQUIRES CREATING AN ECO SCRIPT score={script_score} > {threshold}")
	        accu = 0
	        for instdoc, instscore in insts_db.similarity_search_with_score(query, k=10):
	            instname = instdoc.metadata['source']
	            # Map the instruction file name onto its API file: swap the
	            # leading "insts" for "src_en" and drop every ".<digits>" part.
	            otherfile = re.sub("^insts", "src_en", instname)
	            otherfile = re.sub(r"\.\d+", "", otherfile)
	            if otherfile not in allapis:
	                allapis.append(otherfile)
	                modfile = otherfile.replace("\\", "/")
	                apisize = os.path.getsize(modfile)
	                accu += apisize
	                # NOTE(review): logged only for newly added API files, since
	                # apisize is only computed here - confirm intended nesting.
	                print(f"INST: {instname} SCORE: {instscore} API-size: {apisize} Accu: {accu}")

	    # Gather hits as [document, score] pairs. PDF hits get +0.2 added so
	    # they sort after support hits with a similar score (ascending sort;
	    # presumably the score is a distance - lower is closer).
	    results = [[doc, dist] for doc, dist in support_db.similarity_search_with_score(query, k=8)]
	    for onepdfdb in pdf_dbs:
	        results.extend(
	            [doc, dist + 0.2]
	            for doc, dist in onepdfdb.similarity_search_with_score(query, k=8)
	        )
	    results.sort(key=lambda pair: pair[1])

	    index = 1
	    for doc, rawscore in results:
	        source = doc.metadata['source']
	        path = source
	        if path in allapis:
	            # Already supplied to the LLM in full via the API file list.
	            print(f"dont use path={path}, it's in instruction list")
	            continue
	        is_pdf = re.search(r"\.pdf$", source)
	        prefix = "Help:"
	        if re.search(r"api\.", source):
	            prefix = "API:"
	        elif re.search(r"man\.", source):
	            prefix = "Manual:"
	        elif is_pdf:
	            prefix = "PDF:"
	        score = remapScore(domain, rawscore)
	        retcont = doc.page_content
	        if is_pdf:
	            # Stored page numbers are shifted by one for the link.
	            page = doc.metadata['page'] + 1
	            subpage = doc.metadata['subpage']
	            retcont += f"\n<a target='_blank' href='/AI/{path}#page={page}'>PDF{page} {subpage}</a>\n"
	        ret += f"Return {index} ({score}) {prefix} {retcont}\n"
	        # Stop once the text exceeds 6000 characters or disnum entries.
	        if len(ret) > 6000:
	            break
	        index += 1
	        if index > disnum:
	            break

	    if not hasLLM:
	        return itisscript, ret

	    context = "Context information is below\n---------------------\n"
	    if allapis:
	        # Script request: give the LLM the worked examples plus the text of
	        # each matching API page (tags stripped, "Examples:" tail removed).
	        context += scriptExamples()
	        for oneapi in allapis:
	            modfile = oneapi.replace("\\", "/")
	            cont = GetContent(modfile)
	            cont = re.sub("</h3>", " API Detail:", cont)
	            cont = re.sub('<.*?>', '', cont)
	            cont = re.sub('Examples:.*', '', cont, flags=re.DOTALL)
	            context += cont
	    else:
	        # NOTE(review): these glossary strings contain typos ("abreviation",
	        # "Gats"); left untouched because they are runtime prompt text.
	        context += "GOF is abreviation of Gats On the Fly, it is netlist process platform.\n"
	        context += "ECO is abbrevation of engineering change order.\n"
	        context += "LEC is abbrevation of logic equivalence checking.\n"
	        context += "Netlist ECO is to change netlist incrementally by tool or manually.\n"
	        context += "Automatic ECO is to use GOF ECO to do functional netlist ECO automatically.\n"

	    context += ret
	    prompt = f"{context}\n"
	    prompt += "------------------------------------------\n"
	    if allapis:
	        prompt += "Given the context information and not prior knowledge, creat a Perl ECO script by following the format and sequence in the script examples provided above.\n"
	    else:
	        prompt += "Given the context information and not prior knowledge, answer the query.\n"
	    prompt += f"Query: {query}\n"

	    llmout = llmGenerate(prompt)
	    # Remember this exchange for follow-up query rewriting.
	    history[0] = query
	    history[1] = llmout
	    print(f"Prompt len: {len(prompt)} LLMOUT len: {len(llmout)} itisscript: {itisscript}")
	    return itisscript, llmout

	def toEn(intxt):
	    """Translate text containing Chinese characters to English.

	    Text without any character in the range U+4E00..U+9FFF is returned
	    untouched. Otherwise it is run through the
	    ``Helsinki-NLP/opus-mt-zh-en`` translation pipeline and a fixed list of
	    wording corrections is applied to the result.

	    Args:
	        intxt: Input text.

	    Returns:
	        ``(1, translated_text)`` when a translation was performed, else
	        ``(0, intxt)``.
	    """
	    if not re.search(r'[\u4e00-\u9fff]+', intxt):
	        return 0, intxt

	    # Building the pipeline loads a model; do it once and keep it on the
	    # function object instead of rebuilding it for every query.
	    translator = getattr(toEn, "_translator", None)
	    if translator is None:
	        translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-zh-en")
	        toEn._translator = translator

	    out_text = translator(intxt, max_length=500)[0]['translation_text']
	    # Order matters: "web-based" must be rewritten before the bare r"\bweb\b"
	    # rule, which would otherwise match inside it.
	    corrections = (
	        ("ECO foot", "ECO Script"),
	        ("web-based", "netlist"),
	        (r"\bweb\b", "netlist"),
	        (r"\bwebsheet\b", "netlist"),
	        (r"\bweblists?\b", "netlist"),
	    )
	    for wrong, right in corrections:
	        out_text = re.sub(wrong, right, out_text)
	    print(f"AFTER RESULT: {out_text}")
	    return 1, out_text



	def nandGetChroma(domain):
	    """Open the persisted Chroma store configured for ``domain``.

	    Looks up the store directory and embedding-model key for ``domain`` in
	    the table returned by ``nandState`` and builds a ``Chroma`` instance
	    with a matching ``SentenceTransformerEmbeddings`` function.

	    Args:
	        domain: Key of the state table (raises ``KeyError`` if absent).

	    Returns:
	        The ``Chroma`` vector store.
	    """
	    model_names, state_table = nandState()
	    entry = state_table[domain]
	    chroma_dir = entry['chroma']
	    print(f"domain: {domain} has chroma dir {chroma_dir}")
	    embedder = SentenceTransformerEmbeddings(model_name=model_names[entry['model']])
	    return Chroma(persist_directory=chroma_dir, embedding_function=embedder)
	def nandState():
	    """Return the embedding-model table and the per-collection state table.

	    Returns:
	        ``(models, allState)``. ``models`` maps a short model key to a
	        sentence-transformer model name. ``allState`` maps a collection name
	        (``'insts'``, ``'en'``, ``'ch'``, ``'pdf_0em'`` .. ``'pdf_11em'``) to
	        a dict with the keys ``cstate``, ``pstate``, ``dir``, ``json``,
	        ``chroma``, ``model`` and ``chunk``.
	    """
	    def _entry(src_dir, json_name, chroma_dir, model_key, chunk):
	        # Fresh cstate/pstate dicts per entry so collections never share them.
	        return {
	            'cstate': {},
	            'pstate': {},
	            'dir': src_dir,
	            'json': json_name,
	            'chroma': chroma_dir,
	            'model': model_key,
	            'chunk': chunk,
	        }

	    models = {
	        'em': "all-MiniLM-L6-v2",
	        'en': "all-mpnet-base-v2",
	        'ch': "shibing624/text2vec-base-chinese-sentence",
	    }

	    # chunk=1 marks collections whose pages get cut into smaller pieces:
	    # big PDF pages into 1000-byte chunks, and Chinese pages likewise.
	    allState = {
	        'insts': _entry('insts', 'filestatus.insts.json', 'chroma_db_insts', 'en', 0),
	        'en': _entry('src_en', 'filestatus.english.json', 'chroma_db_en', 'en', 0),
	        'ch': _entry('src_ch', 'filestatus.chinese.json', 'chroma_db_ch', 'ch', 1),
	    }
	    for pdf_index in range(12):
	        pdf_name = f"pdf_{pdf_index}em"
	        allState[pdf_name] = _entry(
	            f"pdf_sub{pdf_index}",
	            f"filestatus.{pdf_name}.json",
	            f"chroma_db_{pdf_name}",
	            'em',
	            1,
	        )
	    return models, allState
	def formatPrompt(message, history):
	    """Build the prompt that asks the LLM to merge a follow-up into one query.

	    Args:
	        message: The current query.
	        history: Sequence whose first item is the previous query and whose
	            second item is the previous answer. An empty sequence or an
	            empty first item means there is no history.

	    Returns:
	        ``message`` unchanged when there is no previous query; otherwise a
	        multi-line prompt listing the previous query, the previous answer
	        and the current query, ending with ``"New query:"``.
	    """
	    if not history or not history[0]:
	        return message
	    # Tolerate a history that holds a query but no answer yet.
	    previous_answer = history[1] if len(history) > 1 else ""
	    # One field per line so adjacent fields do not run together.
	    return (
	        "Create a new query based on previous query/answer paire and current query:\n"
	        f"Previous query: {history[0]}\n"
	        f"Previous answer: {previous_answer}\n"
	        f"Current query: {message}\n"
	        "New query:"
	    )

	def llmNewQuery(prompt, history):
	    """Ask the LLM to rewrite ``prompt`` in the light of ``history``.

	    The text sent to the model is whatever ``formatPrompt(prompt, history)``
	    returns; the model output from ``llmGenerate`` is returned as is.
	    """
	    return llmGenerate(formatPrompt(prompt, history))

	def llmGenerate(prompt, temperature=0.001, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0):
	    """Generate text for ``prompt`` with ``mistralai/Mistral-7B-Instruct-v0.2``.

	    Streams the completion through ``InferenceClient.text_generation`` and
	    returns the concatenated token texts.

	    Args:
	        prompt: Text sent to the model.
	        temperature: Passed through unchanged (no lower clamp is applied).
	        max_new_tokens: Passed through unchanged.
	        top_p: Converted with ``float`` before use.
	        repetition_penalty: Passed through unchanged.

	    Returns:
	        The generated text as one string.
	    """
	    sampling = {
	        "temperature": temperature,
	        "max_new_tokens": max_new_tokens,
	        "top_p": float(top_p),
	        "repetition_penalty": repetition_penalty,
	        "do_sample": True,
	        "seed": 42,
	    }
	    client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
	    token_stream = client.text_generation(
	        prompt, **sampling, stream=True, details=True, return_full_text=False
	    )
	    return "".join(chunk.token.text for chunk in token_stream)


	def thoseRemove():
	    """Return a new one-element list containing the string ``"redundant"``."""
	    return ["redundant"]

	def GetContent(file):
	    """Return the whole text of ``file``, opened in default text mode."""
	    with open(file) as handle:
	        return handle.read()

	def scriptExamples():
	    """Return the fixed text block of example GOF ECO Perl scripts.

	    The text holds nine commented examples (manual ECO, automatic ECO,
	    metal-only ECO and variations). ``process_query`` appends it to the LLM
	    context when the query is classified as a script request.

	    Returns:
	        The examples as a single string.
	    """
	    # The one backslash in the text is written as "\\|" so the literal has
	    # no invalid escape sequence; the resulting text still contains "\|".
	    exp = """
	#The first ECO scipt example for manual ECO:
	use strict;
	setup_eco("eco_example");
	read_library("tsmc.5nm.lib");
	read_design("-imp", "implementation.gv");
	set_top("topmod");
	change_pin("u_abc/state_reg_0_/D", "INVX1", "", "-");
	change_pin("u_abc/state_reg_1_/D", "INVX1", "", "-");
	change_pin("u_abc/state_reg_2_/D", "INVX1", "", "-");
	report_eco(); # ECO report
	check_design();
	write_verilog("eco_verilog.v");# Write out ECO result in Verilog
	#End of the manual ECO script example

	#The second ECO script example for automatic ECO:
	use strict;
	setup_eco("eco_example");# Setup ECO name
	read_library("tsmc.5nm.lib");# Read in standard library
	# SVF files are optional, best to be used when the design involves multibit flops
	#read_svf("-ref", "reference.svf.txt");
	#read_svf("-imp", "implementation.svf.txt");
	read_design("-ref", "reference.gv");
	read_design("-imp", "implementation.gv");
	set_top("topmod");# Set the top module
	# Preserve DFT Test Logic
	set_ignore_output("scan_out*");
	set_pin_constant("scan_enable", 0);
	set_pin_constant("scan_mode", 0);
	fix_design();
	report_eco(); # ECO report
	check_design();
	write_verilog("eco_verilog.v");# Write out ECO result in Verilog
	run_lec(); # Run GOF LEC to generate Formality help files
	#End of automatic ECO script example


	#The third ECO script example is for automatic metal only ECO:
	use strict;
	setup_eco("eco_example");# Setup ECO name
	read_library("tsmc.5nm.lib");# Read in standard library
	# SVF files are optional, best to be used when the design involves multibit flops
	#read_svf("-ref", "reference.svf.txt");
	#read_svf("-imp", "implementation.svf.txt");
	read_design("-ref", "reference.gv");# Read in Reference Netlist
	read_design("-imp", "implementation.gv");
	set_top("topmod");# Set the top module
	set_ignore_output("scan_out*");
	set_pin_constant("scan_enable", 0);
	set_pin_constant("scan_mode", 0);
	read_lef("tsmc.lef"); # Read LEF
	read_def("topmod.def"); # Read Design Exchange Format file
	fix_design(); # Must run before get_spare_cells and map_spare_cells
	get_spare_cells("/_SPARE*");
	map_spare_cells();
	report_eco(); # ECO report
	check_design();# Check if the ECO causes any issue, like floating
	write_verilog("eco_verilog.v");# Write out ECO result in Verilog
	write_perl("eco_result.pl");# Write out result in Perl script
	run_lec(); # Run GOF LEC to generate Formality help files
	#End of automatic ECO script example

	#The four ECO script example is the same as the third ECO script, except fix_design
	# list_file option to load in the ECO points list file converted from RTL-to-RTL LEC result
	fix_design("-list_file", "the_eco_points.txt");

	#The 5th ECO script example is the same as the 3rd ECO script, except fix_design
	# Enable flatten mode ECO. The default mode is hierarchical. The flatten mode is for small fix but the changes go across
	# module boundaries
	fix_design("-flatten");

	#The 6th ECO script is similar to the third ECO script, but it dumps formality help file after LEC
	run_lec(); # Run GOF LEC to generate Formality help files
	write_compare_points("compare_points.report");
	write_formality_help_files("fm_dir/formality_help"); # formality_help files are generated in fm_dir folder

	#The 7th ECO script is similar to the third ECO script, but it uses gate array spare cells
	fix_design(); # Must run before get_spare_cells and map_spare_cells
	# Enable Gate Array Spare Cells Metal Only ECO Flow, map_spare_cells will map to Gate Array Cells only
	get_spare_cells("-gate_array", "G", "-gate_array_filler", "GFILL\\|GDCAP*");
	map_spare_cells();

	#The 8th ECO script is similar to the third ECO script, but it uses only deleted gates or freed up gates in ECO as spare cells
	fix_design(); # Must run before get_spare_cells and map_spare_cells
	get_spare_cells("-addfreed");
	map_spare_cells();

	#The 9th ECO script is manual ECO, find all memory hierarchically and tie the pin TEST_SHIFT of memory to net "TEST_EN"
	use strict;
	setup_eco("eco_example");
	read_library("tsmc.3nm.lib");
	read_design("-imp", "from_backend.gv");
	set_top("topmod");
	# Get all memories hierarchically, instance naming, "U_HMEM*"
	my @mems = get_cells("-hier", "U_HMEM*");
	foreach my $mem (@mems){
	change_pin("$mem/TEST_SHIFT", "TEST_EN");
	}
	report_eco(); # ECO report
	check_design();
	write_verilog("mem_eco.v");

	"""
	    return exp