File size: 13,778 Bytes
2ce8f2c
 
 
 
 
 
3382eca
2ce8f2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef89b98
 
2ce8f2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef89b98
 
2ce8f2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e667bce
2ce8f2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e667bce
2ce8f2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
from huggingface_hub import InferenceClient
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from transformers import pipeline
from sentence_transformers.cross_encoder import CrossEncoder
import re
import os

def setupDB(domain, hasLLM):
    """Build the parameter dictionary consumed by process_query.

    Loads the cross-encoder used to score a query against
    "write a perl ECO script" and opens the Chroma stores for the requested
    domain and for the instruction set ("insts").

    Args:
        domain: Key understood by nandGetChroma (for example 'en' or 'ch');
            selects the support database and is stored under 'domain'.
        hasLLM: Stored unchanged under 'hasLLM'; process_query calls the LLM
            only when it is truthy.

    Returns:
        dict with the keys 'history', 'disnum', 'domain', 'crossmodel',
        'insts_db', 'support_db', 'pdf_dbs' and 'hasLLM'.
    """
    crossmodel = CrossEncoder("cross-encoder/stsb-distilroberta-base")
    support_db = nandGetChroma(domain)
    insts_db = nandGetChroma("insts")

    # PDF stores are only considered for the English domain, and the name list
    # is currently empty, so no PDF store is opened. Candidate names follow the
    # pattern "pdf_0em", "pdf_1em", ...
    pdf_names = []
    pdf_dbs = []
    if domain == 'en':
        pdf_dbs = [nandGetChroma(name) for name in pdf_names]

    return {
        'history': ["", ""],  # slot 0: previous query, slot 1: previous LLM answer
        'disnum': 10,         # process_query stops listing after this many results
        'domain': domain,
        'crossmodel': crossmodel,
        'insts_db': insts_db,
        'support_db': support_db,
        'pdf_dbs': pdf_dbs,
        'hasLLM': hasLLM,
    }
def remapScore(domain, inscore):
    """Map a raw retrieval score onto an integer display score.

    The raw score is inverted (``1 - inscore``) and fed through the quadratic
    ``a*x*x + b*x``. The coefficients are (-0.2, 1.2) for the 'ch' domain and
    (-1.2, 2.2) for every other domain. The result is multiplied by 100 and
    truncated with int().
    """
    a, b = (-0.2, 1.2) if domain == 'ch' else (-1.2, 2.2)
    xin = 1 - inscore
    return int((a * xin * xin + b * xin) * 100)
       
def process_query(iniquery, para):
    """Answer one user query from the vector stores and, optionally, the LLM.

    Steps:
      1. Remove "<br>" markers and pass the text through toEn.
      2. Score the query against "write a perl ECO script" with the
         cross-encoder; above the threshold, collect the API files that
         correspond to the closest instruction documents.
      3. Retrieve the closest support documents (and PDF documents, if any
         PDF stores are configured) and format them into a numbered listing.
      4. When para['hasLLM'] is truthy, build a prompt from the script
         examples, the API files and the listing, call the LLM, record the
         query/answer pair in para['history'] and return the LLM output.

    Args:
        iniquery: Raw query text.
        para: Parameter dictionary as built by setupDB.

    Returns:
        The LLM output when para['hasLLM'] is truthy, otherwise the formatted
        result listing.

    Raises:
        OSError: if an API file derived from an instruction document cannot
            be sized or read.
    """
    query = re.sub("<br>", "", iniquery)
    ch2en, query = toEn(query)
    if ch2en:
        print(f"Received from connected users : {query}")
    else:
        # Untranslated input is echoed without an added newline.
        print(f"Received from connected users : {query}", end='')
    disnum = para['disnum']
    domain = para['domain']
    history = para['history']
    crossmodel = para['crossmodel']
    insts_db = para['insts_db']
    support_db = para['support_db']
    pdf_dbs = para['pdf_dbs']
    hasLLM = para['hasLLM']
    ret = ""

    needScriptScores = crossmodel.predict([["write a perl ECO script", query]])
    print(f"THE QUERY SCORE for creating eco script: score={needScriptScores[0]}") 
    allapis = []
    threshold = 0.45
    if needScriptScores[0] > threshold:
        print(f"THE QUERY REQUIRES CREATING AN ECO SCRIPT score={needScriptScores[0]} > {threshold}") 
        retinsts = insts_db.similarity_search_with_score(query, k=10)
        accu = 0
        for instdoc, instscore in retinsts:
            instname = instdoc.metadata['source']
            # Map the instruction file to its API file: swap the leading
            # "insts" for "src_en" and drop every ".<digits>" component.
            otherfile = re.sub(r"^insts", "src_en", instname)
            otherfile = re.sub(r"\.\d+", "", otherfile)
            if otherfile not in allapis:
                allapis.append(otherfile)
                modfile = otherfile.replace("\\", "/")
                apisize = os.path.getsize(modfile)
                accu += apisize
                print(f"INST: {instname} SCORE: {instscore} API-size: {apisize} Accu: {accu}")

    # Merge support and PDF hits. PDF hits get 0.2 added to their score, which
    # moves them later in the ascending sort below.
    results = [[doc, score] for doc, score in support_db.similarity_search_with_score(query, k=8)]
    for onepdfdb in pdf_dbs:
        for doc, score in onepdfdb.similarity_search_with_score(query, k=8):
            results.append([doc, score + 0.2])
    results.sort(key=lambda x: x[1])

    index = 1
    for doc, rawscore in results:
        source = doc.metadata['source']
        path = source
        if path in allapis:
            # The full API file goes into the LLM context instead.
            print(f"dont use path={path}, it's in instruction list")
            continue
        is_pdf = re.search(r"\.pdf$", source)
        prefix = "Help:"
        if re.search(r"api\.", source):
            prefix = "API:"
        elif re.search(r"man\.", source):
            prefix = "Manual:"
        elif is_pdf:
            prefix = "PDF:"
        score = remapScore(domain, rawscore)
        retcont = doc.page_content
        if is_pdf:
            # Stored page numbers are shifted by one for the link.
            page = doc.metadata['page'] + 1
            subpage = doc.metadata['subpage']
            retcont += f"\n<a target='_blank' href='/AI/{path}#page={page}'>PDF{page} {subpage}</a>\n"
        ret += f"Return {index} ({score}) {prefix} {retcont}\n"
        # Stop once the listing exceeds 6000 characters or disnum entries.
        if len(ret) > 6000:
            break
        index += 1
        if index > disnum:
            break

    if not hasLLM:
        return ret

    context = "Context information is below\n---------------------\n"
    if allapis:
        context += scriptExamples()
        for oneapi in allapis:
            modfile = oneapi.replace("\\", "/")
            cont = GetContent(modfile)
            # Reduce the API page to plain text and cut everything from
            # "Examples:" onwards.
            cont = re.sub("</h3>", " API Detail:", cont)
            cont = re.sub('<.*?>', '', cont)
            cont = re.sub('Examples:.*', '', cont, flags=re.DOTALL)
            context += cont
    context += ret
    prompt = f"{context}\n"
    prompt += "------------------------------------------\n"
    if allapis:
        prompt += "Given the context information and not prior knowledge, creat a Perl ECO script by following the format and sequence in the script examples provided above.\n"
    else:
        prompt += "Given the context information and not prior knowledge, answer the query.\n"
    prompt += f"Query: {query}\n"

    llmout = llmGenerate(prompt)
    # Remember the latest exchange in the shared history list.
    history[0] = query
    history[1] = llmout
    outlen = len(llmout)
    prolen = len(prompt)
    print(f"Prompt len: {prolen} LLMOUT len: {outlen}")
    return llmout

def toEn(intxt):
    """Translate text containing Chinese characters to English.

    Text with no character in the range U+4E00..U+9FFF is returned unchanged.
    Otherwise it is run through the "Helsinki-NLP/opus-mt-zh-en" translation
    pipeline, and a fixed list of substitutions is applied to the result
    (for example "ECO foot" -> "ECO Script", "web" -> "netlist").

    The pipeline is built on the first call that needs it and kept on the
    function object (``toEn._translator``), so later calls do not reload the
    model.

    Args:
        intxt: Text to inspect and possibly translate.

    Returns:
        ``(1, translated_text)`` when a translation was performed, otherwise
        ``(0, intxt)``.
    """
    if not re.search(r'[\u4e00-\u9fff]+', intxt):
        return 0, intxt

    translator = getattr(toEn, "_translator", None)
    if translator is None:
        translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-zh-en")
        toEn._translator = translator

    out_text = translator(intxt, max_length=500)[0]['translation_text']
    # Applied in order; later patterns see the output of earlier ones.
    substitutions = (
        ("ECO foot", "ECO Script"),
        ("web-based", "netlist"),
        (r"\bweb\b", "netlist"),
        (r"\bwebsheet\b", "netlist"),
        (r"\bweblists?\b", "netlist"),
    )
    for pattern, replacement in substitutions:
        out_text = re.sub(pattern, replacement, out_text)
    print(f"AFTER RESULT: {out_text}")
    return 1, out_text
    


def nandGetChroma(domain):
    """Open the persisted Chroma store registered for *domain*.

    Looks up the domain's entry in the table returned by nandState, prints
    its Chroma directory, and opens that directory with a
    SentenceTransformerEmbeddings built from the entry's model name.

    Raises:
        KeyError: if *domain* is not a key of the state table.
    """
    models, allState = nandState()
    entry = allState[domain]
    persist_dir = entry['chroma']
    print(f"domain: {domain} has chroma dir {persist_dir}")
    embedder = SentenceTransformerEmbeddings(model_name=models[entry['model']])
    return Chroma(persist_directory=persist_dir, embedding_function=embedder)
def nandState():
    """Return the embedding-model table and the per-domain state table.

    Returns:
        ``(models, allState)``. ``models`` maps a short model key to a
        sentence-transformer model name. ``allState`` maps a domain name
        ('insts', 'en', 'ch', 'pdf_0em' .. 'pdf_11em') to a dict with the keys
        'cstate', 'pstate', 'dir', 'json', 'chroma', 'model' and 'chunk'.
        Both tables are rebuilt on every call.
    """
    def entry(src_dir, json_name, chroma_dir, model_key, chunk):
        # 'chunk' marks domains whose pages are cut into smaller pieces
        # (big PDF pages into 1000-byte chunks, Chinese pages into smaller
        # chunks).
        return {'cstate': {}, 'pstate': {}, 'dir': src_dir, 'json': json_name,
                'chroma': chroma_dir, 'model': model_key, 'chunk': chunk}

    models = {
        'em': "all-MiniLM-L6-v2",
        'en': "all-mpnet-base-v2",
        'ch': "shibing624/text2vec-base-chinese-sentence",
    }
    allState = {
        'insts': entry('insts', 'filestatus.insts.json', 'chroma_db_insts', 'en', 0),
        'en': entry('src_en', 'filestatus.english.json', 'chroma_db_en', 'en', 0),
        'ch': entry('src_ch', 'filestatus.chinese.json', 'chroma_db_ch', 'ch', 1),
    }
    pdf_names = [(ind, f"pdf_{ind}em") for ind in range(12)]
    allState.update({
        name: entry(f"pdf_sub{ind}", f"filestatus.{name}.json", f"chroma_db_{name}", 'em', 1)
        for ind, name in pdf_names
    })
    return models, allState
def formatPrompt(message, history):
    """Build the prompt that asks the LLM to rewrite a follow-up query.

    Args:
        message: The current query.
        history: Sequence whose item 0 is the previous query and item 1 the
            previous answer. An empty sequence or an empty previous query
            means there is no history.

    Returns:
        *message* unchanged when there is no previous query; otherwise a
        multi-line prompt containing the previous query, the previous answer
        and the current query, ending with "New query:".
    """
    if not history or not history[0]:
        return message
    # One segment per line; previously the segments ran together and the
    # answer line referenced a misspelled name, raising NameError.
    return (
        "Create a new query based on previous query/answer paire and current query:\n"
        f"Previous query: {history[0]}\n"
        f"Previous answer: {history[1]}\n"
        f"Current query: {message}\n"
        "New query:"
    )

def llmNewQuery(prompt, history):
    """Return the LLM's answer to the prompt produced by formatPrompt.

    The current query and the history are combined by formatPrompt and the
    resulting text is passed to llmGenerate.
    """
    return llmGenerate(formatPrompt(prompt, history))

def llmGenerate(prompt, temperature=0.001, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0):
    """Generate text for *prompt* with "mistralai/Mistral-7B-Instruct-v0.2".

    A new InferenceClient is created per call. The reply is requested as a
    token stream with sampling enabled and a fixed seed of 42, and the token
    texts are concatenated into a single string.

    Args:
        prompt: Text sent to the model.
        temperature: Passed through unchanged.
        max_new_tokens: Passed through unchanged.
        top_p: Converted with float() before being passed on.
        repetition_penalty: Passed through unchanged.

    Returns:
        The concatenated text of all streamed tokens.
    """
    sampling = {
        "temperature": temperature,
        "max_new_tokens": max_new_tokens,
        "top_p": float(top_p),
        "repetition_penalty": repetition_penalty,
        "do_sample": True,
        "seed": 42,
    }
    client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
    token_stream = client.text_generation(
        prompt, **sampling, stream=True, details=True, return_full_text=False
    )
    return "".join(chunk.token.text for chunk in token_stream)

   
def thoseRemove():
    """Return a new list holding the single word "redundant"."""
    return ["redundant"]

def GetContent(file):
    """Return the whole text of *file*, opened in text mode with the default encoding."""
    with open(file) as handle:
        return handle.read()

def scriptExamples():
    """Return the fixed block of example Perl ECO scripts.

    The text holds nine commented examples (manual ECO, automatic ECO,
    automatic metal-only ECO and variations on fix_design, run_lec and
    get_spare_cells). process_query prepends it to the LLM context when it
    has collected API files for the query. The string is returned verbatim,
    including its leading newline and trailing whitespace.
    """
    exp = """
#The first ECO scipt example for manual ECO:
use strict;
setup_eco("eco_example");
read_library("tsmc.5nm.lib");
read_design("-imp", "implementation.gv");
set_top("topmod");
change_pin("u_abc/state_reg_0_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_1_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_2_/D", "INVX1", "", "-");
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
#End of the manual ECO script example

#The second ECO script example for automatic ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");       
#read_svf("-imp", "implementation.svf.txt"); 
read_design("-ref", "reference.gv");
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
# Preserve DFT Test Logic
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
fix_design();
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example


#The third ECO script example is for automatic metal only ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");     
#read_svf("-imp", "implementation.svf.txt"); 
read_design("-ref", "reference.gv");# Read in Reference Netlist
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
read_lef("tsmc.lef"); # Read LEF
read_def("topmod.def"); # Read Design Exchange Format file
fix_design(); # Must run before get_spare_cells and map_spare_cells
get_spare_cells("*/*_SPARE*");
map_spare_cells();
report_eco(); # ECO report
check_design();# Check if the ECO causes any issue, like floating
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
write_perl("eco_result.pl");# Write out result in Perl script
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example

#The four ECO script example is the same as the third ECO script, except fix_design
# list_file option to load in the ECO points list file converted from RTL-to-RTL LEC result
fix_design("-list_file", "the_eco_points.txt");    

#The 5th ECO script example is the same as the 3rd ECO script, except fix_design
# Enable flatten mode ECO. The default mode is hierarchical. The flatten mode is for small fix but the changes go across
# module boundaries
fix_design("-flatten");

#The 6th ECO script is similar to the third ECO script, but it dumps formality help file after LEC 
run_lec(); # Run GOF LEC to generate Formality help files
write_compare_points("compare_points.report");
write_formality_help_files("fm_dir/formality_help"); # formality_help files are generated in fm_dir folder    

#The 7th ECO script is similar to the third ECO script, but it uses gate array spare cells
fix_design(); # Must run before get_spare_cells and map_spare_cells
# Enable Gate Array Spare Cells Metal Only ECO Flow, map_spare_cells will map to Gate Array Cells only
get_spare_cells("-gate_array", "G*", "-gate_array_filler", "GFILL*|GDCAP*");
map_spare_cells();

#The 8th ECO script is similar to the third ECO script, but it uses only deleted gates or freed up gates in ECO as spare cells
fix_design(); # Must run before get_spare_cells and map_spare_cells
get_spare_cells("-addfreed");
map_spare_cells();

#The 9th ECO script is manual ECO, find all memory hierarchically and tie the pin TEST_SHIFT of memory to net "TEST_EN"
use strict;
setup_eco("eco_example");
read_library("tsmc.3nm.lib");
read_design("-imp", "from_backend.gv");
set_top("topmod");
# Get all memories hierarchically, instance naming, "U_HMEM*"
my @mems = get_cells("-hier", "U_HMEM*");
foreach my $mem (@mems){
    change_pin("$mem/TEST_SHIFT", "TEST_EN");
}
report_eco(); # ECO report
check_design();
write_verilog("mem_eco.v"); 
    
    """
    return exp