Vipitis committed on
Commit
46e097d
1 Parent(s): ed9c55f

fix to the generation schema

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -423,18 +423,18 @@ def alter_body(old_code, func_id: str, funcs_list: list, pipeline=PIPE):
423
  second_child = func_node.child_by_field_name("body").children[1] #might error out?
424
  if second_child.type == "comment":
425
  # print(second_child.text.decode())
426
- model_context += second_child.text.decode()
427
  print(f"{model_context=}")
428
  num_new_tokens = max(160,(body_end_idx - body_start_idx) + 10) #TODO: approximation, we do have early stopping? maybe also use a number instead? HARD MAX for performance limits.
429
 
430
  print(f"generating up to {num_new_tokens} after {model_context!r}")
431
  generation = pipeline(model_context, max_new_tokens=num_new_tokens, return_full_text=False)[0]["generated_text"]
432
  print(f"{generation=}")
433
- id_with_generation = identifier_str + generation
434
- print(f"{id_with_generation=}")
435
  try:
436
  #strip the body
437
- first_gened_func = _parse_functions(id_with_generation)[0] # truncate generation to a single function?
438
  except IndexError:
439
  print("generation wasn't a full function.")
440
  altered_code = old_code[:func_start_idx] + model_context + generation + "//the generation didn't complete the function!\n" + old_code[body_end_idx:] #needs a newline to break out of the comment.
@@ -443,7 +443,7 @@ def alter_body(old_code, func_id: str, funcs_list: list, pipeline=PIPE):
443
  print(f"{first_gened_func=}")
444
  generated_body = first_gened_func.child_by_field_name("body").text.decode()
445
  print(f"{generated_body=}")
446
- altered_code = old_code[:func_start_idx] + model_context + generated_body + old_code[body_end_idx:]
447
  return altered_code, pipeline
448
 
449
  def add_history(func_id, orig_rtn, gened_rtn, history):
 
423
  second_child = func_node.child_by_field_name("body").children[1] #might error out?
424
  if second_child.type == "comment":
425
  # print(second_child.text.decode())
426
+ model_context += " { \n " + second_child.text.decode()
427
  print(f"{model_context=}")
428
  num_new_tokens = max(160,(body_end_idx - body_start_idx) + 10) #TODO: approximation, we do have early stopping? maybe also use a number instead? HARD MAX for performance limits.
429
 
430
  print(f"generating up to {num_new_tokens} after {model_context!r}")
431
  generation = pipeline(model_context, max_new_tokens=num_new_tokens, return_full_text=False)[0]["generated_text"]
432
  print(f"{generation=}")
433
+ ctx_with_generation = model_context + generation
434
+ print(f"{ctx_with_generation=}")
435
  try:
436
  #strip the body
437
+ first_gened_func = _parse_functions(ctx_with_generation)[0] # truncate generation to a single function?
438
  except IndexError:
439
  print("generation wasn't a full function.")
440
  altered_code = old_code[:func_start_idx] + model_context + generation + "//the generation didn't complete the function!\n" + old_code[body_end_idx:] #needs a newline to break out of the comment.
 
443
  print(f"{first_gened_func=}")
444
  generated_body = first_gened_func.child_by_field_name("body").text.decode()
445
  print(f"{generated_body=}")
446
+ altered_code = old_code[:func_start_idx] + identifier_str + generated_body + old_code[body_end_idx:]
447
  return altered_code, pipeline
448
 
449
  def add_history(func_id, orig_rtn, gened_rtn, history):