Vipitis committed on
Commit
98d4b40
1 Parent(s): db24268

additional model context

Browse files
Files changed (1) hide show
  1. app.py +31 -15
app.py CHANGED
@@ -269,10 +269,10 @@ outro_text ="""
269
  - [] dropdown for model selection (from curated list or all supported models?)
270
  - [] generation history stating which function and orig/generated returns. (use State ??). do it as comments in the code?
271
  - [~] display errors/issues to the user (raise gr.Error could be one idea, but highlighting in the code would be awesome) currently adds a comment to the code.
272
- - [] generate whole shaders (via prompts guidance, recursive from errors)
273
  - [x] accordion with generation parameters (as pipeline_kwargs?) look up starcoder playground and take "inspiration" from there (implemented for both buttons, untested)
274
  - [] support FIM task for better model context
275
- - [x] include some context for prompt (title, comments before a function) - now takes all comments directly before a function as well as all comments at the beginning inside a function.
276
  - [] gradio examples
277
  - [] use GPU if available, respect memory restrictions.
278
  - [x] stream model generation (maybe in a new window?) - janky solution and only sometimes hangs up
@@ -320,7 +320,7 @@ def grab_sample(sample_idx):
320
  # funcs = _parse_functions(sample_code)
321
  # func_identifiers = [f"{idx:2d}: {n.child_by_field_name('declarator').text.decode()}" for idx, n in enumerate(funcs)]
322
  # print(f"updating drop down to:{func_identifiers}")
323
- return sample_pass, sample_code, source_iframe, funcs#, gr.Dropdown.update(choices=func_identifiers) #, sample_title, sample_auhtor
324
 
325
 
326
  def _parse_functions(in_code):
@@ -395,6 +395,10 @@ def alter_return(orig_code, func_idx, temperature, max_new_tokens, top_p, repeti
395
  Args:
396
  orig_code (str): The original code.
397
  func_idx (int): The index of the function to replace the return statement of.
 
 
 
 
398
  pipeline (Pipeline): The pipeline to use for generation.
399
  Returns:
400
  str: The altered code.
@@ -472,22 +476,29 @@ def _get_docstrings(func_node):
472
  returns the docstring of a function node
473
  """
474
  docstring = ""
475
- for node in func_node.child_by_field_name("body").children[1:]:
476
- if node.type == "comment":
477
  docstring += node.text.decode() + "\n"
478
  else:
479
  return docstring
480
  return docstring
481
 
482
- def alter_body(old_code, func_id, funcs_list: list, temperature, max_new_tokens, top_p, repetition_penalty, pipeline=PIPE):
483
  """
484
  Replaces the body of a function with a generated one.
485
  Args:
486
  old_code (str): The original code.
487
  func_node (Node): The node of the function to replace the body of.
 
 
 
 
 
 
488
  pipeline (Pipeline): The pipeline to use for generation.
489
  Returns:
490
  str: The altered code.
 
491
  """
492
  if isinstance(func_id, str):
493
  print(f"{func_id=}")
@@ -517,8 +528,12 @@ def alter_body(old_code, func_id, funcs_list: list, temperature, max_new_tokens,
517
  # second_child = func_node.child_by_field_name("body").children[1] #might error out?
518
  docstring = _get_docstrings(func_node) #might be empty?
519
  if docstring:
520
- model_context = model_context + "\n{\n" + docstring + "\n"
521
- model_context = _grab_before_comments(func_node) + model_context
 
 
 
 
522
  print(f"{model_context=}")
523
  # generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
524
  generation = _run_generation(model_context, pipeline, generation_kwargs)
@@ -568,13 +583,14 @@ with gr.Blocks() as site:
568
  model_cp = gr.Textbox(value="Vipitis/santacoder-finetuned-Shadertoys-fine", label="Model Checkpoint (Enter to load!)", interactive=True)
569
  sample_idx = gr.Slider(minimum=0, maximum=num_samples, value=3211, label="pick sample from dataset", step=1.0)
570
  func_dropdown = gr.Dropdown(value=["0: edit the Code (or load a shader) to update this dropdown"], label="chose a function to modify") #breaks if I add a string in before that? #TODO: use type="index" to get int - always gives None?
 
571
  with gr.Accordion("Advanced settings", open=False): # from: https://huggingface.co/spaces/bigcode/bigcode-playground/blob/main/app.py
572
  with gr.Row():
573
  column_1, column_2 = gr.Column(), gr.Column()
574
  with column_1:
575
  temperature = gr.Slider(
576
  label="Temperature",
577
- value=0.0, #start out at 0 to do greedy? or will there be an error?
578
  minimum=0.0,
579
  maximum=1.0,
580
  step=0.05,
@@ -583,7 +599,7 @@ with gr.Blocks() as site:
583
  )
584
  max_new_tokens = gr.Slider(
585
  label="Max new tokens",
586
- value=160,
587
  minimum=0,
588
  maximum=2048, #this could be inferred from the model?
589
  step=32,
@@ -593,7 +609,7 @@ with gr.Blocks() as site:
593
  with column_2:
594
  top_p = gr.Slider(
595
  label="Top-p (nucleus sampling)",
596
- value=0.85,
597
  minimum=0.0,
598
  maximum=1,
599
  step=0.05,
@@ -610,8 +626,8 @@ with gr.Blocks() as site:
610
  info="Penalize repeated tokens",
611
  )
612
  with gr.Row():
613
- gen_return_button = gr.Button("generate a alternate return statement", label="generate return")
614
- gen_func_button = gr.Button("generate an alternate function body", label="generate function")
615
  with gr.Row():
616
  with gr.Column():
617
  source_embed = gr.HTML('<iframe width="640" height="360" frameborder="0" src="" allowfullscreen></iframe>', label="How this shader originally renders")
@@ -627,9 +643,9 @@ with gr.Blocks() as site:
627
  # history_table = gr.JSON()
628
 
629
  model_cp.submit(fn=_make_pipeline, inputs=[model_cp], outputs=[pipe]) # how can we trigger this on load?
630
- sample_idx.release(fn=grab_sample, inputs=[sample_idx], outputs=[sample_pass, sample_code, source_embed])
631
  gen_return_button.click(fn=alter_return, inputs=[sample_code, func_dropdown, pipe], outputs=[sample_code])
632
- gen_func_button.click(fn=alter_body, inputs=[sample_code, func_dropdown, funcs, temperature, max_new_tokens, top_p, repetition_penalty, pipe], outputs=[sample_code, pipe]).then(
633
  fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown]
634
  )
635
  sample_code.change(fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown]).then(
 
269
  - [] dropdown for model selection (from curated list or all supported models?)
270
  - [] generation history stating which function and orig/generated returns. (use State ??). do it as comments in the code?
271
  - [~] display errors/issues to the user (raise gr.Error could be one idea, but highlighting in the code would be awesome) currently adds a comment to the code.
272
+ - [~] generate whole shaders (via prompts guidance, recursive from errors) - prompt context is in progress.
273
  - [x] accordion with generation parameters (as pipeline_kwargs?) look up starcoder playground and take "inspiration" from there (implemented for both buttons, untested)
274
  - [] support FIM task for better model context
275
+ - [x] include some context for prompt (title, comments before a function) - now takes all comments directly before a function as well as all comments at the beginning inside a function. (misses comments between argument list and body)
276
  - [] gradio examples
277
  - [] use GPU if available, respect memory restrictions.
278
  - [x] stream model generation (maybe in a new window?) - janky solution and only sometimes hangs up
 
320
  # funcs = _parse_functions(sample_code)
321
  # func_identifiers = [f"{idx:2d}: {n.child_by_field_name('declarator').text.decode()}" for idx, n in enumerate(funcs)]
322
  # print(f"updating drop down to:{func_identifiers}")
323
+ return sample_pass, sample_code, sample_title, source_iframe, funcs#, gr.Dropdown.update(choices=func_identifiers) #, sample_title, sample_auhtor
324
 
325
 
326
  def _parse_functions(in_code):
 
395
  Args:
396
  orig_code (str): The original code.
397
  func_idx (int): The index of the function to replace the return statement of.
398
+ temperature (float): The temperature to use for generation.
399
+ max_new_tokens (int): The maximum number of tokens to generate.
400
+ top_p (float): The top_p to use for generation.
401
+ repetition_penalty (float): The repetition_penalty to use for generation.
402
  pipeline (Pipeline): The pipeline to use for generation.
403
  Returns:
404
  str: The altered code.
 
476
  returns the docstring of a function node
477
  """
478
  docstring = ""
479
+ for node in func_node.child_by_field_name("body").children:
480
+ if node.type == "comment" or node.type == "{":
481
  docstring += node.text.decode() + "\n"
482
  else:
483
  return docstring
484
  return docstring
485
 
486
+ def alter_body(old_code, func_id, funcs_list: list, prompt, temperature, max_new_tokens, top_p, repetition_penalty, pipeline=PIPE):
487
  """
488
  Replaces the body of a function with a generated one.
489
  Args:
490
  old_code (str): The original code.
491
  func_node (Node): The node of the function to replace the body of.
492
+ funcs_list (list): The list of all functions in the code.
493
+ prompt (str): The prompt(title) to use for generation.
494
+ temperature (float): The temperature to use for generation.
495
+ max_new_tokens (int): The maximum number of tokens to generate.
496
+ top_p (float): The top_p to use for generation.
497
+ repetition_penalty (float): The repetition_penalty to use for generation.
498
  pipeline (Pipeline): The pipeline to use for generation.
499
  Returns:
500
  str: The altered code.
501
+ pipeline (Pipeline): The pipeline to update the state
502
  """
503
  if isinstance(func_id, str):
504
  print(f"{func_id=}")
 
528
  # second_child = func_node.child_by_field_name("body").children[1] #might error out?
529
  docstring = _get_docstrings(func_node) #might be empty?
530
  if docstring:
531
+ model_context = model_context + "\n" + docstring
532
+ model_context = _grab_before_comments(func_node) + model_context #prepend comments
533
+ if prompt != "":
534
+ model_context = f"//avialable functions: {','.join([n.child_by_field_name('declarator').text.decode() for n in funcs_list])}\n" + model_context #prepend available functions
535
+ model_context = "//Title: " + prompt + "\n" + model_context #prepend user prompt/title
536
+ model_context = "//Language: Shadertoy GLSL fragment shader\n" + model_context #prepend system prompt, language hint
537
  print(f"{model_context=}")
538
  # generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
539
  generation = _run_generation(model_context, pipeline, generation_kwargs)
 
583
  model_cp = gr.Textbox(value="Vipitis/santacoder-finetuned-Shadertoys-fine", label="Model Checkpoint (Enter to load!)", interactive=True)
584
  sample_idx = gr.Slider(minimum=0, maximum=num_samples, value=3211, label="pick sample from dataset", step=1.0)
585
  func_dropdown = gr.Dropdown(value=["0: edit the Code (or load a shader) to update this dropdown"], label="chose a function to modify") #breaks if I add a string in before that? #TODO: use type="index" to get int - always gives None?
586
+ prompt_text = gr.Textbox(value="the title used by the model has generation hint", label="prompt text", info="leave blank to skip", interactive=True)
587
  with gr.Accordion("Advanced settings", open=False): # from: https://huggingface.co/spaces/bigcode/bigcode-playground/blob/main/app.py
588
  with gr.Row():
589
  column_1, column_2 = gr.Column(), gr.Column()
590
  with column_1:
591
  temperature = gr.Slider(
592
  label="Temperature",
593
+ value=0.2, #start out at 0 to do greedy? or will there be an error?
594
  minimum=0.0,
595
  maximum=1.0,
596
  step=0.05,
 
599
  )
600
  max_new_tokens = gr.Slider(
601
  label="Max new tokens",
602
+ value=265,
603
  minimum=0,
604
  maximum=2048, #this could be inferred from the model?
605
  step=32,
 
609
  with column_2:
610
  top_p = gr.Slider(
611
  label="Top-p (nucleus sampling)",
612
+ value=0.90,
613
  minimum=0.0,
614
  maximum=1,
615
  step=0.05,
 
626
  info="Penalize repeated tokens",
627
  )
628
  with gr.Row():
629
+ gen_return_button = gr.Button("generate a alternate return statement", label="generate return", scale=0)
630
+ gen_func_button = gr.Button("generate an alternate function body", label="generate function", scale=1)
631
  with gr.Row():
632
  with gr.Column():
633
  source_embed = gr.HTML('<iframe width="640" height="360" frameborder="0" src="" allowfullscreen></iframe>', label="How this shader originally renders")
 
643
  # history_table = gr.JSON()
644
 
645
  model_cp.submit(fn=_make_pipeline, inputs=[model_cp], outputs=[pipe]) # how can we trigger this on load?
646
+ sample_idx.release(fn=grab_sample, inputs=[sample_idx], outputs=[sample_pass, sample_code, prompt_text, source_embed]) #funcs here?
647
  gen_return_button.click(fn=alter_return, inputs=[sample_code, func_dropdown, pipe], outputs=[sample_code])
648
+ gen_func_button.click(fn=alter_body, inputs=[sample_code, func_dropdown, funcs, prompt_text, temperature, max_new_tokens, top_p, repetition_penalty, pipe], outputs=[sample_code, pipe]).then(
649
  fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown]
650
  )
651
  sample_code.change(fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown]).then(