dhuynh95 committed on
Commit
634e585
1 Parent(s): a1b5baa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -7
app.py CHANGED
@@ -8,7 +8,6 @@ import evaluate
8
  bleu = evaluate.load("bleu")
9
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
- print(HF_TOKEN)
12
  client = InferenceClient(model="bigcode/starcoder", token=HF_TOKEN)
13
 
14
  login(token=HF_TOKEN)
@@ -16,7 +15,7 @@ checkpoint = "bigcode/starcoder"
16
  tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_auth_token=True)
17
 
18
  df = pd.read_csv("samples.csv")
19
- sample_df = df.loc[~df.prediction_50.isna()]
20
 
21
  description = "<h1 style='text-align: center; color: #333333; font-size: 40px;'>StarCoder Memorization Verifier"
22
  high_bleu_examples = {
@@ -190,6 +189,10 @@ def low_bleu_mirror(x):
190
  output = low_bleu_examples[x]
191
  return output
192
 
 
 
 
 
193
  with gr.Blocks() as demo:
194
  with gr.Column():
195
  gr.Markdown(description)
@@ -201,8 +204,15 @@ with gr.Blocks() as demo:
201
  label="Original",
202
  )
203
 
 
 
 
 
204
  with gr.Accordion("Advanced parameters", open=False):
205
- k = gr.Slider(minimum=1, maximum=250, value=50)
 
 
 
206
  submit = gr.Button("Check", variant="primary")
207
  high_bleu_examples = gr.Examples(list(high_bleu_examples.keys()), label="High memorization samples",
208
  inputs=instruction, outputs=instruction,
@@ -211,13 +221,22 @@ with gr.Blocks() as demo:
211
  inputs=instruction, outputs=instruction,
212
  fn=low_bleu_mirror, cache_examples=True)
213
  with gr.Column():
214
- output = gr.Textbox(lines=5,
215
- label="Completion", interactive=False)
216
- label = gr.Label(value={"BLEU": 0},
217
- label="Similarity score (BLEU)")
 
 
 
 
 
 
 
 
218
  submit.click(
219
  complete,
220
  inputs=[instruction, k],
221
  outputs=[output, label],
222
  )
 
223
  demo.queue(concurrency_count=16).launch(debug=True)
 
8
  bleu = evaluate.load("bleu")
9
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
11
  client = InferenceClient(model="bigcode/starcoder", token=HF_TOKEN)
12
 
13
  login(token=HF_TOKEN)
 
15
  tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_auth_token=True)
16
 
17
  df = pd.read_csv("samples.csv")
18
+ df = df[["content"]].iloc[:50]
19
 
20
  description = "<h1 style='text-align: center; color: #333333; font-size: 40px;'>StarCoder Memorization Verifier"
21
  high_bleu_examples = {
 
189
  output = low_bleu_examples[x]
190
  return output
191
 
192
+ def df_select(evt: gr.SelectData):
193
+
194
+ return evt.value
195
+
196
  with gr.Blocks() as demo:
197
  with gr.Column():
198
  gr.Markdown(description)
 
204
  label="Original",
205
  )
206
 
207
+ with gr.Column():
208
+ output = gr.Textbox(lines=5, label="Completion", interactive=False)
209
+ with gr.Row():
210
+ with gr.Column():
211
  with gr.Accordion("Advanced parameters", open=False):
212
+ k = gr.Slider(minimum=1, maximum=250, value=50,
213
+ label="Prefix size",
214
+ info="""Number of tokens used in the prompt.
215
+ Lower (higher) levels reduce (increase) the risk of memorization, as large context length increase memorization risks.""")
216
  submit = gr.Button("Check", variant="primary")
217
  high_bleu_examples = gr.Examples(list(high_bleu_examples.keys()), label="High memorization samples",
218
  inputs=instruction, outputs=instruction,
 
221
  inputs=instruction, outputs=instruction,
222
  fn=low_bleu_mirror, cache_examples=True)
223
  with gr.Column():
224
+ label = gr.Label(value={"BLEU": 0},label="Memorization score (BLEU)")
225
+ gr.Markdown("""[BLEU](https://huggingface.co/spaces/evaluate-metric/bleu) score is a metric that can be used to measure similarity of two sentences.
226
+ Here, the higher the BLEU score, the more likely the model learn by heart that example.
227
+ You can reduce the Prefix size in the Advanced parameters to reduce the context length and see if the model still extracts the training sample.""")
228
+
229
+ with gr.Row():
230
+ with gr.Column():
231
+ gr.Markdown("""# More samples from The Stack.
232
+ The examples shown above come from [The Stack](https://huggingface.co/datasets/bigcode/the-stack-dedup), an open-source dataset of code data.
233
+ To try other examples from The Stack, you can browse the table below and click on training samples you wish to assess the memorisation score.""")
234
+ with gr.Accordion("More samples", open=False):
235
+ table = gr.DataFrame(value=df, row_count=5, label="Samples from The Stack", interactive=False)
236
  submit.click(
237
  complete,
238
  inputs=[instruction, k],
239
  outputs=[output, label],
240
  )
241
+ table.select(fn=df_select, outputs=instruction)
242
  demo.queue(concurrency_count=16).launch(debug=True)