alfraser committed on
Commit
b897a48
1 Parent(s): f3f6cf6

Switched from random.choices to random.sample everywhere a set of distinct random items is needed: choices samples with replacement, so the same item can be returned more than once. Discovered during pricing testing.

Browse files
pages/030_Test_Runner.py CHANGED
@@ -2,7 +2,7 @@ import regex as re
2
  import streamlit as st
3
 
4
  from pandas import DataFrame
5
- from random import choices
6
  from src.architectures import *
7
  from src.common import generate_group_tag
8
  from src.datatypes import *
@@ -82,9 +82,10 @@ def display_pricing_fact_test():
82
  st.write("### Run:")
83
  st.write(f"**{total_tests}** total tests will be run")
84
  if st.button("**Run**", disabled=(total_tests == 0), key="pricing_test_button"):
85
- question_price_pairs = choices(question_price_pairs, k=q_count)
86
  question_price_dict = {qpp[0]: qpp[1] for qpp in question_price_pairs}
87
  questions = list(question_price_dict.keys())
 
88
  answer_stats = {}
89
  for arch_name in selected_archs:
90
  answer_stats[arch_name] = [0, 0] # [correct, incorrect]
 
2
  import streamlit as st
3
 
4
  from pandas import DataFrame
5
+ from random import sample
6
  from src.architectures import *
7
  from src.common import generate_group_tag
8
  from src.datatypes import *
 
82
  st.write("### Run:")
83
  st.write(f"**{total_tests}** total tests will be run")
84
  if st.button("**Run**", disabled=(total_tests == 0), key="pricing_test_button"):
85
+ question_price_pairs = sample(question_price_pairs, k=q_count)
86
  question_price_dict = {qpp[0]: qpp[1] for qpp in question_price_pairs}
87
  questions = list(question_price_dict.keys())
88
+
89
  answer_stats = {}
90
  for arch_name in selected_archs:
91
  answer_stats[arch_name] = [0, 0] # [correct, incorrect]
src/data_synthesis/select_test_data_from_all_products.py CHANGED
@@ -10,7 +10,7 @@ process being statistical has clustered a lot of products around a 3.5 rating
10
  - hold some data back for specific tests - e.g. if I want to test subsequent addition of data
11
  """
12
  import os
13
- from random import randint, random, choices, shuffle
14
  import shutil
15
  import sqlite3
16
 
@@ -97,7 +97,7 @@ def winnow_prods_per_category(min_count: int = 10, max_count: int = 25):
97
  current_ids = [r[0] for r in execute_select(sql)]
98
  if len(current_ids) > target_prod_count:
99
  num_to_winnow = len(current_ids) - target_prod_count
100
- ids_to_drop = choices(current_ids, k=num_to_winnow)
101
  drop_products(ids_to_drop)
102
  print(f'Winnowed {c} to {target_prod_count} products')
103
  else:
 
10
  - hold some data back for specific tests - e.g. if I want to test subsequent addition of data
11
  """
12
  import os
13
+ from random import randint, random, sample, shuffle
14
  import shutil
15
  import sqlite3
16
 
 
97
  current_ids = [r[0] for r in execute_select(sql)]
98
  if len(current_ids) > target_prod_count:
99
  num_to_winnow = len(current_ids) - target_prod_count
100
+ ids_to_drop = sample(current_ids, k=num_to_winnow)
101
  drop_products(ids_to_drop)
102
  print(f'Winnowed {c} to {target_prod_count} products')
103
  else:
src/data_synthesis/test_question_generator.py CHANGED
@@ -5,7 +5,7 @@ import os
5
  import json
6
  import sys
7
 
8
- from random import choice, choices, randint
9
  from typing import Dict
10
 
11
  from src.common import join_items_comma_and, data_dir
@@ -44,7 +44,7 @@ def get_random_values() -> Dict[str, str]:
44
  category_name = "TV"
45
  product: Product = choice(category.products)
46
 
47
- features: List[Feature] = choices(category.features, k=randint(1, 4))
48
  characteristic: str = choice([
49
  "big",
50
  "durable",
 
5
  import json
6
  import sys
7
 
8
+ from random import choice, sample, randint
9
  from typing import Dict
10
 
11
  from src.common import join_items_comma_and, data_dir
 
44
  category_name = "TV"
45
  product: Product = choice(category.products)
46
 
47
+ features: List[Feature] = sample(category.features, k=randint(1, 4))
48
  characteristic: str = choice([
49
  "big",
50
  "durable",
src/testing.py CHANGED
@@ -8,7 +8,7 @@ import sys
8
 
9
  from huggingface_hub import Repository
10
  from queue import Queue
11
- from random import choices
12
  from threading import Thread, Timer
13
  from typing import Dict, List, Optional, Tuple
14
 
@@ -108,7 +108,7 @@ class TestGenerator:
108
  Return n random questions
109
  """
110
  cls.load_questions()
111
- return choices(cls.questions, k=n)
112
 
113
 
114
  class ArchitectureRequestRecord:
 
8
 
9
  from huggingface_hub import Repository
10
  from queue import Queue
11
+ from random import sample
12
  from threading import Thread, Timer
13
  from typing import Dict, List, Optional, Tuple
14
 
 
108
  Return n random questions
109
  """
110
  cls.load_questions()
111
+ return sample(cls.questions, k=n)
112
 
113
 
114
  class ArchitectureRequestRecord:
src/training/prep_finetuning.py CHANGED
@@ -9,7 +9,7 @@ import pandas as pd
9
 
10
  from abc import ABC, abstractmethod
11
  from copy import copy
12
- from random import choice, choices, shuffle
13
  from time import time
14
  from typing import Tuple, Generator
15
 
 
9
 
10
  from abc import ABC, abstractmethod
11
  from copy import copy
12
+ from random import choice, shuffle
13
  from time import time
14
  from typing import Tuple, Generator
15