Spaces:

alfraser
/

llm-arch

Runtime error

alfraser committed on Feb 5

Commit

b897a48

•

1 Parent(s): f3f6cf6

Switched from random.choices to random.sample everywhere a distinct random subset is needed: random.choices samples with replacement, so the same item can be returned more than once. Discovered during pricing testing.

Files changed (5) hide show

pages/030_Test_Runner.py CHANGED Viewed

@@ -2,7 +2,7 @@ import regex as re
 import streamlit as st
 from pandas import DataFrame
-from random import choices
 from src.architectures import *
 from src.common import generate_group_tag
 from src.datatypes import *
@@ -82,9 +82,10 @@ def display_pricing_fact_test():
     st.write("### Run:")
     st.write(f"**{total_tests}** total tests will be run")
     if st.button("**Run**", disabled=(total_tests == 0), key="pricing_test_button"):
-        question_price_pairs = choices(question_price_pairs, k=q_count)
         question_price_dict = {qpp[0]: qpp[1] for qpp in question_price_pairs}
         questions = list(question_price_dict.keys())
         answer_stats = {}
         for arch_name in selected_archs:
             answer_stats[arch_name] = [0, 0]  # [correct, incorrect]

 import streamlit as st
 from pandas import DataFrame
+from random import sample
 from src.architectures import *
 from src.common import generate_group_tag
 from src.datatypes import *
     st.write("### Run:")
     st.write(f"**{total_tests}** total tests will be run")
     if st.button("**Run**", disabled=(total_tests == 0), key="pricing_test_button"):
+        question_price_pairs = sample(question_price_pairs, k=q_count)
         question_price_dict = {qpp[0]: qpp[1] for qpp in question_price_pairs}
         questions = list(question_price_dict.keys())
         answer_stats = {}
         for arch_name in selected_archs:
             answer_stats[arch_name] = [0, 0]  # [correct, incorrect]

src/data_synthesis/select_test_data_from_all_products.py CHANGED Viewed

@@ -10,7 +10,7 @@ process being statistical has clustered a lot of products around a 3.5 rating
 - hold some data back for specific tests - e.g. if I want to test subsequent addition of data
 """
 import os
-from random import randint, random, choices, shuffle
 import shutil
 import sqlite3
@@ -97,7 +97,7 @@ def winnow_prods_per_category(min_count: int = 10, max_count: int = 25):
         current_ids = [r[0] for r in execute_select(sql)]
         if len(current_ids) > target_prod_count:
             num_to_winnow = len(current_ids) - target_prod_count
-            ids_to_drop = choices(current_ids, k=num_to_winnow)
             drop_products(ids_to_drop)
             print(f'Winnowed {c} to {target_prod_count} products')
         else:

 - hold some data back for specific tests - e.g. if I want to test subsequent addition of data
 """
 import os
+from random import randint, random, sample, shuffle
 import shutil
 import sqlite3
         current_ids = [r[0] for r in execute_select(sql)]
         if len(current_ids) > target_prod_count:
             num_to_winnow = len(current_ids) - target_prod_count
+            ids_to_drop = sample(current_ids, k=num_to_winnow)
             drop_products(ids_to_drop)
             print(f'Winnowed {c} to {target_prod_count} products')
         else:

src/data_synthesis/test_question_generator.py CHANGED Viewed

@@ -5,7 +5,7 @@ import os
 import json
 import sys
-from random import choice, choices, randint
 from typing import Dict
 from src.common import join_items_comma_and, data_dir
@@ -44,7 +44,7 @@ def get_random_values() -> Dict[str, str]:
         category_name = "TV"
     product: Product = choice(category.products)
-    features: List[Feature] = choices(category.features, k=randint(1, 4))
     characteristic: str = choice([
         "big",
         "durable",

 import json
 import sys
+from random import choice, sample, randint
 from typing import Dict
 from src.common import join_items_comma_and, data_dir
         category_name = "TV"
     product: Product = choice(category.products)
+    features: List[Feature] = sample(category.features, k=randint(1, 4))
     characteristic: str = choice([
         "big",
         "durable",

src/testing.py CHANGED Viewed

@@ -8,7 +8,7 @@ import sys
 from huggingface_hub import Repository
 from queue import Queue
-from random import choices
 from threading import Thread, Timer
 from typing import Dict, List, Optional, Tuple
@@ -108,7 +108,7 @@ class TestGenerator:
         Return n random questions
         """
         cls.load_questions()
-        return choices(cls.questions, k=n)
 class ArchitectureRequestRecord:

 from huggingface_hub import Repository
 from queue import Queue
+from random import sample
 from threading import Thread, Timer
 from typing import Dict, List, Optional, Tuple
         Return n random questions
         """
         cls.load_questions()
+        return sample(cls.questions, k=n)
 class ArchitectureRequestRecord:

src/training/prep_finetuning.py CHANGED Viewed

@@ -9,7 +9,7 @@ import pandas as pd
 from abc import ABC, abstractmethod
 from copy import copy
-from random import choice, choices, shuffle
 from time import time
 from typing import Tuple, Generator

 from abc import ABC, abstractmethod
 from copy import copy
+from random import choice, shuffle
 from time import time
 from typing import Tuple, Generator