	reject duplicate submission
app.py CHANGED

@@ -15,7 +15,21 @@ H4_TOKEN = os.environ.get("H4_TOKEN", None)
 LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
 IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", None))
 
+
+def get_all_requested_models(requested_models_dir):
+    depth = 1
+    file_names = []
+
+    for root, dirs, files in os.walk(requested_models_dir):
+        current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
+        if current_depth == depth:
+            file_names.extend([os.path.join(root, file) for file in files])
+
+    return set([file_name.lower().split("./evals/")[1] for file_name in file_names])
+
+
 repo = None
+requested_models = None
 if H4_TOKEN:
     print("pulling repo")
     # try:
@@ -31,6 +45,9 @@ if H4_TOKEN:
     )
     repo.git_pull()
 
+    requested_models_dir = "./evals/eval_requests"
+    requested_models = get_all_requested_models(requested_models_dir)
+
 
 # parse the results
 BENCHMARKS = ["arc_challenge", "hellaswag", "hendrycks", "truthfulqa_mc"]
@@ -110,7 +127,7 @@ def get_leaderboard():
 
     dataframe = pd.DataFrame.from_records(all_data)
     dataframe = dataframe.sort_values(by=["Average ⬆️"], ascending=False)
-    print(dataframe)
+    # print(dataframe)
     dataframe = dataframe[COLS]
     return dataframe
 
@@ -187,12 +204,12 @@ def add_new_eval(
     if is_delta_weight and not is_model_on_hub(base_model, revision):
         error_message = f'Base model "{base_model}" was not found on hub!'
         print(error_message)
-        return f"<p style='color: red; font-size: …
+        return f"<p style='color: red; font-size: 20px; text-align: center;'>{error_message}</p>"
 
     if not is_model_on_hub(model, revision):
         error_message = f'Model "{model}"was not found on hub!'
         print(error_message)
-        return f"<p style='color: red; font-size: …
+        return f"<p style='color: red; font-size: 20px; text-align: center;'>{error_message}</p>"
 
     print("adding new eval")
 
@@ -216,6 +233,11 @@ def add_new_eval(
     os.makedirs(OUT_DIR, exist_ok=True)
     out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{is_8_bit_eval}_{is_delta_weight}.json"
 
+    # Check for duplicate submission
+    if out_path.lower() in requested_models:
+        duplicate_request_message = "This model has been already submitted."
+        return f"<p style='color: orange; font-size: 20px; text-align: center;'>{duplicate_request_message}</p>"
+
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
     LMEH_REPO = "HuggingFaceH4/lmeh_evaluations"
@@ -230,7 +252,7 @@ def add_new_eval(
     )
 
     success_message = "Your request has been submitted to the evaluation queue!"
-    return f"<p style='color: green; font-size: …
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{success_message}</p>"
 
 
 def refresh():
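The guard added above works in two steps: at startup, get_all_requested_models walks ./evals/eval_requests exactly one directory level deep (one folder per org) and records every request file as a lowercase path; at submission time, add_new_eval lowercases the candidate out_path and returns the orange message if that path is already in the set. Below is a self-contained sketch of the same idea; the directory layout and model names are made up for the demo, and it normalizes paths with os.path.relpath where the commit itself splits on the literal "./evals/" prefix.

    # Self-contained sketch of the duplicate check (illustrative names and
    # layout; the real Space builds the set from its cloned evals repo).
    import os
    import tempfile


    def get_all_requested_models(requested_models_dir):
        """Collect request files exactly one level below requested_models_dir,
        normalized to lowercase paths like 'eval_requests/org/file.json'."""
        depth = 1
        found = set()
        for root, _dirs, files in os.walk(requested_models_dir):
            current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
            if current_depth == depth:
                for name in files:
                    full = os.path.join(root, name)
                    # The commit does full.lower().split("./evals/")[1] here;
                    # relpath against the parent of the requests dir has the
                    # same effect without hard-coding the prefix.
                    rel = os.path.relpath(full, os.path.dirname(requested_models_dir))
                    found.add(rel.lower())
        return found


    with tempfile.TemporaryDirectory() as evals_root:
        # Simulate one already-submitted request file.
        requests_dir = os.path.join(evals_root, "eval_requests")
        os.makedirs(os.path.join(requests_dir, "someorg"))
        existing = os.path.join(requests_dir, "someorg",
                                "somemodel_eval_request_False_False_False.json")
        open(existing, "w").close()

        requested_models = get_all_requested_models(requests_dir)

        # Resubmitting the same model, in any letter case, is caught ...
        duplicate = os.path.join("eval_requests", "SomeOrg",
                                 "SomeModel_eval_request_False_False_False.json")
        assert duplicate.lower() in requested_models

        # ... while a model not yet in the queue passes the guard.
        fresh = os.path.join("eval_requests", "someorg",
                             "other-model_eval_request_False_False_False.json")
        assert fresh.lower() not in requested_models

    print("duplicate check behaves as expected")

Because both sides of the comparison are lowercased, SomeOrg/SomeModel and someorg/somemodel count as the same submission. Note also that the diff does not show requested_models being updated after a successful write, so back-to-back duplicates within one session would still slip through until the Space restarts and rebuilds the set.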
    	
utils.py CHANGED

@@ -133,4 +133,4 @@ def get_eval_results_dicts(is_public=True) -> List[Dict]:
     return [e.to_dict() for e in eval_results]
 
 eval_results_dict = get_eval_results_dicts()
-print(eval_results_dict)
+# print(eval_results_dict)
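Both here and in get_leaderboard, the commit silences module-level debug prints by commenting them out rather than deleting them. If that output is still wanted occasionally, a toggleable alternative (not part of this commit, just a sketch) is the standard logging module:

    import logging
    import os

    # Default to WARNING; run with LOG_LEVEL=DEBUG to see the debug output.
    logging.basicConfig(level=os.environ.get("LOG_LEVEL", "WARNING"))
    logger = logging.getLogger(__name__)

    # Stands in for the commented-out `print(eval_results_dict)`.
    logger.debug("eval results: %s", [{"model": "example"}])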

