Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		Petr Tsvetkov
		
	committed on
		
		
					Commit 
							
							·
						
						c151bb0
	
1
								Parent(s):
							
							0b259d2
								
Fix the statistics in visualization
Browse files
- change_visualizer.py +2 -2
- statistics.py +3 -11
    	
        change_visualizer.py
    CHANGED
    
    | @@ -9,8 +9,8 @@ n_diffs_manual = len(df_manual) | |
| 9 | 
             
            df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
         | 
| 10 | 
             
            n_diffs_synthetic = len(df_synthetic)
         | 
| 11 |  | 
| 12 | 
            -
            STATISTICS = {"manual": statistics. | 
| 13 | 
            -
                          "synthetic": statistics. | 
| 14 |  | 
| 15 |  | 
| 16 | 
             
            def update_dataset_view(diff_idx):
         | 
|  | |
| 9 | 
             
            df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
         | 
| 10 | 
             
            n_diffs_synthetic = len(df_synthetic)
         | 
| 11 |  | 
| 12 | 
            +
            STATISTICS = {"manual": statistics.get_statistics_for_df(df_manual),
         | 
| 13 | 
            +
                          "synthetic": statistics.get_statistics_for_df(df_synthetic)}
         | 
| 14 |  | 
| 15 |  | 
| 16 | 
             
            def update_dataset_view(diff_idx):
         | 
    	
        statistics.py
    CHANGED
    
    | @@ -22,18 +22,10 @@ def get_statistics(start_msg, end_msg, annotated_msg): | |
| 22 | 
             
                }
         | 
| 23 |  | 
| 24 |  | 
| 25 | 
            -
            def get_statistics_for_df(df: pd.DataFrame | 
| 26 | 
            -
                stats = [get_statistics(row[ | 
|  | |
| 27 |  | 
| 28 | 
             
                assert len(stats) > 0
         | 
| 29 |  | 
| 30 | 
             
                return {stat_name: np.asarray([e[stat_name] for e in stats]) for stat_name in stats[0]}
         | 
| 31 | 
            -
             | 
| 32 | 
            -
             | 
| 33 | 
            -
            def get_statistics_for_manual_df(df):
         | 
| 34 | 
            -
                return get_statistics_for_df(df, start_col="commit_msg_start", end_col='commit_msg_end',
         | 
| 35 | 
            -
                                             annotated_col='annotated_diff')
         | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
            def get_statistics_for_synthetic_df(df):
         | 
| 39 | 
            -
                return get_statistics_for_df(df, start_col="initial_msg_pred", end_col='reference', annotated_col='annotated_diff')
         | 
|  | |
| 22 | 
             
                }
         | 
| 23 |  | 
| 24 |  | 
| 25 | 
            +
            def get_statistics_for_df(df: pd.DataFrame):
         | 
| 26 | 
            +
                stats = [get_statistics(row["commit_msg_start"], row["commit_msg_end"], row["annotated_diff"]) for _, row in
         | 
| 27 | 
            +
                         df.iterrows()]
         | 
| 28 |  | 
| 29 | 
             
                assert len(stats) > 0
         | 
| 30 |  | 
| 31 | 
             
                return {stat_name: np.asarray([e[stat_name] for e in stats]) for stat_name in stats[0]}
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
