zhoudanxie committed on
Commit
4ea5901
1 Parent(s): 16160fe

Fix fr_tracking data errors

Browse files

Drop duplicates in fr_tracking data if any

Files changed (1) hide show
  1. modules/significant.py +7 -0
modules/significant.py CHANGED
@@ -48,6 +48,13 @@ def read_csv_data(
48
  except UnicodeDecodeError:
49
  df_pd = pd_read_csv(url, usecols=cols, encoding="latin")
50
 
 
 
 
 
 
 
 
51
  df_pd.loc[:, "publication_dt"] = to_datetime(df_pd["publication_date"], format="mixed", dayfirst=False, yearfirst=False)
52
  max_date = max(df_pd.loc[:, "publication_dt"].to_list()).date()
53
  #print(max_date)
 
48
  except UnicodeDecodeError:
49
  df_pd = pd_read_csv(url, usecols=cols, encoding="latin")
50
 
51
+ # drop duplicates if any (to avoid a dashboard crash; the original data needs to be revised)
52
+ if len(df_pd[df_pd.duplicated(subset=['document_number'],keep=False)])>0:
53
+ df_pd=df_pd.sort_values(['document_number','publication_date','significant','3(f)(1) significant','Major']).\
54
+ drop_duplicates(subset=['document_number'],keep='last',ignore_index=True)
55
+ else:
56
+ pass
57
+
58
  df_pd.loc[:, "publication_dt"] = to_datetime(df_pd["publication_date"], format="mixed", dayfirst=False, yearfirst=False)
59
  max_date = max(df_pd.loc[:, "publication_dt"].to_list()).date()
60
  #print(max_date)