Szymon Woźniak committed on
Commit
55c9088
1 Parent(s): 9d3e113

change citation formatting

Browse files
Files changed (1) hide show
  1. pages/2_Dataset_Statistics.py +2 -5
pages/2_Dataset_Statistics.py CHANGED
@@ -7,9 +7,6 @@ from filter_dataframe import filter_dataframe
7
  def get_language_stats_df():
8
  return pd.read_parquet("data/datasets_stats.parquet")
9
 
10
- def export_citations(df: pd.DataFrame) -> str:
11
- return "\n\n".join(df["citation"].tolist())
12
-
13
  _MMS_CITATION = """\
14
  @misc{augustyniak2023massively,
15
  title={Massively Multilingual Corpus of Sentiment Datasets and Multi-faceted Sentiment Classification Benchmark},
@@ -20,7 +17,7 @@ _MMS_CITATION = """\
20
  primaryClass={cs.CL}
21
  }"""
22
 
23
- CITATION_SEPARATOR = "\n% " + ("-" * 90) + "\n"
24
 
25
  def export_citations(df: pd.DataFrame):
26
  dataset_names = df.original_dataset.tolist()
@@ -30,7 +27,7 @@ def export_citations(df: pd.DataFrame):
30
  citations_grouped_df = df.groupby("citation").agg({"dataset": lambda x: ", ".join(x)}).reset_index().sort_values(by="dataset")
31
  dataset_citations = ("% Datasets: " + citations_grouped_df["dataset"] + "\n" + citations_grouped_df["citation"]).to_list()
32
  dataset_citations_joined = CITATION_SEPARATOR.join(dataset_citations)
33
- return f"{_MMS_CITATION}\n\n{dataset_citations_joined}"
34
 
35
 
36
  st.set_page_config(page_title="Dataset statistics", page_icon="📈")
 
7
  def get_language_stats_df():
8
  return pd.read_parquet("data/datasets_stats.parquet")
9
 
 
 
 
10
  _MMS_CITATION = """\
11
  @misc{augustyniak2023massively,
12
  title={Massively Multilingual Corpus of Sentiment Datasets and Multi-faceted Sentiment Classification Benchmark},
 
17
  primaryClass={cs.CL}
18
  }"""
19
 
20
+ CITATION_SEPARATOR = "% " + ("-" * 90) + "\n\n"
21
 
22
  def export_citations(df: pd.DataFrame):
23
  dataset_names = df.original_dataset.tolist()
 
27
  citations_grouped_df = df.groupby("citation").agg({"dataset": lambda x: ", ".join(x)}).reset_index().sort_values(by="dataset")
28
  dataset_citations = ("% Datasets: " + citations_grouped_df["dataset"] + "\n" + citations_grouped_df["citation"]).to_list()
29
  dataset_citations_joined = CITATION_SEPARATOR.join(dataset_citations)
30
+ return f"% MMS corpus citation\n{_MMS_CITATION}\n{CITATION_SEPARATOR}{dataset_citations_joined}"
31
 
32
 
33
  st.set_page_config(page_title="Dataset statistics", page_icon="📈")