Ubuntu committed on
Commit
6b1f9f6
1 Parent(s): f81b82b

added dataset for content moderation

Browse files
data/categories.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:778a6031897e58a22754cb61b8ca1fe3316d360708a855cd2fc7f3b3172dbff9
3
+ size 52011
data/categories_refined.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dc99dd917f1d90c0e240fafcc0dc2c164d39ca692c5b538558c94213c284d6a
3
+ size 473
data/categories_refined.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Beauty_and_Fitness": 0,
3
+ "People_and_Society": 1,
4
+ "Travel_and_Transportation": 2,
5
+ "Shopping": 3,
6
+ "Adult": 4,
7
+ "Sports": 5,
8
+ "Science": 6,
9
+ "Food_and_Drink": 7,
10
+ "News": 8,
11
+ "Sensitive Subjects": 9,
12
+ "Autos_and_Vehicles": 10,
13
+ "Law_and_Government": 11,
14
+ "Business_and_Industrial": 12,
15
+ "Health": 13,
16
+ "Real Estate": 14,
17
+ "Books_and_Literature": 15,
18
+ "Computers_and_Electronics": 16,
19
+ "Internet_and_Telecom": 17,
20
+ "Home_and_Garden": 18,
21
+ "Jobs_and_Education": 19,
22
+ "Online Communities": 20,
23
+ "Finance": 21,
24
+ "Arts_and_Entertainment": 22,
25
+ "Games": 23,
26
+ "Hobbies_and_Leisure": 24,
27
+ "Reference": 25,
28
+ "Pets_and_Animals": 26
29
+ }
data/categories_refined_reverse.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": "Beauty_and_Fitness",
3
+ "1": "People_and_Society",
4
+ "2": "Travel_and_Transportation",
5
+ "3": "Shopping",
6
+ "4": "Adult",
7
+ "5": "Sports",
8
+ "6": "Science",
9
+ "7": "Food_and_Drink",
10
+ "8": "News",
11
+ "9": "Sensitive Subjects",
12
+ "10": "Autos_and_Vehicles",
13
+ "11": "Law_and_Government",
14
+ "12": "Business_and_Industrial",
15
+ "13": "Health",
16
+ "14": "Real Estate",
17
+ "15": "Books_and_Literature",
18
+ "16": "Computers_and_Electronics",
19
+ "17": "Internet_and_Telecom",
20
+ "18": "Home_and_Garden",
21
+ "19": "Jobs_and_Education",
22
+ "20": "Online Communities",
23
+ "21": "Finance",
24
+ "22": "Arts_and_Entertainment",
25
+ "23": "Games",
26
+ "24": "Hobbies_and_Leisure",
27
+ "25": "Reference",
28
+ "26": "Pets_and_Animals"
29
+ }
data/final_adult_content.xlsx ADDED
Binary file (66.2 kB). View file
 
data_categories/Adult.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22de4244454e054a6eda1b3cb6c6172e7d5b82be16b54a1ce61a4595506a54c0
3
+ size 17372
data_categories/Arts_and_Entertainment.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e1362eda4777624ea8a914bad2941f4be81692305e9dd58c8d785d3bf332e9
3
+ size 40121
data_categories/Autos_and_Vehicles.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eecc87c19910ad8106cd0469d653b648b8467d2e053da2bdb1daf8feadb8a41a
3
+ size 94471
data_categories/Beauty_and_Fitness.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b6bfaa17ae6c8a0a0a2216e701d83bce11edd111b40d4d70e6139ef53a692b
3
+ size 32971
data_categories/Books_and_Literature.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd3f1c35c45c75730dd5c5b40e6d783d38424a90205d24ba07037efc6d74fd1
3
+ size 41885
data_categories/Business_and_Industrial.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:235b21afaa1e2a4e8f9ffb22df3e5498a29ae47a95ff2bb538ea2947fa685a62
3
+ size 46290
data_categories/Computers_and_Electronics.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23ba5a3666e7d67ef57a7fcf301685ae7b66ffc1ed4f33ee6d7b01d11af136ed
3
+ size 27167
data_categories/Finance.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b061d6a2d4a9b298354add7aec0ae21eaff9f0b722b87a90c7e51f39c56b1f7
3
+ size 37987
data_categories/Food_and_Drink.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:317f8d87736b3523b83ec3fe0c8bcd2d36df06fce8076a4a414125bfc1ca249b
3
+ size 42073
data_categories/Games.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08d8c1f1995e43b827c6abd02781ac9e2aff08937fdf9d210ae3066728816308
3
+ size 27911
data_categories/Health.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ee5d6e90846d5ca5479c1e2a404b1cdc67a5e18bc464140c724a64efdf66d72
3
+ size 30005
data_categories/Hobbies_and_Leisure.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c6dd902daefb79a277d66511a78d9d9d68e8b5710ddbd1b9d5c346b06e29c0e
3
+ size 25404
data_categories/Home_and_Garden.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2037b08533f76de4f4c5512f9a697e76dc7c276fab230eee27bf6af93e8bca4
3
+ size 41020
data_categories/Internet_and_Telecom.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2db27b427661774cd4f202a11070c7a586af3498256e836dbccdd951c9b83c
3
+ size 45065
data_categories/Jobs_and_Education.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa9facf376cc8f80a8c0c90591bf17665ae254a7545aaf9255a50b74ecd15a91
3
+ size 41746
data_categories/Law_and_Government.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:085a7cc338c3fb1efc9c62443950831d122453d1d7fab84fc317bce0772f90cf
3
+ size 41879
data_categories/News.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27e0a8d81bbe5cd978dea88d3eda0fc01e3528106a9a3bd2e9fc5bdfdeb50632
3
+ size 23200
data_categories/Online Communities.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:903899c0689fab2c6648a715e5e488429b847054af33c1d26b252eb36352cf29
3
+ size 46352
data_categories/People_and_Society.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac0f67f4c684e3a5b56f925e60ed6d5883947b828283d4cbdebae31451487ee
3
+ size 36511
data_categories/Pets_and_Animals.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7d31c9e93973ac0a65c82b81c2eae9634b06fb3205560b347fd618567e517d7
3
+ size 57283
data_categories/Real Estate.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98584873072b57c4b8629f8071ae0ffe4548f2c1a953260693659f2f740c43a6
3
+ size 36020
data_categories/Reference.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d77e7335dab3965e50b81a91d2f4312e5408705847ad425b0261a0e45acd136
3
+ size 55091
data_categories/Science.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3057d6d3eb499516bd098f13399ab51b4eea95be7816769c8e409638af31d1e
3
+ size 42699
data_categories/Sensitive Subjects.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6851357c45f0aa9baad91b8263d4f85421ae16952682e44279b193e289e9ed32
3
+ size 10172
data_categories/Shopping.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88fe8138d63bd43921db7982b432d2d78bbcf8c17d182cf55877c616e856675f
3
+ size 46132
data_categories/Sports.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b063901d162d04321a70b2761764a9bf97276b746c5c0084ee974216a2ee4812
3
+ size 40242
data_categories/Travel_and_Transportation.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d7becc914296a422a71898a7126c9e5af1c2bc23a9ffe0eebef8f72499b68ad
3
+ size 44633
requirements.txt CHANGED
@@ -8,4 +8,8 @@ tensorflow
8
  tensorflow_hub
9
  tensorflow_text
10
  scikit-learn
11
- evaluate
 
 
 
 
 
8
  tensorflow_hub
9
  tensorflow_text
10
  scikit-learn
11
+ evaluate
12
+ openpyxl
13
+ summa
14
+ git+https://github.com/LIAAD/yake
15
+ multi_rake
research/07_adult_content_dataset.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
research/07_creating_data_for_categories.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
research/trials.ipynb CHANGED
@@ -810,6 +810,13 @@
810
  "# df.head(5).values[4][0].split(\"\\n\")\n"
811
  ]
812
  },
 
 
 
 
 
 
 
813
  {
814
  "cell_type": "code",
815
  "execution_count": null,
 
810
  "# df.head(5).values[4][0].split(\"\\n\")\n"
811
  ]
812
  },
813
+ {
814
+ "cell_type": "code",
815
+ "execution_count": null,
816
+ "metadata": {},
817
+ "outputs": [],
818
+ "source": []
819
+ },
820
  {
821
  "cell_type": "code",
822
  "execution_count": null,