Spaces:
Runtime error
Runtime error
verified_categories
#32
by
JulesLambert345
- opened
- app.py +4 -0
- requirements.txt +2 -1
- src/verified_categorie.py +127 -0
app.py
CHANGED
@@ -17,6 +17,7 @@ from src.utils import init_map
|
|
17 |
from src.map_utils import get_legend_macro
|
18 |
from src.dataframes import load_data
|
19 |
import gettext
|
|
|
20 |
|
21 |
gettext.install("myapplication")
|
22 |
|
@@ -85,6 +86,9 @@ selected_options, options, show_unverified, show_interventions = show_requests_f
|
|
85 |
len_solved_verified_requests,
|
86 |
) = load_data(show_unverified, selected_options, options)
|
87 |
|
|
|
|
|
|
|
88 |
# Selection of interventions
|
89 |
selected_statuses = show_interventions_filters()
|
90 |
|
|
|
17 |
from src.map_utils import get_legend_macro
|
18 |
from src.dataframes import load_data
|
19 |
import gettext
|
20 |
+
from src.verified_categorie import add_category, string_category
|
21 |
|
22 |
gettext.install("myapplication")
|
23 |
|
|
|
86 |
len_solved_verified_requests,
|
87 |
) = load_data(show_unverified, selected_options, options)
|
88 |
|
89 |
+
verified_df = add_category(verified_df)
|
90 |
+
verified_df = string_category(verified_df)
|
91 |
+
|
92 |
# Selection of interventions
|
93 |
selected_statuses = show_interventions_filters()
|
94 |
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
folium
|
2 |
-
streamlit_folium
|
|
|
|
1 |
folium
|
2 |
+
streamlit_folium
|
3 |
+
nltk
|
src/verified_categorie.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
from enum import Enum
|
3 |
+
import pandas as pd
|
4 |
+
import nltk
|
5 |
+
from nltk.stem import WordNetLemmatizer
|
6 |
+
|
7 |
+
|
8 |
+
# Exact (already cleaned) phrases for which to_category() reports no need at
# all: the text says the help was received.
PHRASE_NO_PROBLEMS = [
    'got food',
    'got food and clothes',
    'got food and covers',
]

# Keyword tables used by to_category(). Entries are lower-case single tokens
# because the help text is lower-cased and split on whitespace before lookup.
# English and French spellings are mixed on purpose.
KEYS_HOUSE = [
    "shelters",
    "mattresses",
    "pillows",
    "blankets",
    "shelter",
    "tentes",
    "housing",
    "couvertures",
    "tents",
    "covers",
    "sdader",
    # Token produced by clean() from the literal 'Housing/Shelter'.
    "housing_shelter",
]
KEYS_FOOD = [
    "groceries",
    # Misspelling kept so texts that already matched keep matching.
    "nouriture",
    # Correct French spelling, previously never recognised as food.
    "nourriture",
    "food",
    "water",
    "gaz",
    "dishes",
    "oil",
    "sugar",
    "tea",
    "hungry",
]
KEYS_CLOTHES = [
    "clothes",
    "clothing",
    "hygiene",
]
KEYS_MEDICAL = [
    "betadine",
    "medical",
    "diabetics",
    "medicaments",
    "diabetes",
    "doliprane",
    "vitamines",
    "drugs",
]
|
53 |
+
|
54 |
+
class HelpCategory(Enum):
    """Kinds of help a request can be classified into.

    The member values are the strings written to the ``help_category``
    column, so they must stay stable.
    """

    HOUSE = 'house'
    FOOD = 'food'
    CLOTHES = 'clothes'
    MEDICAL = 'medical'
    # Misspelled name and value are kept: existing code refers to
    # HelpCategory.UNKNOW and the value 'unknow' ends up in the dataframe.
    UNKNOW = 'unknow'
    # Correctly spelled alias. Sharing the value makes it the very same
    # member as UNKNOW, so iteration and value lookup are unchanged.
    UNKNOWN = 'unknow'
|
60 |
+
|
61 |
+
|
62 |
+
# Import-time setup: fetch the NLTK resources requested below, then build one
# shared lemmatizer instance for the whole module.
nltk.download('wordnet')
nltk.download('omw-1.4')
lemmatizer = WordNetLemmatizer()

# Lemmatized copy of every keyword table, in the same order as the source
# table, so to_category() can also compare words by their lemma.
lemmatize_house = list(map(lemmatizer.lemmatize, KEYS_HOUSE))
lemmatize_food = list(map(lemmatizer.lemmatize, KEYS_FOOD))
lemmatize_clothes = list(map(lemmatizer.lemmatize, KEYS_CLOTHES))
lemmatize_medical = list(map(lemmatizer.lemmatize, KEYS_MEDICAL))
|
70 |
+
|
71 |
+
def to_category(text: str) -> List[HelpCategory]:
    """Map one cleaned help fragment to the help categories it mentions.

    Args:
        text: A single lower-cased fragment of a help description.

    Returns:
        ``[]`` when ``text`` is exactly one of ``PHRASE_NO_PROBLEMS``.
        Otherwise the categories whose keyword tables contain one of the
        whitespace-separated words of ``text`` (either verbatim or after
        lemmatization), each listed once in order of first appearance.
        ``[HelpCategory.UNKNOW]`` when no word matches.
    """
    if text in PHRASE_NO_PROBLEMS:
        return []

    # (category, raw keywords, lemmatized keywords), checked in this order.
    keyword_tables = (
        (HelpCategory.HOUSE, KEYS_HOUSE, lemmatize_house),
        (HelpCategory.FOOD, KEYS_FOOD, lemmatize_food),
        (HelpCategory.CLOTHES, KEYS_CLOTHES, lemmatize_clothes),
        (HelpCategory.MEDICAL, KEYS_MEDICAL, lemmatize_medical),
    )

    categories = []
    for word in text.split():
        # Lemmatize once per word instead of once per table.
        lemma = lemmatizer.lemmatize(word)
        for category, keys, lemmas in keyword_tables:
            if category in categories:
                # Already reported; avoid duplicate entries in the result.
                continue
            if word in keys or lemma in lemmas:
                categories.append(category)

    if not categories:
        categories = [HelpCategory.UNKNOW]
    return categories
|
97 |
+
|
98 |
+
|
99 |
+
def clean(text: str) -> str:
    """Normalise a raw help description before it is split into fragments.

    The literal ``'Housing/Shelter'`` is first turned into the single token
    ``'housing_shelter'`` so the following ``'/'`` -> ``','`` replacement does
    not split it. The result is lower-cased and stripped of surrounding
    whitespace.

    Args:
        text: Raw help description. Non-string values (for example ``None``
            or a float NaN from a missing dataframe cell) are accepted.

    Returns:
        The normalised text, or ``''`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        # A missing cell has no .replace(); treat it as an empty description
        # instead of raising AttributeError in the middle of Series.apply.
        return ''

    normalised = text.replace('Housing/Shelter', 'housing_shelter')
    normalised = normalised.replace('/', ',')
    return normalised.lower().strip()
|
105 |
+
|
106 |
+
|
107 |
+
def to_list(text: str) -> List[str]:
    """Split a cleaned help description into its comma-separated fragments.

    Every fragment has its dots replaced by spaces and is stripped of
    surrounding whitespace. Empty fragments are kept.
    """
    fragments = []
    for piece in text.split(','):
        fragments.append(piece.replace('.', ' ').strip())
    return fragments
|
111 |
+
|
112 |
+
|
113 |
+
def help_text_to_help_category(helps: List[str]) -> "List[HelpCategory]":
    """Collect the distinct help categories mentioned by a list of fragments.

    Args:
        helps: Cleaned help fragments, one per requested item.

    Returns:
        Each category returned by ``to_category`` for any non-empty fragment,
        listed once and ordered as declared in ``HelpCategory``. An empty
        list when ``helps`` is empty; ``[HelpCategory.UNKNOW]`` when
        ``helps`` contains only empty fragments.
    """
    found = set()
    saw_text = False
    for help_string in helps:
        if not help_string:
            # Empty fragments come from trailing or doubled separators; they
            # must not add UNKNOW next to categories that were recognised.
            continue
        saw_text = True
        found.update(to_category(help_string))

    if helps and not saw_text:
        # Nothing but empty fragments: still report the request as
        # unclassified, as an empty description always was.
        found.add(HelpCategory.UNKNOW)

    # Set iteration order of enum members can differ between interpreter
    # runs; sort by declaration order so the result is reproducible.
    declaration_order = list(HelpCategory)
    return sorted(found, key=declaration_order.index)
|
119 |
+
|
120 |
+
|
121 |
+
def add_category(df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``help_category`` column derived from the ``Help Details`` column.

    Each ``Help Details`` value goes through ``clean``, ``to_list`` and
    ``help_text_to_help_category`` in turn. The column is written on the
    dataframe passed in, and that same dataframe is returned.
    """
    cleaned = df['Help Details'].apply(clean)
    fragments = cleaned.apply(to_list)
    df['help_category'] = fragments.apply(help_text_to_help_category)
    return df
|
124 |
+
|
125 |
+
def string_category(df: pd.DataFrame) -> pd.DataFrame:
    """Replace each ``help_category`` entry by a comma-joined string.

    Every entry is expected to be an iterable of objects exposing ``.value``;
    the values are joined with ``','``. The column is overwritten on the
    dataframe passed in, and that same dataframe is returned.
    """
    def _join_values(categories):
        return ','.join(category.value for category in categories)

    df['help_category'] = df['help_category'].apply(_join_values)
    return df
|