Spaces:
Runtime error
Runtime error
Jessica Walkenhorst
committed on
Commit
•
6349813
1
Parent(s):
206371d
Add person and attendancelist code
Browse files- .gitignore +2 -0
- src/maorganizer/datawrangling.py +89 -0
- tests/test_attendancelist.py +22 -0
- tests/test_datawrangling.py +35 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.ipynb_checkpoints/
|
2 |
+
__pycache__
|
src/maorganizer/datawrangling.py
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pathlib
|
2 |
+
from dataclasses import dataclass
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import List, Set
|
5 |
+
|
6 |
+
import pandas as pd
|
7 |
+
|
8 |
+
# Folder named "data" below the working directory the process was started in
# (resolved once, at import time).
DATAFOLDER = Path.cwd() / "data"

# Month abbreviation that is embedded in the participants file name.
MONTH = "Feb"

# Participants file name without an extension.
FILENAME = f"participants-Meetup-{MONTH}"
|
13 |
+
|
14 |
+
|
15 |
+
@dataclass
class Person:
    """A participant identified by their full name.

    The name is normalised with ``str.title()`` on creation, so
    ``Person("zaphod beeblebrox")`` and ``Person("Zaphod Beeblebrox")``
    compare equal and hash identically.
    """

    name: str

    def __post_init__(self):
        # Normalise capitalisation so comparisons ignore the input's casing.
        self.name = self.name.title()

    def __hash__(self):
        """Hash on the normalised name, consistent with ``__eq__``."""
        return hash(self.name)

    def __eq__(self, other):
        """Return True when *other* is a ``Person`` with the same name.

        Returns ``NotImplemented`` for non-``Person`` operands so Python can
        fall back to the other operand's comparison.
        """
        if not isinstance(other, Person):
            return NotImplemented
        return self.name == other.name

    # The original spelling was never invoked by ``==``; kept as an alias so
    # any explicit caller keeps working.
    __equal__ = __eq__

    def is_similar(self, other: "Person") -> bool:
        """Return True when both names share at least one space-separated part."""
        own_parts = set(self.name.split(" "))
        return not own_parts.isdisjoint(other.name.split(" "))

    @property
    def firstname(self) -> str:
        """The first space-separated part of the name."""
        return self.name.split(" ")[0]

    @property
    def lastname(self) -> str:
        """Everything after the first name part; empty for one-word names."""
        return " ".join(self.name.split(" ")[1:])
|
40 |
+
|
41 |
+
|
42 |
+
@dataclass
class Attendancelist:
    """A set of participants with helpers for file I/O and name lookup."""

    participants: Set[Person]

    @classmethod
    def load_from_file(
        cls, filename: pathlib.Path, cname: str = "Name", sep: str = None
    ):
        """Build an attendance list from the ``cname`` column of a file.

        Args:
            filename: Path (or path string) of the file to read.
            cname: Name of the column holding the participants' names.
            sep: Optional column separator. When given, the file is read as
                delimited text regardless of its extension.

        Returns:
            A new attendance list built from the non-blank entries of the
            name column.

        Raises:
            ValueError: If no separator is given and the extension is not
                one of ``.xlsx``, ``.xls`` or ``.csv``.
        """
        path = pathlib.Path(filename)  # also accept plain path strings
        suffix = path.suffix.lower()  # treat ".CSV" like ".csv"

        if sep:
            df = pd.read_csv(path, sep=sep)
        elif suffix in (".xlsx", ".xls"):
            df = pd.read_excel(path)
        elif suffix == ".csv":
            df = pd.read_csv(path)
        else:
            raise ValueError(
                "Unsupported filetype, please specify a separator or choose one "
                "of the following filetypes: .xlsx, .xls, .csv"
            )

        # Blank cells are read as NaN, which has no ``title()``; skip them.
        return cls({Person(str(name)) for name in df[cname].dropna()})

    def to_df(self):
        """Return the participants as a DataFrame sorted by first name.

        The frame has the two columns ``firstname`` and ``lastname``.
        """
        rows = [
            [participant.firstname, participant.lastname]
            for participant in self.participants
        ]
        frame = pd.DataFrame(rows, columns=["firstname", "lastname"])
        return frame.sort_values(by="firstname")

    def to_file(self, filename: pathlib.Path):
        """Write the participants table to an ``.xlsx`` or ``.csv`` file.

        Args:
            filename: Path (or path string) of the file to write; the
                extension selects the output format.

        Raises:
            ValueError: If the extension is neither ``.xlsx`` nor ``.csv``.
        """
        path = pathlib.Path(filename)  # also accept plain path strings
        suffix = path.suffix.lower()

        if suffix == ".xlsx":
            self.to_df().to_excel(path, index=False)
        elif suffix == ".csv":
            self.to_df().to_csv(path, index=False)
        else:
            raise ValueError(
                "Unsupported filetype, please choose one of the following: .xlsx, .csv"
            )

    def update(self, other: "Attendancelist"):
        """Return the participants of *other* that are not in this list.

        A new ``Attendancelist`` is returned; neither input is modified.
        """
        return Attendancelist(other.participants - self.participants)

    def find(self, somebody: str):
        """Return all participants whose name shares a part with *somebody*.

        Args:
            somebody: A name string or a ``Person``. Strings are wrapped in a
                ``Person`` first, so they get the same capitalisation
                normalisation as the stored participants.
        """
        target = somebody if isinstance(somebody, Person) else Person(somebody)
        return {p for p in self.participants if p.is_similar(target)}

    def find_multiple(self, people: List[str]):
        """Map each entry of *people* to the set returned by ``find`` for it."""
        return {p: self.find(p) for p in people}
|
tests/test_attendancelist.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from maorganizer.datawrangling import Person, Attendancelist
|
2 |
+
|
3 |
+
|
4 |
+
def test_attendancelist_finds_person_by_substring():
    """A one-word query returns the participant sharing that name part."""
    attendees = Attendancelist({Person("zaphod beeblebrox"), Person("ford prefix")})
    matches = attendees.find(Person("zaphod"))
    assert matches == {Person("Zaphod Beeblebrox")}
|
8 |
+
|
9 |
+
|
10 |
+
def test_attendancelists_finds_multiple_people_if_existent():
    """Every participant sharing a name part with the query is returned."""
    attendees = Attendancelist(
        {Person("zaphod beeblebrox"), Person("zaphod prefix"), Person("ford prefix")}
    )
    expected = {Person("Zaphod Beeblebrox"), Person("Zaphod Prefix")}
    assert attendees.find(Person("zaphod")) == expected
|
14 |
+
|
15 |
+
|
16 |
+
def test_find_multiple_finds_alls():
    """``find_multiple`` maps each query to its own set of matches."""
    attendees = Attendancelist(
        {Person("zaphod beeblebrox"), Person("ford prefix"), Person("Marvin")}
    )
    queries = {Person("zaphod"), Person("ford prefix")}
    expected = {
        Person(name="Ford Prefix"): {Person(name="Ford Prefix")},
        Person(name="Zaphod"): {Person(name="Zaphod Beeblebrox")},
    }
    assert attendees.find_multiple(queries) == expected
|
tests/test_datawrangling.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
|
3 |
+
from maorganizer.datawrangling import Person
|
4 |
+
|
5 |
+
|
6 |
+
def test_Person_parses_name_correctly():
    """An already title-cased name is stored unchanged."""
    person = Person("Zaphod Beeblebrox")
    assert person.name == "Zaphod Beeblebrox"
|
8 |
+
|
9 |
+
|
10 |
+
def test_Person_name_gets_capitalized_correctly():
    """A lower-case name is title-cased on creation."""
    person = Person("zaphod beeblebrox")
    assert person.name == "Zaphod Beeblebrox"
|
12 |
+
|
13 |
+
|
14 |
+
@pytest.mark.parametrize(
    "name,firstname",
    [
        ("Zaphod", "Zaphod"),
        ("Zaphod Beeblebrox", "Zaphod"),
        ("Zaphod Lucius Beeblebrox", "Zaphod"),
    ],
)
def test__Person_firstname_is_extracted_correctly(name, firstname):
    """The first name is the first space-separated part of the name."""
    person = Person(name)
    assert person.firstname == firstname
|
24 |
+
|
25 |
+
|
26 |
+
@pytest.mark.parametrize(
    "name,lastname",
    [
        ("zaphod", ""),
        ("Zaphod Beeblebrox", "Beeblebrox"),
        ("Zaphod Lucius Beeblebrox", "Lucius Beeblebrox"),
    ],
)
def test_Person_lastname_is_extracted_correctly(name, lastname):
    """The last name is everything after the first part; empty if absent."""
    person = Person(name)
    assert person.lastname == lastname
|