Jessica Walkenhorst committed on
Commit
6349813
1 Parent(s): 206371d

Add person and attendancelist code

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .ipynb_checkpoints/
2
+ __pycache__
src/maorganizer/datawrangling.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pathlib
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, List, Optional, Set, Union

import pandas as pd
7
+
8
# Folder holding the participant files, resolved against the working
# directory at import time. ``cwd`` is a classmethod, so it is called on the
# class instead of on a throwaway ``Path()`` instance.
DATAFOLDER = Path.cwd() / "data"

# Month abbreviation embedded in the participant file name.
MONTH = "Feb"

# Base name (no extension) of the participant file for MONTH.
FILENAME = f"participants-Meetup-{MONTH}"
13
+
14
+
15
@dataclass
class Person:
    """A person identified by a normalised, title-cased full name.

    The name is whitespace-normalised (leading/trailing whitespace removed,
    inner runs collapsed to a single space) and title-cased on creation, so
    ``Person("zaphod  beeblebrox ")`` equals ``Person("Zaphod Beeblebrox")``.
    Equality and hashing are both based on the normalised name, which makes
    instances usable as set members and dict keys.
    """

    name: str

    def __post_init__(self):
        # Collapse whitespace first so that stray spaces neither create empty
        # name parts nor make otherwise identical names compare unequal.
        self.name = " ".join(self.name.split()).title()

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        # ``__eq__`` is the hook Python actually calls for ``==``; returning
        # NotImplemented lets comparisons with foreign types fall back cleanly.
        if not isinstance(other, Person):
            return NotImplemented
        return self.name == other.name

    def is_similar(self, other: "Person") -> bool:
        """Return True if both names share at least one whitespace-separated part."""
        return not set(self.name.split()).isdisjoint(other.name.split())

    @property
    def firstname(self) -> str:
        """First part of the name, or an empty string for an empty name."""
        parts = self.name.split()
        return parts[0] if parts else ""

    @property
    def lastname(self) -> str:
        """Everything after the first name part, joined by single spaces."""
        return " ".join(self.name.split()[1:])
40
+
41
+
42
@dataclass
class Attendancelist:
    """A set of participants with helpers for file I/O, diffing and lookup."""

    participants: Set["Person"]

    @classmethod
    def load_from_file(
        cls,
        filename: Union[str, pathlib.Path],
        cname: str = "Name",
        sep: Optional[str] = None,
    ) -> "Attendancelist":
        """Build an attendance list from the name column of a tabular file.

        Args:
            filename: Path of the file to read (string or ``pathlib.Path``).
            cname: Name of the column that holds the participants' names.
            sep: If given (and non-empty), the file is read as delimited text
                with this separator regardless of its suffix.

        Returns:
            An attendance list with one ``Person`` per distinct name.

        Raises:
            ValueError: If no separator is given and the suffix is not one of
                ``.xlsx``, ``.xls`` or ``.csv`` (matched case-insensitively).
        """
        path = pathlib.Path(filename)
        suffix = path.suffix.lower()
        if sep:
            df = pd.read_csv(path, sep=sep)
        elif suffix in (".xlsx", ".xls"):
            df = pd.read_excel(path)
        elif suffix == ".csv":
            df = pd.read_csv(path)
        else:
            raise ValueError(
                "Unsupported filetype, please specify a separator or choose one "
                "of the following filetypes: .xlsx, .xls, .csv"
            )

        # Empty cells are read as NaN (a float); skip them rather than crash
        # while normalising the name.
        return cls({Person(str(name)) for name in df[cname].dropna()})

    def to_df(self) -> pd.DataFrame:
        """Return the participants as a ``firstname``/``lastname`` DataFrame.

        Rows are sorted by first name and then last name so the order does not
        depend on set iteration order.
        """
        return pd.DataFrame(
            [
                [participant.firstname, participant.lastname]
                for participant in self.participants
            ],
            columns=["firstname", "lastname"],
        ).sort_values(by=["firstname", "lastname"])

    def to_file(self, filename: Union[str, pathlib.Path]) -> None:
        """Write the participants to an ``.xlsx`` or ``.csv`` file.

        Raises:
            ValueError: If the suffix is neither ``.xlsx`` nor ``.csv``
                (matched case-insensitively).
        """
        path = pathlib.Path(filename)
        suffix = path.suffix.lower()
        if suffix == ".xlsx":
            self.to_df().to_excel(path, index=False)
        elif suffix == ".csv":
            self.to_df().to_csv(path, index=False)
        else:
            raise ValueError(
                "Unsupported filetype, please choose one of the following: .xlsx, .csv"
            )

    def update(self, other: "Attendancelist") -> "Attendancelist":
        """Return a new list of the participants in ``other`` not in this list.

        Neither this list nor ``other`` is modified.
        """
        return Attendancelist(other.participants - self.participants)

    def find(self, somebody: Union[str, "Person"]) -> Set["Person"]:
        """Return all participants whose name shares a part with ``somebody``.

        ``somebody`` may be a ``Person`` or a plain name string; a string is
        wrapped in a ``Person`` so it is normalised like every other name.
        """
        needle = Person(somebody) if isinstance(somebody, str) else somebody
        return {p for p in self.participants if p.is_similar(needle)}

    def find_multiple(self, people: Iterable[Union[str, "Person"]]) -> dict:
        """Map each entry of ``people`` (used as given as the key) to ``find(entry)``."""
        return {p: self.find(p) for p in people}
tests/test_attendancelist.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from maorganizer.datawrangling import Person, Attendancelist
2
+
3
+
4
def test_attendancelist_finds_person_by_substring():
    """A one-word query returns the single participant sharing that name part."""
    attendees = Attendancelist({Person("zaphod beeblebrox"), Person("ford prefix")})
    matches = attendees.find(Person("zaphod"))
    assert matches == {Person("Zaphod Beeblebrox")}
8
+
9
+
10
def test_attendancelists_finds_multiple_people_if_existent():
    """Every participant sharing a name part with the query is returned."""
    attendees = Attendancelist(
        {Person("zaphod beeblebrox"), Person("zaphod prefix"), Person("ford prefix")}
    )
    expected = {Person("Zaphod Beeblebrox"), Person("Zaphod Prefix")}
    assert attendees.find(Person("zaphod")) == expected
14
+
15
+
16
def test_find_multiple_finds_alls():
    """find_multiple maps each queried person to that person's set of matches."""
    attendees = Attendancelist(
        {Person("zaphod beeblebrox"), Person("ford prefix"), Person("Marvin")}
    )
    queries = {Person("zaphod"), Person("ford prefix")}
    expected = {
        Person(name="Ford Prefix"): {Person(name="Ford Prefix")},
        Person(name="Zaphod"): {Person(name="Zaphod Beeblebrox")},
    }
    assert attendees.find_multiple(queries) == expected
tests/test_datawrangling.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from maorganizer.datawrangling import Person
4
+
5
+
6
def test_Person_parses_name_correctly():
    """An already title-cased name is stored unchanged."""
    person = Person("Zaphod Beeblebrox")
    assert person.name == "Zaphod Beeblebrox"
8
+
9
+
10
def test_Person_name_gets_capitalized_correctly():
    """A lower-case name is title-cased on construction."""
    person = Person("zaphod beeblebrox")
    assert person.name == "Zaphod Beeblebrox"
12
+
13
+
14
@pytest.mark.parametrize(
    ("name", "firstname"),
    [
        ("Zaphod", "Zaphod"),
        ("Zaphod Beeblebrox", "Zaphod"),
        ("Zaphod Lucius Beeblebrox", "Zaphod"),
    ],
)
def test__Person_firstname_is_extracted_correctly(name, firstname):
    """The first name is the first space-separated part of the full name."""
    person = Person(name)
    assert person.firstname == firstname
24
+
25
+
26
@pytest.mark.parametrize(
    ("name", "lastname"),
    [
        ("zaphod", ""),
        ("Zaphod Beeblebrox", "Beeblebrox"),
        ("Zaphod Lucius Beeblebrox", "Lucius Beeblebrox"),
    ],
)
def test_Person_lastname_is_extracted_correctly(name, lastname):
    """The last name is everything after the first part (empty if there is none)."""
    person = Person(name)
    assert person.lastname == lastname