recruiter-assistant-jbfxrs / scripts /preprocess-vacancies.py
Vincent Claes
preprocess resume
df97380
raw
history blame contribute delete
No virus
448 Bytes
import pandas as pd
import pathlib
# Preprocess vacancies: read "vacancies.csv" located next to this script,
# keep only the "Vacancy" column, drop duplicate vacancies, replace newlines
# inside each vacancy text, and write the result to "jobfixers.csv" in the
# same directory (no index column, no header row).
current_directory = pathlib.Path(__file__).resolve().parent

df = pd.read_csv(current_directory / "vacancies.csv")

# Only the vacancy text is needed; every other column is discarded.
df = df[["Vacancy"]]
print(df.shape)  # shape before de-duplication

df = df.drop_duplicates(subset=["Vacancy"])
print(df.shape)  # shape after de-duplication

# Replace every newline in each cell of the "Vacancy" column with ".,"
# (presumably so each vacancy fits on one line of the output file).
# `assign` builds a new frame instead of writing into one derived by
# selection/filtering, which avoids pandas' chained-assignment warning.
# NOTE(review): only "\n" is matched; a "\r" from Windows line endings would
# be left in place -- confirm whether the input can contain "\r\n".
df = df.assign(Vacancy=df["Vacancy"].replace("\n", ".,", regex=True))

df.to_csv(current_directory / "jobfixers.csv", index=False, header=False)