Shiv1143 committed on
Commit
8adda05
·
verified ·
1 Parent(s): 665ac19

Upload 6 files


adding required files

Readme.md ADDED
@@ -0,0 +1,49 @@
+ # SelectRight
+
+ ## Overview
+ This project ranks candidates for a role by comparing their resumes and interview transcripts with a language model.
+
+ ## Folder Structure
+ ```
+ MLE_Trial_Task/
+ ├── data/
+ │   └── candidates.csv (optional, can be uploaded via the app)
+ ├── core_services/
+ │   └── bot9_ai/
+ │       └── modules/
+ │           └── LLM/
+ │               └── OpenAi.py
+ ├── src/
+ │   ├── __init__.py
+ │   ├── data_preparation.py
+ │   ├── model.py
+ │   ├── evaluation.py
+ │   ├── bias_analysis.py
+ │   └── report_generation.py
+ ├── app.py
+ ├── requirements.txt
+ └── README.md
+ ```
+
+ ## Setup
+ 1. Clone the repository.
+ 2. Install the required dependencies:
+    ```bash
+    pip install -r requirements.txt
+    ```
+ 3. Run the Streamlit app:
+    ```bash
+    streamlit run app.py
+    ```
+
+ ## Files
+ - `data/candidates.csv`: The dataset file (optional, can be uploaded via the app; expected columns are sketched below).
+ - `core_services/bot9_ai/modules/LLM/OpenAi.py`: Contains the `OpenAi` class.
+ - `src/data_preparation.py`: Script for loading the dataset.
+ - `src/model.py`: Script for defining the model.
+ - `src/evaluation.py`: Script for evaluating the model.
+ - `src/bias_analysis.py`: Script for analyzing biases.
+ - `src/report_generation.py`: Script for generating the report.
+ - `app.py`: Streamlit app script.
+ - `requirements.txt`: List of dependencies.
+ - `README.md`: Project overview and setup instructions.
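
The commit does not document the schema of `candidates.csv`, but the expected columns can be read off `src/evaluation.py` below; a sketch of the header this inference implies (not part of the commit):

```csv
role,candidateAId,candidateBId,candidateAResume,candidateBResume,candidateATranscript,candidateBTranscript,winnerId
```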
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ pandas
+ openai
+ streamlit
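
Note that `src/model.py` uses the v1-style OpenAI Python interface (`openai.chat.completions.create`, `openai.fine_tuning.jobs.create`), which requires openai 1.0 or later; a pinned sketch of the same requirements (the version bound is an assumption, not in the commit):

```
pandas
openai>=1.0  # v1-style client interface used in src/model.py
streamlit
```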
src/data_preparation.py ADDED
@@ -0,0 +1,5 @@
+ import pandas as pd
+
+ def load_data(file):
+     data = pd.read_csv(file)  # accepts a path or a file-like object (e.g. a Streamlit upload)
+     return data
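
Since the README says the CSV can also be uploaded through the app, `load_data` presumably receives either a path or a Streamlit upload handle; a minimal sketch of both call patterns (`app.py` itself is not in this diff, so the widget usage here is illustrative):

```python
# Illustrative caller for load_data; app.py is not shown in this commit.
import streamlit as st
from src.data_preparation import load_data

uploaded = st.file_uploader("Upload candidates.csv", type="csv")
if uploaded is not None:
    data = load_data(uploaded)                # file-like object from the upload widget
else:
    data = load_data("data/candidates.csv")   # fall back to the bundled dataset

st.write(data.head())
```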
src/evaluation.py ADDED
@@ -0,0 +1,28 @@
+ from src.model import compare_candidates
+
+ def evaluate_model(model, data):
+     # model: name of the (fine-tuned) OpenAI model passed through to compare_candidates
+     correct_predictions = 0
+
+     for index, row in data.iterrows():
+         candidateA = {
+             'id': row['candidateAId'],
+             'resume': row['candidateAResume'],
+             'transcript': row['candidateATranscript']
+         }
+         candidateB = {
+             'id': row['candidateBId'],
+             'resume': row['candidateBResume'],
+             'transcript': row['candidateBTranscript']
+         }
+         role = row['role']
+
+         # compare_candidates (src/model.py) is prompted to return only the winning
+         # candidate's id, so score the prediction against winnerId directly.
+         prediction = compare_candidates(candidateA, candidateB, role, model)
+
+         if prediction and prediction.strip() == str(row['winnerId']):
+             correct_predictions += 1
+
+     accuracy = correct_predictions / len(data)
+     return accuracy
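
Because `evaluate_model` only needs `compare_candidates` to return an id string, the accuracy arithmetic can be sanity-checked offline by stubbing that call; a sketch with made-up rows (all names and values here are illustrative):

```python
# Offline check of evaluate_model; no API calls are made.
import pandas as pd
import src.evaluation as evaluation

rows = pd.DataFrame([
    {"role": "MLE", "candidateAId": "a1", "candidateBId": "b1",
     "candidateAResume": "...", "candidateBResume": "...",
     "candidateATranscript": "...", "candidateBTranscript": "...",
     "winnerId": "a1"},
    {"role": "MLE", "candidateAId": "a2", "candidateBId": "b2",
     "candidateAResume": "...", "candidateBResume": "...",
     "candidateATranscript": "...", "candidateBTranscript": "...",
     "winnerId": "b2"},
])

# Stub that always picks candidate A: should score 1/2 on this frame.
evaluation.compare_candidates = lambda a, b, role, model: a["id"]
print(evaluation.evaluate_model("stub-model", rows))  # 0.5
```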
src/model.py ADDED
@@ -0,0 +1,99 @@
+ import json
+ import time
+
+ import openai
+
+ def initialize_openai(api_key):
+     openai.api_key = api_key
+
+ def prepare_training_data(training_data):
+     # gpt-4o fine-tuning expects chat-format examples ({"messages": [...]}),
+     # not the legacy prompt/completion format.
+     training_examples = []
+     for index, row in training_data.iterrows():
+         job_description = row['role']
+         candidateA_resume = row['candidateAResume']
+         candidateB_resume = row['candidateBResume']
+         candidateA_transcript = row['candidateATranscript']
+         candidateB_transcript = row['candidateBTranscript']
+         winner_id = row['winnerId']
+
+         prompt = f"Job Description:\n{job_description}\n\nCandidate A Resume:\n{candidateA_resume}\n\nCandidate B Resume:\n{candidateB_resume}\n\nCandidate A Transcript:\n{candidateA_transcript}\n\nCandidate B Transcript:\n{candidateB_transcript}\n\nPreferred Candidate:"
+
+         training_examples.append({
+             "messages": [
+                 {"role": "user", "content": prompt},
+                 {"role": "assistant", "content": str(winner_id)},
+             ]
+         })
+
+     with open("training_data.jsonl", "w") as f:
+         for item in training_examples:
+             f.write(json.dumps(item) + "\n")
+
+ def upload_training_data(file_path):
+     with open(file_path, "rb") as f:
+         response = openai.files.create(
+             file=f,
+             purpose='fine-tune'
+         )
+     print("response-upload--->", response)
+     return response.id
+
+ def create_fine_tuning_job(file_id):
+     response = openai.fine_tuning.jobs.create(
+         training_file=file_id,
+         model="gpt-4o-2024-08-06",
+     )
+     print("response-create--->", response)
+     # The job runs asynchronously; fine_tuned_model stays None until it finishes.
+     return response.id
+
+ def wait_for_fine_tuning(job_id, poll_interval=30):
+     # Poll until the job reaches a terminal state, then return the model name.
+     while True:
+         job = openai.fine_tuning.jobs.retrieve(job_id)
+         if job.status in ("succeeded", "failed", "cancelled"):
+             return job.fine_tuned_model
+         time.sleep(poll_interval)
+
+ def fine_tune_model(training_data):
+     # Prepare training data
+     prepare_training_data(training_data)
+
+     # Upload training data
+     file_id = upload_training_data("training_data.jsonl")
+
+     # Create the fine-tuning job and wait for it to complete
+     job_id = create_fine_tuning_job(file_id)
+     fine_tuned_model = wait_for_fine_tuning(job_id)
+
+     return fine_tuned_model
+
+ def extract_keywords(resume, job_description, model):
+     prompt = f"Extract key skills and qualifications from the following resume based on the job description:\n\nJob Description:\n{job_description}\n\nResume:\n{resume}\n\nKey Skills and Qualifications:"
+     messages = [
+         {"role": "system", "content": "You are a helpful assistant."},
+         {"role": "user", "content": prompt}
+     ]
+     response = openai.chat.completions.create(model=model, messages=messages, max_tokens=100)
+     return response.choices[0].message.content
+
+ def rate_skills(transcript, job_description, model):
+     prompt = f"Rate the skills of the candidate based on the following interview transcript and job description:\n\nJob Description:\n{job_description}\n\nInterview Transcript:\n{transcript}\n\nSkill Ratings:"
+     messages = [
+         {"role": "system", "content": "You are a helpful assistant."},
+         {"role": "user", "content": prompt}
+     ]
+     response = openai.chat.completions.create(model=model, messages=messages, max_tokens=100)
+     return response.choices[0].message.content
+
+ def compare_candidates(candidateA, candidateB, job_description, model):
+     # Candidate dicts should include an 'id' key so the model can return it.
+     prompt = f"Based on the following details, return the candidate_id of the candidate who is the best fit for the role:\n\nJob Description:\n{job_description}\n\nCandidate A:\n{candidateA}\n\nCandidate B:\n{candidateB}\n\nONLY RETURN THE CANDIDATE ID, e.g. '8ab47434-09a9-44e6-8c77-f9fd20c57765'."
+     messages = [
+         {"role": "system", "content": "You are a helpful assistant."},
+         {"role": "user", "content": prompt}
+     ]
+     response = openai.chat.completions.create(model=model, messages=messages, max_tokens=100)
+     return response.choices[0].message.content
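
Taken together, a plausible end-to-end driver would look like the following (the 80/20 split and the API-key handling are assumptions; `app.py`, which presumably wires this up in Streamlit, is not shown in the diff):

```python
# Hypothetical driver composing the committed modules; not part of the commit.
from src.data_preparation import load_data
from src.evaluation import evaluate_model
from src.model import initialize_openai, fine_tune_model

initialize_openai("sk-...")  # supply your own API key

data = load_data("data/candidates.csv")
train = data.sample(frac=0.8, random_state=42)  # assumed 80/20 split
test = data.drop(train.index)

fine_tuned = fine_tune_model(train)   # blocks until the fine-tuning job finishes
accuracy = evaluate_model(fine_tuned, test)
print(f"Pairwise accuracy: {accuracy:.2%}")
```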
training_data.jsonl ADDED
The diff for this file is too large to render. See raw diff
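
The file itself is not rendered here, but given `prepare_training_data` above (in chat format), each JSONL line would take roughly this shape, with the long resume and transcript text elided:

```json
{"messages": [{"role": "user", "content": "Job Description:\n...\n\nCandidate A Resume:\n...\n\nCandidate B Resume:\n...\n\nPreferred Candidate:"}, {"role": "assistant", "content": "8ab47434-09a9-44e6-8c77-f9fd20c57765"}]}
```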