hashmalmellow
committed on
Commit
•
be75081
1
Parent(s):
0817de7
Create test2.py
Browse files
test2.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Encode student resumes and internship job descriptions with BERT.

Demo pipeline: tokenize paired texts, batch them with a DataLoader, run
them through a pre-trained BERT encoder, and score each resume against
its paired job description with cosine similarity over mean-pooled
token embeddings.

NOTE: requires the third-party ``transformers`` package and downloads
the ``bert-base-uncased`` weights on first run (network access needed).
"""

import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer, BertModel

# Step 1: Preprocess Your Data
# Sample student resumes and internship job descriptions
student_resume_1 = "I have experience in Python programming and data analysis."
internship_job_description_1 = "Looking for a data analyst intern proficient in Python and SQL."

student_resume_2 = "Experienced in web development with HTML, CSS, and JavaScript."
internship_job_description_2 = "Seeking a web development intern skilled in HTML, CSS, and JavaScript."

student_resumes = [student_resume_1, student_resume_2]
internship_job_descriptions = [internship_job_description_1, internship_job_description_2]

# Step 2: Tokenization
# Load pre-trained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Step 3: Encoding Inputs
# Tokenize each text list as ONE batch so every tensor in it shares the
# same padded sequence length. Tokenizing the strings one at a time (as
# the original did) yields tensors of different lengths, which the
# DataLoader's default collate function cannot stack -- it would raise
# at iteration time. `padding=True` on a single string is also a no-op.
encoded_resumes = tokenizer(student_resumes, padding=True, truncation=True, return_tensors='pt')
encoded_job_descriptions = tokenizer(internship_job_descriptions, padding=True, truncation=True, return_tensors='pt')

# Step 4: Batching and Data Loading
# Organize encoded input features into batches. TensorDataset keeps the
# resume/job-description pairs aligned row-by-row.
batch_size = 2
dataset = TensorDataset(
    encoded_resumes['input_ids'],
    encoded_resumes['attention_mask'],
    encoded_job_descriptions['input_ids'],
    encoded_job_descriptions['attention_mask'],
)
dataloader = DataLoader(dataset, batch_size=batch_size)

# Step 5: Feed Data into the Model
# Load pre-trained BERT model
model = BertModel.from_pretrained('bert-base-uncased')
model.eval()  # inference only: disable dropout for deterministic outputs


def _mean_pool(last_hidden_state, attention_mask):
    """Average token embeddings over the sequence, ignoring padding.

    last_hidden_state: (batch, seq_len, hidden) model output.
    attention_mask:    (batch, seq_len) 1 for real tokens, 0 for padding.
    Returns (batch, hidden) sentence embeddings.
    """
    mask = attention_mask.unsqueeze(-1).float()
    # clamp avoids division by zero for an (impossible here) all-pad row
    return (last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)


# Iterate over batches and feed data into the model.
with torch.no_grad():  # no gradients needed for inference
    for resume_ids, resume_mask, job_ids, job_mask in dataloader:
        resume_outputs = model(input_ids=resume_ids, attention_mask=resume_mask)
        job_outputs = model(input_ids=job_ids, attention_mask=job_mask)

        # Collapse per-token hidden states into one vector per text.
        resume_embeddings = _mean_pool(resume_outputs.last_hidden_state, resume_mask)
        job_embeddings = _mean_pool(job_outputs.last_hidden_state, job_mask)

        # Similarity score between each resume and its paired job description.
        similarity = torch.nn.functional.cosine_similarity(resume_embeddings, job_embeddings, dim=1)
        print(similarity)