|
from transformers import BertTokenizer, BertModel |
|
import torch |
|
from torch.utils.data import DataLoader |
|
|
|
|
|
|
|
# Example data: student resumes paired with internship job descriptions.
student_resume_1 = "I have experience in Python programming and data analysis."

internship_job_description_1 = "Looking for a data analyst intern proficient in Python and SQL."


student_resume_2 = "Experienced in web development with HTML, CSS, and JavaScript."

internship_job_description_2 = "Seeking a web development intern skilled in HTML, CSS, and JavaScript."


# Tokenizer for the pretrained BERT checkpoint (downloads on first use).
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


# Per-example encodings, kept for direct single-example use and backward
# compatibility. Each is a dict of tensors shaped (1, seq_len); note that
# seq_len differs between texts because padding=True only pads within a
# single tokenizer call.
encoded_student_resume_1 = tokenizer(student_resume_1, padding=True, truncation=True, return_tensors='pt')

encoded_internship_job_description_1 = tokenizer(internship_job_description_1, padding=True, truncation=True, return_tensors='pt')


encoded_student_resume_2 = tokenizer(student_resume_2, padding=True, truncation=True, return_tensors='pt')

encoded_internship_job_description_2 = tokenizer(internship_job_description_2, padding=True, truncation=True, return_tensors='pt')


# BUG FIX: the original dataset held the pre-encoded dicts above. Because
# their tensors have different sequence lengths, DataLoader's default
# collate (torch.stack) raises a size-mismatch RuntimeError on the first
# batch. Instead, store the raw string pairs and tokenize each batch inside
# a collate_fn, so every batch is padded to a common length.
batch_size = 2

dataset = [(student_resume_1, internship_job_description_1),
           (student_resume_2, internship_job_description_2)]


def _collate_text_pairs(batch):
    """Tokenize a batch of (resume, job_description) string pairs.

    Returns a pair of encoding dicts whose tensors are padded to the
    longest sequence in the batch, so default tensor batching works.
    """
    resumes, job_descriptions = zip(*batch)
    encoded_resumes = tokenizer(list(resumes), padding=True, truncation=True, return_tensors='pt')
    encoded_job_descriptions = tokenizer(list(job_descriptions), padding=True, truncation=True, return_tensors='pt')
    return encoded_resumes, encoded_job_descriptions


dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=_collate_text_pairs)


model = BertModel.from_pretrained('bert-base-uncased')
model.eval()  # deterministic inference: disables dropout


# Inference only — no_grad avoids building the autograd graph, cutting
# memory use and speeding up the forward passes.
with torch.no_grad():
    for batch in dataloader:

        student_resume_batch, internship_job_description_batch = batch

        student_resume_outputs = model(**student_resume_batch)

        internship_job_description_outputs = model(**internship_job_description_batch)

        # (batch, seq_len, hidden) token-level embeddings, usable for
        # downstream resume/job similarity scoring.
        student_resume_last_hidden_states = student_resume_outputs.last_hidden_state

        internship_job_description_last_hidden_states = internship_job_description_outputs.last_hidden_state
|
|
|
|
|
|
|
|
|
|