hashmalmellow committed on
Commit
be75081
1 Parent(s): 0817de7

Create test2.py

Browse files
Files changed (1) hide show
  1. test2.py +50 -0
test2.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from transformers import BertTokenizer, BertModel
import torch
from torch.utils.data import DataLoader

# Step 1: Preprocess Your Data
# Sample student resumes and internship job descriptions
student_resume_1 = "I have experience in Python programming and data analysis."
internship_job_description_1 = "Looking for a data analyst intern proficient in Python and SQL."

student_resume_2 = "Experienced in web development with HTML, CSS, and JavaScript."
internship_job_description_2 = "Seeking a web development intern skilled in HTML, CSS, and JavaScript."

# Step 2: Tokenization
# Load pre-trained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


# Step 3: Encoding Inputs
def encode_pairs(pairs):
    """Tokenize one batch of (resume, job description) text pairs.

    Used as the DataLoader ``collate_fn``. Encoding a whole batch in a single
    tokenizer call lets ``padding=True`` pad every sequence to the longest one
    in that batch, so the resulting tensors are rectangular. Encoding each
    text on its own and relying on the default collate fails as soon as two
    texts tokenize to different lengths.

    Args:
        pairs: list of ``(resume_text, job_description_text)`` string tuples.

    Returns:
        A ``(encoded_resumes, encoded_job_descriptions)`` tuple of tokenizer
        outputs holding PyTorch tensors (``return_tensors='pt'``).
    """
    resumes, job_descriptions = zip(*pairs)
    encoded_resumes = tokenizer(
        list(resumes), padding=True, truncation=True, return_tensors='pt'
    )
    encoded_job_descriptions = tokenizer(
        list(job_descriptions), padding=True, truncation=True, return_tensors='pt'
    )
    return encoded_resumes, encoded_job_descriptions


# Step 4: Batching and Data Loading
# Keep the raw text in the dataset; each batch is tokenized by encode_pairs
batch_size = 2
dataset = [
    (student_resume_1, internship_job_description_1),
    (student_resume_2, internship_job_description_2),
]
dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=encode_pairs)

# Step 5: Feed Data into the Model
# Load pre-trained BERT model
model = BertModel.from_pretrained('bert-base-uncased')

# Inference only: skip gradient tracking to save memory and time
with torch.no_grad():
    # Iterate over batches and feed data into the model
    for batch in dataloader:
        student_resume_batch, internship_job_description_batch = batch

        # Feed every encoded field produced by the tokenizer to the model
        student_resume_outputs = model(**student_resume_batch)
        internship_job_description_outputs = model(**internship_job_description_batch)

        # Get model outputs
        student_resume_last_hidden_states = student_resume_outputs.last_hidden_state
        internship_job_description_last_hidden_states = internship_job_description_outputs.last_hidden_state

        # Perform further processing or analysis with the model outputs
        # For example, compute similarity scores between student resumes and internship job descriptions
        # ...