drclab committed on
Commit
ad15d6a
1 Parent(s): 19ed75c
Files changed (3) hide show
  1. hugging_face.ipynb +2 -2
  2. pdf.ipynb +77 -0
  3. prompt_cv.ipynb +62 -22
hugging_face.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 3,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -13,7 +13,7 @@
13
  },
14
  {
15
  "cell_type": "code",
16
- "execution_count": 4,
17
  "metadata": {},
18
  "outputs": [
19
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
13
  },
14
  {
15
  "cell_type": "code",
16
+ "execution_count": 2,
17
  "metadata": {},
18
  "outputs": [
19
  {
pdf.ipynb CHANGED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import PyPDF2"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 4,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "pdf_file = open('CV2023.pdf', 'rb')"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 8,
24
+ "metadata": {},
25
+ "outputs": [
26
+ {
27
+ "data": {
28
+ "text/plain": [
29
+ "2"
30
+ ]
31
+ },
32
+ "execution_count": 8,
33
+ "metadata": {},
34
+ "output_type": "execute_result"
35
+ }
36
+ ],
37
+ "source": [
38
+ "pdf_reader = PyPDF2.PdfReader(pdf_file)\n",
39
+ "len(pdf_reader.pages)"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 10,
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "with open('cv.txt', 'w') as txt_file:\n",
49
+ " for page in pdf_reader.pages:\n",
50
+ " txt = page.extract_text()\n",
51
+ " txt_file.write(txt)"
52
+ ]
53
+ }
54
+ ],
55
+ "metadata": {
56
+ "kernelspec": {
57
+ "display_name": "Python 3",
58
+ "language": "python",
59
+ "name": "python3"
60
+ },
61
+ "language_info": {
62
+ "codemirror_mode": {
63
+ "name": "ipython",
64
+ "version": 3
65
+ },
66
+ "file_extension": ".py",
67
+ "mimetype": "text/x-python",
68
+ "name": "python",
69
+ "nbconvert_exporter": "python",
70
+ "pygments_lexer": "ipython3",
71
+ "version": "3.10.6"
72
+ },
73
+ "orig_nbformat": 4
74
+ },
75
+ "nbformat": 4,
76
+ "nbformat_minor": 2
77
+ }
prompt_cv.ipynb CHANGED
@@ -10,7 +10,7 @@
10
  },
11
  {
12
  "cell_type": "code",
13
- "execution_count": 2,
14
  "metadata": {},
15
  "outputs": [],
16
  "source": [
@@ -20,7 +20,7 @@
20
  },
21
  {
22
  "cell_type": "code",
23
- "execution_count": 3,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
@@ -29,7 +29,7 @@
29
  },
30
  {
31
  "cell_type": "code",
32
- "execution_count": 8,
33
  "metadata": {},
34
  "outputs": [],
35
  "source": [
@@ -45,51 +45,91 @@
45
  },
46
  {
47
  "cell_type": "code",
48
- "execution_count": 5,
49
  "metadata": {},
50
  "outputs": [],
51
  "source": [
52
- "text = f\"\"\"\n",
53
- "You should express what you want a model to do by \\ \n",
54
- "providing instructions that are as clear and \\ \n",
55
- "specific as you can possibly make them. \\ \n",
56
- "This will guide the model towards the desired output, \\ \n",
57
- "and reduce the chances of receiving irrelevant \\ \n",
58
- "or incorrect responses. Don't confuse writing a \\ \n",
59
- "clear prompt with writing a short prompt. \\ \n",
60
- "In many cases, longer prompts provide more clarity \\ \n",
61
- "and context for the model, which can lead to \\ \n",
62
- "more detailed and relevant outputs.\n",
63
- "\"\"\""
64
  ]
65
  },
66
  {
67
  "cell_type": "code",
68
- "execution_count": 6,
69
  "metadata": {},
70
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  "source": [
72
  "prompt = f\"\"\"\n",
73
- "Summarize the text delimited by triple backticks \\ \n",
74
- "into a single sentence.\n",
75
  "```{text}```\n",
76
- "\"\"\""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  ]
78
  },
79
  {
80
  "cell_type": "code",
81
- "execution_count": 9,
82
  "metadata": {},
83
  "outputs": [
84
  {
85
  "name": "stdout",
86
  "output_type": "stream",
87
  "text": [
88
- "Clear and specific instructions should be provided to guide a model towards the desired output, and longer prompts can provide more clarity and context for the model, leading to more detailed and relevant outputs.\n"
89
  ]
90
  }
91
  ],
92
  "source": [
 
 
 
 
 
93
  "response = get_completion(prompt)\n",
94
  "print(response)"
95
  ]
 
10
  },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": 5,
14
  "metadata": {},
15
  "outputs": [],
16
  "source": [
 
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 6,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
 
29
  },
30
  {
31
  "cell_type": "code",
32
+ "execution_count": 7,
33
  "metadata": {},
34
  "outputs": [],
35
  "source": [
 
45
  },
46
  {
47
  "cell_type": "code",
48
+ "execution_count": 2,
49
  "metadata": {},
50
  "outputs": [],
51
  "source": [
52
+ "with open('cv.txt', 'rb') as txt_file:\n",
53
+ " cv = txt_file.read()"
 
 
 
 
 
 
 
 
 
 
54
  ]
55
  },
56
  {
57
  "cell_type": "code",
58
+ "execution_count": 4,
59
  "metadata": {},
60
  "outputs": [],
61
+ "source": [
62
+ "text = f\"\"\"{cv}\"\"\""
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": 16,
68
+ "metadata": {},
69
+ "outputs": [
70
+ {
71
+ "name": "stdout",
72
+ "output_type": "stream",
73
+ "text": [
74
+ "C.J. Duan is a Data Scientist with experience in media mix modeling, algorithmic trading, transportation scheduling, and Bayesian modeling, as well as a Ph.D in Industrial Management from Clemson University and publications in various journals and conference proceedings.\n"
75
+ ]
76
+ }
77
+ ],
78
  "source": [
79
  "prompt = f\"\"\"\n",
80
+ "Summarize the candidate's experiences in the text delimited by triple backticks \\ \n",
81
+ "into a single sentence using no more than 50 words.\n",
82
  "```{text}```\n",
83
+ "\"\"\"\n",
84
+ "response = get_completion(prompt)\n",
85
+ "print(response)\n"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 18,
91
+ "metadata": {},
92
+ "outputs": [
93
+ {
94
+ "name": "stdout",
95
+ "output_type": "stream",
96
+ "text": [
97
+ "{\n",
98
+ " 'first_name': 'C.J.',\n",
99
+ " 'surname': 'DUAN'\n",
100
+ "}\n"
101
+ ]
102
+ }
103
+ ],
104
+ "source": [
105
+ "prompt = f\"\"\"\n",
106
+ "Find the candidate's name in the text delimited by triple backticks \\ \n",
107
+ " and pit into a dictionary with keys including 'first_name', 'surname'.\n",
108
+ "```{text}```\n",
109
+ "\"\"\"\n",
110
+ "response = get_completion(prompt)\n",
111
+ "print(response)\n"
112
  ]
113
  },
114
  {
115
  "cell_type": "code",
116
+ "execution_count": 19,
117
  "metadata": {},
118
  "outputs": [
119
  {
120
  "name": "stdout",
121
  "output_type": "stream",
122
  "text": [
123
+ "The ideal candidate for this job would be a Data Scientist with a Ph.D in Industrial Management and experience in Bayesian modeling, media mix modeling, algorithmic trading, transportation scheduling, and survival analysis. They should be proficient in Python, R, Pyro, PyTorch, and Stan. The candidate should have experience in completing freelance projects and working as a Chief Research Data Scientist. They should also have experience in teaching quantitative methods and have published research papers and conference proceedings. The candidate should have excellent communication skills and be able to work independently as well as in a team.\n"
124
  ]
125
  }
126
  ],
127
  "source": [
128
+ "prompt = f\"\"\"\n",
129
+ "please write a job description based on the candidate's experiences in the text delimited by triple backticks \\ \n",
130
+ "using no more than 100 words.\n",
131
+ "```{text}```\n",
132
+ "\"\"\"\n",
133
  "response = get_completion(prompt)\n",
134
  "print(response)"
135
  ]