cxumol committed on
Commit
02fdb50
1 Parent(s): b22f922

pre-release milestone: generate full-text letter

Browse files
Files changed (5) hide show
  1. app.py +53 -20
  2. config.py +1 -1
  3. taskAI.py +58 -22
  4. test.py +1 -1
  5. util.py +22 -2
app.py CHANGED
@@ -3,6 +3,7 @@ from config import CHEAP_API_BASE, CHEAP_API_KEY, CHEAP_MODEL
3
  from config import STRONG_API_BASE, STRONG_API_KEY, STRONG_MODEL
4
  from util import is_valid_url
5
  from util import mylogger
 
6
  from taskNonAI import extract_url, file_to_html
7
  from taskAI import TaskAI
8
  ## load data
@@ -22,8 +23,7 @@ def init():
22
  os.system("shot-scraper install -b firefox")
23
  download_pandoc()
24
 
25
-
26
- def run_refine(api_base, api_key, api_model, jd_info, cv_file: str, cv_text):
27
  if jd_info:
28
  if is_valid_url(jd_info):
29
  jd = extract_url(jd_info)
@@ -42,11 +42,15 @@ def run_refine(api_base, api_key, api_model, jd_info, cv_file: str, cv_text):
42
  cv = file_to_html(cv_file)
43
  else:
44
  cv = mock_cv
 
 
 
 
45
  cheapAPI = {"base": api_base, "key": api_key, "model": api_model}
46
  taskAI = TaskAI(cheapAPI, temperature=0.2, max_tokens=2048) # max_tokens=2048
47
  info("API initialized")
48
  gen = (
49
- taskAI.jd_preprocess(topic="job description", input=jd),
50
  taskAI.cv_preprocess(input=cv),
51
  )
52
  info("tasks initialized")
@@ -67,32 +71,46 @@ def run_refine(api_base, api_key, api_model, jd_info, cv_file: str, cv_text):
67
 
68
  def run_compose(api_base, api_key, api_model, min_jd, min_cv):
69
  strongAPI = {"base": api_base, "key": api_key, "model": api_model}
70
- taskAI = TaskAI(strongAPI, temperature=0.5, max_tokens=2048)
71
- info("API initialized")
 
 
 
 
72
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  with gr.Blocks(
75
  title=DEMO_TITLE,
76
  theme=gr.themes.Base(primary_hue="blue", secondary_hue="sky", neutral_hue="slate"),
77
- ) as demo:
78
  intro = f"""# {DEMO_TITLE}
79
  > You provide job description and résumé. I write Cover letter for you!
80
- Before you use, please setup OpenAI-like API for 2 AI agents': Cheap AI and Strong AI.
81
  """
82
  gr.Markdown(intro)
83
 
84
  with gr.Row():
85
  with gr.Column(scale=1):
86
- with gr.Accordion("AI setup (OpenAI-like API)", open=False):
87
  gr.Markdown(
88
- "**Cheap AI**, an honest format converter and refinery machine, extracts essential info from job description and résumé, to reduce subsequent cost on Strong AI."
89
  )
90
  with gr.Group():
91
- weak_base = gr.Textbox(
92
  value=CHEAP_API_BASE, label="API BASE"
93
  )
94
- weak_key = gr.Textbox(value=CHEAP_API_KEY, label="API key")
95
- weak_model = gr.Textbox(value=CHEAP_MODEL, label="Model ID")
96
  gr.Markdown(
97
  "---\n**Strong AI**, a thoughtful wordsmith, generates perfect cover letters to make both you and recruiters happy."
98
  )
@@ -108,8 +126,9 @@ with gr.Blocks(
108
  gr.Markdown("## Employer - Job Description")
109
  jd_info = gr.Textbox(
110
  label="Job Description",
111
- placeholder="Paste as Full Text (recommmend) or URL (may fail)",
112
  lines=5,
 
113
  )
114
  with gr.Group():
115
  gr.Markdown("## Applicant - CV / Résumé")
@@ -126,10 +145,15 @@ with gr.Blocks(
126
  )
127
  with gr.Column(scale=2):
128
  gr.Markdown("## Result")
129
- with gr.Row():
130
- min_jd = gr.TextArea(label="Minimized Job Description")
131
- min_cv = gr.TextArea(label="Minimized CV / Résumé")
132
- cover_letter_text = gr.TextArea(label="Cover Letter")
 
 
 
 
 
133
  cover_letter_pdf = gr.File(
134
  label="Cover Letter PDF",
135
  file_count="single",
@@ -137,16 +161,25 @@ with gr.Blocks(
137
  type="filepath",
138
  )
139
  infer_btn = gr.Button("Go!", variant="primary")
 
 
140
  infer_btn.click(
 
 
 
 
141
  fn=run_refine,
142
- inputs=[weak_base, weak_key, weak_model, jd_info, cv_file, cv_text],
143
  outputs=[min_jd, min_cv],
144
- concurrency_limit=5,
 
 
 
145
  )
146
 
147
 
148
  if __name__ == "__main__":
149
  init()
150
- demo.queue(max_size=10).launch(
151
  show_error=True, debug=True, share=IS_SHARE
152
  )
 
3
  from config import STRONG_API_BASE, STRONG_API_KEY, STRONG_MODEL
4
  from util import is_valid_url
5
  from util import mylogger
6
+ from util import stream_together
7
  from taskNonAI import extract_url, file_to_html
8
  from taskAI import TaskAI
9
  ## load data
 
23
  os.system("shot-scraper install -b firefox")
24
  download_pandoc()
25
 
26
+ def prepare_input(jd_info, cv_file: str, cv_text):
 
27
  if jd_info:
28
  if is_valid_url(jd_info):
29
  jd = extract_url(jd_info)
 
42
  cv = file_to_html(cv_file)
43
  else:
44
  cv = mock_cv
45
+ return jd, cv
46
+
47
+ def run_refine(api_base, api_key, api_model, jd_info, cv_text):
48
+ jd,cv=jd_info,cv_text
49
  cheapAPI = {"base": api_base, "key": api_key, "model": api_model}
50
  taskAI = TaskAI(cheapAPI, temperature=0.2, max_tokens=2048) # max_tokens=2048
51
  info("API initialized")
52
  gen = (
53
+ taskAI.jd_preprocess(input=jd),
54
  taskAI.cv_preprocess(input=cv),
55
  )
56
  info("tasks initialized")
 
71
 
72
  def run_compose(api_base, api_key, api_model, min_jd, min_cv):
73
  strongAPI = {"base": api_base, "key": api_key, "model": api_model}
74
+ taskAI = TaskAI(strongAPI, temperature=0.6, max_tokens=4000)
75
+ info("Composing letter with CoT ...")
76
+ result = ""
77
+ for response in taskAI.compose_letter_CoT(jd=min_jd, resume=min_cv):
78
+ result += response.delta
79
+ yield result
80
 
81
def finalize_letter_txt(api_base, api_key, api_model, debug_CoT, jd, cv):
    """Extract the clean letter text plus job-application metadata.

    Runs two Cheap-AI tasks in lockstep via stream_together and yields
    their combined partial results for progressive display.
    """
    cheap_api = {"base": api_base, "key": api_key, "model": api_model}
    ai = TaskAI(cheap_api, temperature=0.2, max_tokens=2048)
    info("Finalizing letter ...")
    yield from stream_together(
        ai.purify_letter(full_text=debug_CoT),
        ai.get_jobapp_meta(JD=jd, CV=cv),
    )
91
 
92
  with gr.Blocks(
93
  title=DEMO_TITLE,
94
  theme=gr.themes.Base(primary_hue="blue", secondary_hue="sky", neutral_hue="slate"),
95
+ ) as app:
96
  intro = f"""# {DEMO_TITLE}
97
  > You provide job description and résumé. I write Cover letter for you!
98
+ Before you use, please fisrt setup API for 2 AI agents': Cheap AI and Strong AI.
99
  """
100
  gr.Markdown(intro)
101
 
102
  with gr.Row():
103
  with gr.Column(scale=1):
104
+ with gr.Accordion("AI setup (OpenAI-compatible LLM API)", open=False):
105
  gr.Markdown(
106
+ "**Cheap AI**, an honest format converter and refiner, extracts essential info from job description and résumé, to reduce subsequent cost on Strong AI."
107
  )
108
  with gr.Group():
109
+ cheap_base = gr.Textbox(
110
  value=CHEAP_API_BASE, label="API BASE"
111
  )
112
+ cheap_key = gr.Textbox(value=CHEAP_API_KEY, label="API key")
113
+ cheap_model = gr.Textbox(value=CHEAP_MODEL, label="Model ID")
114
  gr.Markdown(
115
  "---\n**Strong AI**, a thoughtful wordsmith, generates perfect cover letters to make both you and recruiters happy."
116
  )
 
126
  gr.Markdown("## Employer - Job Description")
127
  jd_info = gr.Textbox(
128
  label="Job Description",
129
+ placeholder="Paste as Full Text (recommmend) or URL",
130
  lines=5,
131
+ max_lines=10,
132
  )
133
  with gr.Group():
134
  gr.Markdown("## Applicant - CV / Résumé")
 
145
  )
146
  with gr.Column(scale=2):
147
  gr.Markdown("## Result")
148
+ with gr.Accordion("Reformatting", open=True) as reformat_zone:
149
+ with gr.Row():
150
+ min_jd = gr.TextArea(label="Reformatted Job Description")
151
+ min_cv = gr.TextArea(label="Reformatted CV / Résumé")
152
+ with gr.Accordion("Expert Zone", open=False) as expert_zone:
153
+
154
+ debug_CoT = gr.Textbox(label="Chain of Thoughts")
155
+ debug_jobapp = gr.Textbox(label="Job application meta data")
156
+ cover_letter_text = gr.Textbox(label="Cover Letter")
157
  cover_letter_pdf = gr.File(
158
  label="Cover Letter PDF",
159
  file_count="single",
 
161
  type="filepath",
162
  )
163
  infer_btn = gr.Button("Go!", variant="primary")
164
+
165
+
166
  infer_btn.click(
167
+ fn=prepare_input,
168
+ inputs=[jd_info, cv_file, cv_text],
169
+ outputs=[jd_info, cv_text]
170
+ ).then(
171
  fn=run_refine,
172
+ inputs=[cheap_base, cheap_key, cheap_model, jd_info, cv_text],
173
  outputs=[min_jd, min_cv],
174
+ ).then(fn=lambda:[gr.Accordion("Expert Zone", open=True),gr.Accordion("Reformatting", open=False)],inputs=None, outputs=[expert_zone, reformat_zone]
175
+ ).then(fn=run_compose, inputs=[strong_base, strong_key, strong_model, min_jd, min_cv], outputs=[debug_CoT]
176
+ ).then(fn=lambda:gr.Accordion("Expert Zone", open=False),inputs=None, outputs=[expert_zone]
177
+ ).then(fn=finalize_letter_txt, inputs=[cheap_base, cheap_key, cheap_model, debug_CoT, jd_info, cv_text], outputs=[cover_letter_text, debug_jobapp]
178
  )
179
 
180
 
181
  if __name__ == "__main__":
182
  init()
183
+ app.queue(max_size=10, default_concurrency_limit=1).launch(
184
  show_error=True, debug=True, share=IS_SHARE
185
  )
config.py CHANGED
@@ -5,7 +5,7 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or ""
5
 
6
  CHEAP_API_BASE = os.getenv("CHEAP_API_BASE") or OPENAI_API_BASE
7
  CHEAP_API_KEY = os.getenv("CHEAP_API_KEY") or OPENAI_API_KEY
8
- CHEAP_MODEL = os.getenv("CHEAP_MODEL") or "gpt-3.5-turbo"
9
 
10
  STRONG_API_BASE = os.getenv("STRONG_API_BASE") or OPENAI_API_BASE
11
  STRONG_API_KEY = os.getenv("STRONG_API_KEY") or OPENAI_API_KEY
 
5
 
6
  CHEAP_API_BASE = os.getenv("CHEAP_API_BASE") or OPENAI_API_BASE
7
  CHEAP_API_KEY = os.getenv("CHEAP_API_KEY") or OPENAI_API_KEY
8
+ CHEAP_MODEL = os.getenv("CHEAP_MODEL") or "gpt-4"
9
 
10
  STRONG_API_BASE = os.getenv("STRONG_API_BASE") or OPENAI_API_BASE
11
  STRONG_API_KEY = os.getenv("STRONG_API_KEY") or OPENAI_API_KEY
taskAI.py CHANGED
@@ -1,28 +1,26 @@
 
 
1
  from llama_index.llms.openai_like import OpenAILike
2
  from llama_index.core.llms import ChatMessage # , MessageRole
3
  from llama_index.core import ChatPromptTemplate
4
 
5
  from util import mylogger
6
-
7
- logger = mylogger(__name__,'%(asctime)s:%(levelname)s:%(message)s')
8
- info = logger.info
9
-
10
-
11
  ## define templates
12
 
13
  ### topic,input
14
- JD_PREPROCESS = ChatPromptTemplate(
15
  [
16
  ChatMessage(
17
  role="system",
18
- content="You are a content extractor. You never paraphrase; you only reduce content at the sentence level. Your mission is to extract information directly related to {topic} from user input. Make sure output contains complete information.",
19
  ),
20
  ChatMessage(role="user", content="{input}"),
21
  ]
22
  )
23
 
24
  ### input
25
- CV_PREPROCESS = ChatPromptTemplate(
26
  [
27
  ChatMessage(
28
  role="system",
@@ -32,23 +30,39 @@ CV_PREPROCESS = ChatPromptTemplate(
32
  ]
33
  )
34
 
35
- ## basic func
 
 
 
 
 
 
 
 
 
 
36
 
 
 
 
 
 
 
37
 
38
- def oai(base: str, key: str, model: str, **kwargs) -> OpenAILike:
39
- return OpenAILike(
40
- api_base=base,
41
- api_key=key,
42
- model=model,
43
- is_chat_model=True,
44
- context_window=window_size,
45
- **kwargs,
46
- )
47
 
 
48
 
49
  ## tasks
50
  class TaskAI(OpenAILike):
51
  def __init__(self, api: dict[str, str], **kwargs):
 
 
 
52
  def guess_window_size(model=api["model"]):
53
  _mid = model.lower()
54
  windows: dict = {
@@ -60,15 +74,37 @@ class TaskAI(OpenAILike):
60
  for ws, names in windows.items():
61
  if any([n in _mid for n in names]):
62
  window_size = ws
63
- info(f"use context window size: {window_size} for {model}")
64
  return window_size
65
 
66
  super().__init__(
67
  api_base=api["base"], api_key=api["key"], model=api["model"], is_chat_model=True, context_window=guess_window_size(), **kwargs
68
  )
69
 
70
- def jd_preprocess(self, topic: str, input: str):
71
- return self.stream_chat(JD_PREPROCESS.format_messages(topic=topic, input=input))
72
 
73
  def cv_preprocess(self, input: str):
74
- return self.stream_chat(CV_PREPROCESS.format_messages(input=input))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
  from llama_index.llms.openai_like import OpenAILike
4
  from llama_index.core.llms import ChatMessage # , MessageRole
5
  from llama_index.core import ChatPromptTemplate
6
 
7
  from util import mylogger
8
+ logger = mylogger(__name__,'%(asctime)s:%(filename)s:%(levelname)s:%(message)s')
 
 
 
 
9
  ## define templates
10
 
11
  ### topic,input
12
# Prompt template: reduce raw user input to only the requested information
# ({to_extract}); the model must not paraphrase, only cut at sentence level.
EXTRACT_INFO = ChatPromptTemplate(
    [
        ChatMessage(
            role="system",
            content="You are a content extractor. You never paraphrase; you only reduce content at the sentence level. Your mission is to extract {to_extract} from user input. Make sure output is complete without missing parts. Output is in a clean text format",
        ),
        ChatMessage(role="user", content="{input}"),
    ]
)
21
 
22
  ### input
23
+ SIMPLIFY_MD = ChatPromptTemplate(
24
  [
25
  ChatMessage(
26
  role="system",
 
30
  ]
31
  )
32
 
33
+ ### template, content
34
# Prompt template: make the model behave as a JSON API that fills in the
# exact object shape given by {template} from the user-supplied {content}.
JSON_API = ChatPromptTemplate(
    [
        ChatMessage(
            role="system",
            content="You are an AI JSON API. You convert user input into a JSON object. API returns exactly in this template: {template}",
        ),
        ChatMessage(role="user", content="{content}"),
    ]
)
43
def keys_to_template(keys):
    """Return a JSON string mapping each of *keys* to an empty string.

    Used to show the model the exact JSON object shape it must return.
    Key order in the output follows the input order.
    """
    # dict.fromkeys (classmethod) — no throwaway dict() instance needed
    return json.dumps(dict.fromkeys(keys, ""))
44
 
45
### resume, jd
# Prompt template: draft the cover letter with explicit chain-of-thought
# reasoning before the letter itself (typos in the original prompt fixed:
# "scoiety", "bussiness", grammar of the CoT instructions).
LETTER_COMPOSE = ChatPromptTemplate(
    [
        ChatMessage(
            role="system",
            content="""You are a thoughtful wordsmith. You have a deep understanding of the society and the business world. You are always willing to help people find a job. Your mission is to write a compelling cover letter tailored for user to get the specified job, based on the provided RESUME and JOB_DESCRIPTION. Your writing is based on ground truth and you never fabricate anything you are unsure about.

Before officially writing the letter, think step by step. First, list what makes a perfect cover letter in general, and in order to write a perfect cover letter, what key points do you have to learn from the RESUME and JOB_DESCRIPTION. Then, carefully analyze the given RESUME and JOB_DESCRIPTION, take a deep breath and propose 3 best tactics to convince the recruiter that the applicant fits the role. Ensure your thoughts are expressed clearly and then write the complete cover letter.""",
        ),
        ChatMessage(role="user", content="<RESUME>\n{resume}\n</RESUME>\n\n<JOB_DESCRIPTION>\n{jd}</JOB_DESCRIPTION>\n"),
    ]
)
 
 
 
 
57
 
58
+ ## basic func
59
 
60
  ## tasks
61
  class TaskAI(OpenAILike):
62
  def __init__(self, api: dict[str, str], **kwargs):
63
+
64
+ log = logger.info
65
+
66
  def guess_window_size(model=api["model"]):
67
  _mid = model.lower()
68
  windows: dict = {
 
74
  for ws, names in windows.items():
75
  if any([n in _mid for n in names]):
76
  window_size = ws
77
+ log(f"use context window size: {window_size} for {model}")
78
  return window_size
79
 
80
  super().__init__(
81
  api_base=api["base"], api_key=api["key"], model=api["model"], is_chat_model=True, context_window=guess_window_size(), **kwargs
82
  )
83
 
84
+ def jd_preprocess(self, input: str):
85
+ return self.stream_chat(EXTRACT_INFO.format_messages(to_extract="information directly related to job description", input=input))
86
 
87
  def cv_preprocess(self, input: str):
88
+ return self.stream_chat(SIMPLIFY_MD.format_messages(input=input))
89
+
90
+ def compose_letter_CoT(self, resume: str, jd: str):
91
+ return self.stream_chat(LETTER_COMPOSE.format_messages(resume=resume, jd=jd))
92
+
93
+ def get_jobapp_meta(self, JD, CV):
94
+ meta_JD = self.chat(JSON_API.format_messages(template=keys_to_template(["company_full_name", "job_title"]), content=JD)).message.content
95
+ # yield meta_JD
96
+ meta_CV = self.chat(JSON_API.format_messages(template=keys_to_template(["applicant_full_name", "applicant_contact_information"]), content=CV)).message.content
97
+ # yield meta_JD+'\n'+meta_CV
98
+ try:
99
+ meta_JD = json.loads(meta_JD.strip())
100
+ meta_CV = json.loads(meta_CV.strip())
101
+ except:
102
+ raise ValueError(f"AI didn't return a valid JSON string. Try again or consider a better model for CheapAI. \n{meta_JD}\n{meta_CV}")
103
+ meta = dict()
104
+ meta.update(meta_JD)
105
+ meta.update(meta_CV)
106
+ yield json.dumps(meta, indent=2)
107
+
108
+ def purify_letter(self, full_text):
109
+ return self.stream_chat(EXTRACT_INFO.format_messages(to_extract="the letter part from greeting to sign-off, and remove applicant's name at end", input=full_text))
110
+
test.py CHANGED
@@ -28,5 +28,5 @@ def test_taskAI():
28
  print(chunk)
29
 
30
  if __name__ == "__main__":
31
-
32
  # integration()
 
28
  print(chunk)
29
 
30
  if __name__ == "__main__":
31
+ test_taskAI()
32
  # integration()
util.py CHANGED
@@ -2,12 +2,13 @@ import tiktoken
2
 
3
  from urllib.parse import urlparse
4
  import requests
5
-
6
  import logging
7
 
 
 
8
  def mylogger(name, format, level=logging.INFO):
9
  # Create a custom logger
10
- logger = logging.getLogger("custom_logger")
11
  logger.setLevel(level)
12
  # Configure the custom logger with the desired settings
13
  formatter = logging.Formatter(format)
@@ -42,3 +43,22 @@ def is_valid_openai_api_key(api_base:str, api_key: str)->bool:
42
 
43
  def zip_api(api_base:str, api_key:str, model:str)->dict[str, str]:
44
  return {"base": api_base, "key": api_key, "model": model}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  from urllib.parse import urlparse
4
  import requests
 
5
  import logging
6
 
7
+ from typing import Generator
8
+
9
  def mylogger(name, format, level=logging.INFO):
10
  # Create a custom logger
11
+ logger = logging.getLogger(name)
12
  logger.setLevel(level)
13
  # Configure the custom logger with the desired settings
14
  formatter = logging.Formatter(format)
 
43
 
44
  def zip_api(api_base:str, api_key:str, model:str)->dict[str, str]:
45
  return {"base": api_base, "key": api_key, "model": model}
46
+
47
def stream_together(*gens: Generator):
    """Advance several generators in lockstep, yielding accumulated outputs.

    Each round pulls one item from every generator that is still alive and
    appends it — or its ``.delta`` attribute, for LLM streaming responses —
    to a per-generator string accumulator. After each productive round a
    *copy* of the accumulator list is yielded.

    Fixes over the original:
      * yields a snapshot copy instead of the same live list object, so a
        consumer that retains yielded values does not see every snapshot
        silently mutate into the final state;
      * stops without emitting a duplicate final yield once every
        generator is exhausted.
    """
    n_gens = len(gens)
    result = [""] * n_gens  # one growing string per generator
    while True:
        progressed = False
        for i in range(n_gens):
            try:
                chunk = next(gens[i])
            except StopIteration:
                continue  # this generator is done; keep draining the others
            # LLM stream responses carry the newly generated text in `.delta`
            if hasattr(chunk, "delta"):
                chunk = chunk.delta
            result[i] += chunk
            progressed = True
        if not progressed:
            break
        yield list(result)  # snapshot copy — never alias the live list