cxumol committed
Commit b22f922 (0 parents)

preprocess ok

.gitattributes ADDED
@@ -0,0 +1,3 @@
+ *.ttf filter=lfs diff=lfs merge=lfs -text
+ *.otf filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,162 @@
+ *_secret.py
+ *_secret.py*
+
+ .local/
+ .ruff_cache/
+ bin/
+ *.deb
+ *.bin
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
.lightning_studio/.studiorc ADDED
@@ -0,0 +1 @@
+ /settings/.studiorc
.lightning_studio/on_start.sh ADDED
@@ -0,0 +1,13 @@
+ #!/bin/bash
+
+ # This script runs every time your Studio starts, from your home directory.
+
+ # List files under fast_load that need to load quickly on start (e.g. model checkpoints).
+ #
+ # ! fast_load
+ # <your file here>
+
+ # Add your startup commands below.
+ #
+ # Example: streamlit run my_app.py
+ # Example: gradio my_app.py
.litng.gradio.sh ADDED
@@ -0,0 +1,3 @@
+ #!/bin/bash
+ pip install -U "gradio>=4,<=5"
+ GRADIO_SERVER_PORT=7860 gradio app.py --watch-dirs .
.vscode/settings.json ADDED
@@ -0,0 +1,75 @@
+ {
+     "python.defaultInterpreterPath": "/home/zeus/miniconda3/envs/cloudspace/bin/python",
+     "workbench.startupEditor": "none",
+     "python.terminal.activateEnvironment": false,
+     "terminal.integrated.drawBoldTextInBrightColors": false,
+     "terminal.integrated.gpuAcceleration": "on",
+     "terminal.integrated.localEchoLatencyThreshold": 0,
+     "terminal.integrated.localEchoEnabled": "off",
+     "terminal.integrated.localEchoStyle": "#000000",
+     "remote.autoForwardPorts": false,
+     "terminal.integrated.defaultProfile.linux": "zsh",
+     "terminal.integrated.tabs.title": "${process}${separator}${task}",
+     "jupyter.notebookFileRoot": "${workspaceFolder}",
+     "terminal.integrated.enableMultiLinePasteWarning": false,
+     "files.exclude": {
+         "venv": true,
+         "lightning-annotations.json": true,
+         ".lightning-app-sync": true,
+         ".lighting-app-sync": true,
+         ".lightning-app-run": true,
+         "**/*.lightning_upload": true,
+         ".ssh*/**": true,
+         ".conda*/**": true,
+         ".config*/**": true,
+         ".npm*/**": true,
+         ".nvm*/**": true,
+         ".bower*/**": true,
+         ".ipython": true,
+         ".local*/**": true,
+         ".oh-my-zsh*/**": true,
+         ".cache": true,
+         "miniconda3": true,
+         ".condarc": true,
+         ".gitconfig": true,
+         ".hushlogin": true,
+         ".profile": true,
+         ".screenrc": true,
+         ".sudo_as_admin_successful": true,
+         ".zcompdump*": true,
+         ".bash_history": true,
+         ".bashrc": true,
+         ".zsh_history": true,
+         ".zshrc": true,
+         ".zshenv": true,
+         ".zlogin": true,
+         ".zprofile": true,
+         ".zlogout": true,
+         ".python_history": true,
+         ".lightningignore": true,
+         ".nv": true,
+         ".docker": true,
+         ".jupyter": true,
+         ".lightning": true,
+         ".vscode": true,
+         ".vscode/**": true,
+         ".wget-hsts": true,
+         ".vscode-server/**": true,
+         ".vscode-server-insiders/**": true,
+         ".tmplaigit": true,
+         ".viminfo": true
+     },
+     "files.associations": {
+         "*.studiorc": "shellscript"
+     },
+     "jupyter.kernels.excludePythonEnvironments": [
+         "/commands/python3",
+         "/commands/python",
+         "/bin/python3.10",
+         "/usr/bin/python3.10",
+         "/bin/python3",
+         "/usr/bin/python3",
+         "/bin/python3.8",
+         "/usr/bin/python3.8"
+     ]
+ }
app.py ADDED
@@ -0,0 +1,152 @@
+ from config import DEMO_TITLE, IS_SHARE, CV_EXT, EXT_TXT
+ from config import CHEAP_API_BASE, CHEAP_API_KEY, CHEAP_MODEL
+ from config import STRONG_API_BASE, STRONG_API_KEY, STRONG_MODEL
+ from util import is_valid_url
+ from util import mylogger
+ from taskNonAI import extract_url, file_to_html
+ from taskAI import TaskAI
+ ## load data
+ from data_test import mock_jd, mock_cv
+ ## ui
+ import gradio as gr
+ ## dependency
+ from pypandoc.pandoc_download import download_pandoc
+ ## std
+ import os
+
+
+ logger = mylogger(__name__, "%(asctime)s:%(levelname)s:%(message)s")
+ info = logger.info
+
+ def init():
+     os.system("shot-scraper install -b firefox")
+     download_pandoc()
+
+
+ def run_refine(api_base, api_key, api_model, jd_info, cv_file: str, cv_text):
+     if jd_info:
+         if is_valid_url(jd_info):
+             jd = extract_url(jd_info)
+         else:
+             jd = jd_info
+     else:
+         jd = mock_jd
+
+     if cv_text:
+         cv = cv_text
+     elif cv_file:
+         if any([cv_file.endswith(ext) for ext in EXT_TXT]):
+             with open(cv_file, "r", encoding="utf8") as f:
+                 cv = f.read()
+         else:
+             cv = file_to_html(cv_file)
+     else:
+         cv = mock_cv
+     cheapAPI = {"base": api_base, "key": api_key, "model": api_model}
+     taskAI = TaskAI(cheapAPI, temperature=0.2, max_tokens=2048)
+     info("API initialized")
+     gen = (
+         taskAI.jd_preprocess(topic="job description", input=jd),
+         taskAI.cv_preprocess(input=cv),
+     )
+     info("tasks initialized")
+     result = [""] * 2
+     while True:
+         stop: bool = True
+         for i in range(len(gen)):
+             try:
+                 result[i] += next(gen[i]).delta
+                 stop = False
+             except StopIteration:
+                 # info(f"gen[{i}] exhausted")
+                 pass
+         yield result
+         if stop:
+             info("tasks done")
+             break
+
+ def run_compose(api_base, api_key, api_model, min_jd, min_cv):
+     strongAPI = {"base": api_base, "key": api_key, "model": api_model}
+     taskAI = TaskAI(strongAPI, temperature=0.5, max_tokens=2048)
+     info("API initialized")
+
+
+ with gr.Blocks(
+     title=DEMO_TITLE,
+     theme=gr.themes.Base(primary_hue="blue", secondary_hue="sky", neutral_hue="slate"),
+ ) as demo:
+     intro = f"""# {DEMO_TITLE}
+ > You provide a job description and a résumé. I write a cover letter for you!
+ Before you use it, please set up OpenAI-like APIs for the 2 AI agents: Cheap AI and Strong AI.
+ """
+     gr.Markdown(intro)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             with gr.Accordion("AI setup (OpenAI-like API)", open=False):
+                 gr.Markdown(
+                     "**Cheap AI**, an honest format converter and refiner, extracts the essential info from the job description and résumé to reduce the subsequent cost of calling Strong AI."
+                 )
+                 with gr.Group():
+                     weak_base = gr.Textbox(
+                         value=CHEAP_API_BASE, label="API BASE"
+                     )
+                     weak_key = gr.Textbox(value=CHEAP_API_KEY, label="API key")
+                     weak_model = gr.Textbox(value=CHEAP_MODEL, label="Model ID")
+                 gr.Markdown(
+                     "---\n**Strong AI**, a thoughtful wordsmith, generates perfect cover letters to make both you and recruiters happy."
+                 )
+                 with gr.Group():
+                     strong_base = gr.Textbox(
+                         value=STRONG_API_BASE, label="API BASE"
+                     )
+                     strong_key = gr.Textbox(
+                         value=STRONG_API_KEY, label="API key", type="password"
+                     )
+                     strong_model = gr.Textbox(value=STRONG_MODEL, label="Model ID")
+             with gr.Group():
+                 gr.Markdown("## Employer - Job Description")
+                 jd_info = gr.Textbox(
+                     label="Job Description",
+                     placeholder="Paste as full text (recommended) or URL (may fail)",
+                     lines=5,
+                 )
+             with gr.Group():
+                 gr.Markdown("## Applicant - CV / Résumé")
+                 with gr.Row():
+                     cv_file = gr.File(
+                         label="Allowed formats: " + " ".join(CV_EXT),
+                         file_count="single",
+                         file_types=CV_EXT,
+                         type="filepath",
+                     )
+                     cv_text = gr.TextArea(
+                         label="Or enter text",
+                         placeholder="If you both upload a file and enter text here, only this text will be used.",
+                     )
+         with gr.Column(scale=2):
+             gr.Markdown("## Result")
+             with gr.Row():
+                 min_jd = gr.TextArea(label="Minimized Job Description")
+                 min_cv = gr.TextArea(label="Minimized CV / Résumé")
+             cover_letter_text = gr.TextArea(label="Cover Letter")
+             cover_letter_pdf = gr.File(
+                 label="Cover Letter PDF",
+                 file_count="single",
+                 file_types=[".pdf"],
+                 type="filepath",
+             )
+     infer_btn = gr.Button("Go!", variant="primary")
+     infer_btn.click(
+         fn=run_refine,
+         inputs=[weak_base, weak_key, weak_model, jd_info, cv_file, cv_text],
+         outputs=[min_jd, min_cv],
+         concurrency_limit=5,
+     )
+
+
+ if __name__ == "__main__":
+     init()
+     demo.queue(max_size=10).launch(
+         show_error=True, debug=True, share=IS_SHARE
+     )
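
For reference, here is a minimal self-contained sketch of the round-robin streaming pattern used at the end of `run_refine`, with plain Python generators standing in for the two LLM token streams (illustrative only, not part of this commit):

```python
# Sketch of the interleaved-streaming loop: drain several generators in
# lock-step and yield partial results after every round, so a UI such as
# Gradio can update all output boxes as chunks arrive.
def demo_round_robin():
    gens = (
        iter(["Job ", "description ", "summary"]),  # stand-in for the JD stream
        iter(["CV ", "summary"]),                   # stand-in for the CV stream
    )
    result = [""] * len(gens)
    while True:
        stop = True
        for i, g in enumerate(gens):
            try:
                result[i] += next(g)   # append the next chunk from stream i
                stop = False
            except StopIteration:
                pass                   # this stream is exhausted; keep draining the rest
        yield result                   # partial [jd, cv] pair, like run_refine's outputs
        if stop:
            break


if __name__ == "__main__":
    for partial in demo_round_robin():
        print(partial)
```

Each `yield` hands back a partial pair in the same shape as the Gradio outputs, so both text areas fill in gradually instead of waiting for the slower stream to finish.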
config.py ADDED
@@ -0,0 +1,20 @@
+ import os
+
+ OPENAI_API_BASE = os.getenv("OPENAI_API_BASE") or "https://api.openai.com/v1"
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or ""
+
+ CHEAP_API_BASE = os.getenv("CHEAP_API_BASE") or OPENAI_API_BASE
+ CHEAP_API_KEY = os.getenv("CHEAP_API_KEY") or OPENAI_API_KEY
+ CHEAP_MODEL = os.getenv("CHEAP_MODEL") or "gpt-3.5-turbo"
+
+ STRONG_API_BASE = os.getenv("STRONG_API_BASE") or OPENAI_API_BASE
+ STRONG_API_KEY = os.getenv("STRONG_API_KEY") or OPENAI_API_KEY
+ STRONG_MODEL = os.getenv("STRONG_MODEL") or "mixtral-8x7b"
+
+ IS_SHARE = bool(os.getenv("IS_SHARE"))
+
+ DEMO_TITLE = "Cover Letter Generator"
+ DEMO_DESCRIPTION = "A demo that uses OpenAI-like APIs to generate a cover letter tailored to a given job description and the applicant's CV / résumé."
+
+ CV_EXT = [".typ", ".tex", ".html", ".docx", ".rst", ".rtf", ".odt", ".txt", ".md"]
+ EXT_TXT = [".txt", ".md"]
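
Because every setting above falls back through `or`, the two agents can be pointed at different OpenAI-compatible backends purely via environment variables set before `config` is imported. A small illustrative sketch (the endpoint and model values below are placeholders, not defaults of this project):

```python
import os

# Placeholder values for illustration only.
os.environ["CHEAP_API_BASE"] = "http://localhost:8080/v1"  # hypothetical local server
os.environ["CHEAP_MODEL"] = "my-small-model"               # hypothetical model id
os.environ["STRONG_API_KEY"] = "sk-..."                    # your real key goes here

import config  # env vars must be set before this import

print(config.CHEAP_API_BASE, config.CHEAP_MODEL, config.STRONG_MODEL)
```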
config_secret.tmpl.py ADDED
@@ -0,0 +1,3 @@
+ from util import zip_api
+
+ api_test = zip_api("https://api.example.com/v1", "YOUR_API_KEY", "your-model-id")  # placeholders: fill in real values
data_test.py ADDED
@@ -0,0 +1,67 @@
+ mock_jd = """
+ Queen of Hearts' Garden - Card Guards Job Description
+
+ The Queen of Hearts' Garden is seeking a compassionate and highly motivated individual to join our team of Card Guards. As a Card Guard, you will be the guardian of the hearts of our beloved playing cards, ensuring their safety and integrity.
+
+ Responsibilities:
+
+ Guardian of the Cards: You will be responsible for safeguarding the Queen's precious playing cards from damage, loss, or misuse. This includes meticulously shuffling, dealing, and returning the cards to their proper place after use.
+ Keeper of Secrets: You will be privy to the secrets of the cards, their values, and their histories. This knowledge will allow you to provide a more engaging and personalized experience for our guests.
+ Master of the Deal: You will be adept at dealing cards with precision and fairness, ensuring that each player has an equal opportunity to win.
+ Embracer of Joy: You will be the embodiment of joy and happiness, spreading smiles across the faces of our guests as you interact with them and ensure their cards are treated with the utmost respect.
+
+ Qualifications:
+
+ Passion for Playing Cards: You have a deep love for playing cards and a strong understanding of their rules and history.
+ Exceptional Customer Service: You are able to interact with guests professionally and with empathy, ensuring their experience is enjoyable and memorable.
+ Attention to Detail: You have a meticulous eye for detail and are able to handle delicate cards with the utmost care.
+ Strong Communication Skills: You are able to communicate clearly and effectively with guests, resolving any issues with patience and understanding.
+
+ Additional Desired Qualities:
+
+ Creativity: You have a creative spirit and are able to find innovative solutions to challenges.
+ Flexibility: You are adaptable and can easily adjust to new situations and changes.
+ Positive Attitude: You have a positive attitude and are able to spread joy to others.
+
+ Please send your resume and cover letter to [email address] to apply. We look forward to meeting you and learning more about your passion for playing cards and your ability to bring joy to our guests.
+ """
+ mock_cv = """
+ Dorothy Gale
+
+ 123 Main St., Emerald City, KS 12345, (123) 456-7890, dorothy@wizardofoz.com
+
+ Summary
+
+ Highly motivated and resourceful individual with a strong work ethic and a positive attitude. Proven ability to manage multiple tasks effectively and work independently. Skilled in a variety of industries, including retail, hospitality, and education.
+
+ Skills
+
+ Strong communication and interpersonal skills
+ Excellent organizational and time management skills
+ Proficiency in Microsoft Office Suite
+ Strong customer service orientation
+ Ability to work independently and as part of a team
+ Creativity and problem-solving abilities
+
+ Experience
+
+ Teacher Emerald City Elementary School, Emerald City, KS 2000-2006
+
+ Provided a safe and engaging learning environment for students
+ Developed and implemented lesson plans that aligned with curriculum standards
+ Mentored and guided other teachers
+
+ Retail Sales Associate The Ruby Slipper Shop, Emerald City, KS 1998-2000
+
+ Provided excellent customer service to clients
+ Managed inventory and maintained store displays
+ Achieved high sales goals
+
+ Education
+
+ Bachelor of Arts in Education, University of Kansas Master of Arts in Education, University of Kansas
+
+ References
+
+ Available upon request.
+ """
pyproject.toml ADDED
@@ -0,0 +1,31 @@
+ [tool.ruff]
+ # Exclude a variety of commonly ignored directories.
+ exclude = [
+     ".bzr",
+     ".direnv",
+     ".eggs",
+     ".git",
+     ".git-rewrite",
+     ".hg",
+     ".ipynb_checkpoints",
+     ".mypy_cache",
+     ".nox",
+     ".pants.d",
+     ".pyenv",
+     ".pytest_cache",
+     ".pytype",
+     ".ruff_cache",
+     ".svn",
+     ".tox",
+     ".venv",
+     ".vscode",
+     "__pypackages__",
+     "_build",
+     "buck-out",
+     "build",
+     "dist",
+     "node_modules",
+     "site-packages",
+     "venv",
+     ".local",
+ ]
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ # UI
+ gradio>=4.0,<=5.0
+ # external tool
+ pypandoc
+ shot-scraper
+ # LLM related
+ llama-index-llms-openai-like
+ tiktoken
+ # lib
+ requests
+ # dev tools
+ ruff
run.sh ADDED
@@ -0,0 +1,2 @@
+ #!/bin/bash
+ IS_SHARE=1 python app.py
setup.sh ADDED
@@ -0,0 +1,3 @@
+ pip install -r requirements.txt
+ ruff check
+ ruff format
taskAI.py ADDED
@@ -0,0 +1,74 @@
+ from llama_index.llms.openai_like import OpenAILike
+ from llama_index.core.llms import ChatMessage  # , MessageRole
+ from llama_index.core import ChatPromptTemplate
+
+ from util import mylogger
+
+ logger = mylogger(__name__, "%(asctime)s:%(levelname)s:%(message)s")
+ info = logger.info
+
+
+ ## define templates
+
+ ### topic, input
+ JD_PREPROCESS = ChatPromptTemplate(
+     [
+         ChatMessage(
+             role="system",
+             content="You are a content extractor. You never paraphrase; you only reduce content at the sentence level. Your mission is to extract information directly related to {topic} from user input. Make sure output contains complete information.",
+         ),
+         ChatMessage(role="user", content="{input}"),
+     ]
+ )
+
+ ### input
+ CV_PREPROCESS = ChatPromptTemplate(
+     [
+         ChatMessage(
+             role="system",
+             content="You are an AI text converter alternative to pandoc. Your mission is to convert the input content into markdown. Regarding styles, only keep headers, lists and links, and remove other styles.",
+         ),
+         ChatMessage(role="user", content="{input}"),
+     ]
+ )
+
+ ## basic func
+
+
+ def oai(base: str, key: str, model: str, window_size: int = 3900, **kwargs) -> OpenAILike:
+     return OpenAILike(
+         api_base=base,
+         api_key=key,
+         model=model,
+         is_chat_model=True,
+         context_window=window_size,
+         **kwargs,
+     )
+
+
+ ## tasks
+ class TaskAI(OpenAILike):
+     def __init__(self, api: dict[str, str], **kwargs):
+         def guess_window_size(model=api["model"]):
+             _mid = model.lower()
+             windows: dict = {
+                 8000: ["gemma", "8k"],
+                 16000: ["16k"],
+                 32000: ["mistral", "mixtral", "32k"],
+             }
+             window_size = 3900
+             for ws, names in windows.items():
+                 if any([n in _mid for n in names]):
+                     window_size = ws
+             info(f"use context window size: {window_size} for {model}")
+             return window_size
+
+         super().__init__(
+             api_base=api["base"], api_key=api["key"], model=api["model"], is_chat_model=True, context_window=guess_window_size(), **kwargs
+         )
+
+     def jd_preprocess(self, topic: str, input: str):
+         return self.stream_chat(JD_PREPROCESS.format_messages(topic=topic, input=input))
+
+     def cv_preprocess(self, input: str):
+         return self.stream_chat(CV_PREPROCESS.format_messages(input=input))
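
For illustration, the prompt templates can be rendered into chat messages without touching any API, which is handy for checking what Cheap AI will actually see. A sketch (the input text is a placeholder):

```python
from taskAI import JD_PREPROCESS

# Render the template locally; no API call is made here.
msgs = JD_PREPROCESS.format_messages(
    topic="job description",
    input="We are hiring a Card Guard for the Queen of Hearts' Garden...",
)
for m in msgs:
    print(m.role, ":", str(m.content)[:80])
```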
taskNonAI.py ADDED
@@ -0,0 +1,32 @@
+ import pypandoc
+ ## stdlib
+ import subprocess
+ import json
+ from typing import Optional
+
+ def file_to_html(file_path: str) -> str:
+     return pypandoc.convert_file(file_path, "html")
+
+
+ def extract_url(url: str) -> Optional[str]:
+     cmd = f"""shot-scraper javascript -b firefox \
+     "{url}" "
+     async () => {{
+         const readability = await import('https://cdn.skypack.dev/@mozilla/readability');
+         return (new readability.Readability(document)).parse();
+     }}"
+     """
+     result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+     try:
+         result.check_returncode()
+     except subprocess.CalledProcessError:
+         raise Exception(
+             f"Please try copy-paste as input. Failed to extract content from url: {url}. Error: {result.stderr}"
+         )
+     result = json.loads(result.stdout)
+     try:
+         return result["textContent"]
+     except (KeyError, TypeError):
+         raise Exception(
+             f"Please try copy-paste as input. Failed to extract content from: {url}. Didn't find content from given URL!"
+         )
test.py ADDED
@@ -0,0 +1,32 @@
+ from taskAI import TaskAI
+ from data_test import mock_jd, mock_cv
+ from config_secret import api_test
+
+ from llama_index.llms.openai_like import OpenAILike
+ from llama_index.core.llms import ChatMessage
+
+
+ def test_integration():
+     messages = [
+         ChatMessage(role="system", content="You are a helpful assistant"),
+         ChatMessage(role="user", content="What is your name"),
+     ]
+     print("Testing integration:")
+     response = OpenAILike(
+         model=api_test["model"],
+         api_key=api_test["key"],
+         api_base=api_test["base"],
+         max_retries=0,
+         is_chat_model=True,
+     ).chat(messages)
+     print(response)
+
+ def test_taskAI():
+     taskAI = TaskAI(api_test)
+     gen = taskAI.cv_preprocess(mock_cv)
+     for chunk in gen:
+         print(chunk)
+
+ if __name__ == "__main__":
+     # test_integration()
+     test_taskAI()
util.py ADDED
@@ -0,0 +1,44 @@
+ import tiktoken
+
+ from urllib.parse import urlparse
+ import requests
+
+ import logging
+
+ def mylogger(name, format, level=logging.INFO):
+     # Create a custom logger
+     logger = logging.getLogger(name)
+     logger.setLevel(level)
+     # Configure the custom logger with the desired settings
+     formatter = logging.Formatter(format)
+     c_handler = logging.StreamHandler()
+     c_handler.setFormatter(formatter)
+     # file_handler = logging.FileHandler('custom_logs.log')
+     # file_handler.setFormatter(formatter)
+     logger.addHandler(c_handler)
+
+     return logger
+
+
+ def count_token(text, encoding="cl100k_base"):
+     return len(tiktoken.get_encoding(encoding).encode(text))
+
+
+ def is_valid_url(url: str) -> bool:
+     try:
+         result = urlparse(url)
+         return all([result.scheme, result.netloc])
+     except ValueError:
+         return False
+
+
+ def is_valid_openai_api_key(api_base: str, api_key: str) -> bool:
+     headers = {"Authorization": f"Bearer {api_key}"}
+
+     response = requests.get(api_base, headers=headers)
+
+     return response.status_code == 200
+
+
+ def zip_api(api_base: str, api_key: str, model: str) -> dict[str, str]:
+     return {"base": api_base, "key": api_key, "model": model}
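
As a usage sketch (requires `tiktoken` from requirements.txt; the strings and window size below are placeholders), `count_token` can be used to sanity-check that a minimized JD/CV pair fits inside the context window that `TaskAI` guesses for a given model id:

```python
from util import count_token

min_jd = "Card Guard role at the Queen of Hearts' Garden..."    # placeholder text
min_cv = "Dorothy Gale, teacher and retail sales associate..."  # placeholder text
window_size = 32000  # e.g. what the window-size guess in TaskAI picks for a "mixtral" model id

total = count_token(min_jd) + count_token(min_cv)
print(f"{total} prompt tokens vs. a {window_size}-token context window")
```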