# resumate/functions/job_call.py
# Last commit: "Added GitHub repository selector" (aa26954, verified) -- gperdrizet
'''Functions for summarizing and formatting job calls.'''
import os
import json
import logging
import unicodedata
from pathlib import Path
from datetime import datetime
from openai import OpenAI
from configuration import (
INFERENCE_URL,
SUMMARIZER_MODEL,
JOB_CALL_EXTRACTION_PROMPT
)
from functions.helper import clean_text_whitespace
# pylint: disable=broad-exception-caught
def summarize_job_call(job_call: str) -> str:
    '''Extracts and summarizes key information from job call.

    The text is Unicode-normalized (NFKC), passed through
    clean_text_whitespace, and sent to SUMMARIZER_MODEL via the
    OpenAI-compatible endpoint at INFERENCE_URL. The model's reply is parsed
    as JSON and handed to _save_job_call_data before being returned.

    Args:
        job_call (str): Job call text to summarize. Must be provided and
            non-empty; blank or missing input is rejected without calling
            the API.

    Returns:
        The parsed JSON reply (expected to be a dict) when the model returns
        valid JSON. If the reply is not valid JSON, the raw reply text is
        returned (and saved) unchanged as a best effort. None if the input is
        blank, the API call fails, or the reply carries no content.

        NOTE(review): the ``-> str`` annotation predates the JSON parsing and
        is kept only so the signature stays unchanged for callers.
    '''

    logger = logging.getLogger(f'{__name__}.summarize_job_call')

    # Reject missing/blank input up front instead of spending an API call on it
    if not isinstance(job_call, str) or not job_call.strip():
        logger.warning('No job call text provided, skipping summarization')
        return None

    # Clean up the job call text
    job_call = unicodedata.normalize('NFKC', job_call)
    job_call = clean_text_whitespace(job_call)

    client = OpenAI(
        base_url=INFERENCE_URL,
        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
    )

    messages = [
        {
            'role': 'system',
            # NOTE(review): the job call text is appended to the system prompt
            # here and sent again in the user message below - confirm the
            # duplication is intended.
            'content': f'{JOB_CALL_EXTRACTION_PROMPT}{job_call}'
        },
        {
            'role': 'user',
            'content': f'JOB CALL\n{job_call}'
        }
    ]

    try:
        response = client.chat.completions.create(
            model=SUMMARIZER_MODEL,
            messages=messages,
        )

    except Exception as e:
        logger.error('Error during job summarization API call: %s', e)
        return None

    # A completion can come back with no choices or with null content, so
    # guard the lookup instead of letting it raise out of this function.
    try:
        summary = response.choices[0].message.content

    except (AttributeError, IndexError, TypeError) as e:
        logger.error('Job summarization response had no usable choice: %s', e)
        return None

    if not summary:
        logger.error('Job summarization response contained no content')
        return None

    logger.debug('Raw job call summary: %s', summary)

    try:
        summary = json.loads(summary)

    except json.JSONDecodeError as e:
        # Best effort: keep the raw reply text so it is still saved and returned
        logger.error("Failed to parse job call summary JSON: %s", e)

    else:
        # Valid JSON is not necessarily an object; only a dict has keys to report
        if isinstance(summary, dict):
            logger.debug('Job call summary keys: %s', list(summary.keys()))

        else:
            logger.warning(
                'Job call summary JSON is a %s, expected an object',
                type(summary).__name__
            )

    # Save the extracted job call information to data directory
    try:
        _save_job_call_data(summary)

    except Exception as save_error:
        logger.warning("Failed to save job call data: %s", str(save_error))

    return summary
def _save_job_call_data(extracted_summary: str) -> None:
"""
Save job call data (original and extracted summary) to the data/job_calls directory.
Args:
extracted_summary (str): The extracted/summarized job call information
"""
logger = logging.getLogger(f'{__name__}._save_job_call_data')
try:
# Get the project root directory and job_calls subdirectory
project_root = Path(__file__).parent.parent
job_calls_dir = project_root / "data" / "job_calls"
# Create job_calls directory if it doesn't exist
job_calls_dir.mkdir(parents=True, exist_ok=True)
# Create timestamped filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"job_call_extracted_{timestamp}.json"
file_path = job_calls_dir / filename
# Save to JSON file
with open(file_path, 'w', encoding='utf-8') as output_file:
json.dump(extracted_summary, output_file)
except Exception as e:
logger.error("Error saving job call data: %s", str(e))