Spaces:
Configuration error
Configuration error
'''Functions for summarizing and formatting job calls.''' | |
import os | |
import json | |
import logging | |
import unicodedata | |
from pathlib import Path | |
from datetime import datetime | |
from openai import OpenAI | |
from configuration import ( | |
INFERENCE_URL, | |
SUMMARIZER_MODEL, | |
JOB_CALL_EXTRACTION_PROMPT | |
) | |
from functions.helper import clean_text_whitespace | |
# pylint: disable=broad-exception-caught | |
def summarize_job_call(job_call: str) -> 'dict | str | None':
    '''Extracts and summarizes key information from job call.

    The text is NFKC-normalized and whitespace-cleaned, sent to the
    summarizer model, and the model's reply is parsed as JSON. The result is
    then handed to _save_job_call_data on a best-effort basis; a failure to
    save never affects the return value.

    Args:
        job_call (str): Job call text to summarize. Must be provided and non-empty.

    Returns:
        The parsed JSON reply (normally a dict) on success. If the reply is
        not valid JSON, the raw reply text is returned unchanged. None is
        returned when the input is missing/blank, the API call fails, or the
        response carries no usable content.
    '''

    logger = logging.getLogger(f'{__name__}.summarize_job_call')

    # Reject missing or blank input up front instead of failing inside
    # unicodedata.normalize or spending an API call on nothing.
    if not isinstance(job_call, str) or not job_call.strip():
        logger.error('No job call text provided, skipping summarization')
        return None

    # Clean up the job call text
    job_call = unicodedata.normalize('NFKC', job_call)
    job_call = clean_text_whitespace(job_call)

    client = OpenAI(
        base_url=INFERENCE_URL,
        api_key=os.environ.get("API_KEY", "dummy-key-for-testing")
    )

    # NOTE(review): the job call text is sent twice - appended to the system
    # prompt and again in the user message. Kept as-is because the prompt
    # template is not visible here; confirm whether the duplication is intended.
    messages = [
        {
            'role': 'system',
            'content': f'{JOB_CALL_EXTRACTION_PROMPT}{job_call}'
        },
        {
            'role': 'user',
            'content': f'JOB CALL\n{job_call}'
        }
    ]

    completion_args = {
        'model': SUMMARIZER_MODEL,
        'messages': messages,
    }

    try:
        response = client.chat.completions.create(**completion_args)

    except Exception as e:
        logger.error('Error during job summarization API call: %s', e)
        return None

    if response is None:
        return None

    # Guard against an empty or missing choices list or a missing message.
    try:
        raw_summary = response.choices[0].message.content

    except (AttributeError, IndexError, TypeError) as e:
        logger.error('Unexpected job summarization response format: %s', e)
        return None

    if raw_summary is None:
        logger.error('Job summarization response contained no content')
        return None

    logger.debug('Raw job call summary: %s', raw_summary)

    try:
        summary = json.loads(raw_summary)

    except (json.JSONDecodeError, TypeError) as e:
        # Fall back to the raw reply so callers still receive the model output.
        logger.error("Failed to parse job call summary JSON: %s", e)
        summary = raw_summary

    else:
        if isinstance(summary, dict):
            logger.debug('Job call summary keys: %s', list(summary))
        else:
            logger.warning(
                'Job call summary is valid JSON but not an object: %s',
                type(summary).__name__
            )

    # Save the extracted job call information to data directory
    try:
        _save_job_call_data(summary)

    except Exception as save_error:
        logger.warning("Failed to save job call data: %s", str(save_error))

    return summary
def _save_job_call_data(extracted_summary: str) -> None:
    """
    Write the extracted job call summary to a timestamped JSON file.

    The file is created under <project root>/data/job_calls, where the project
    root is taken to be two directory levels above this module. The directory
    is created on demand. Only the extracted summary is written; the original
    job call text is not. Any error is logged and swallowed, so this function
    never raises.

    Args:
        extracted_summary (str): The extracted/summarized job call information.
            It is serialized with json.dump, so any JSON-serializable value
            is accepted.
    """

    log = logging.getLogger(f'{__name__}._save_job_call_data')

    try:
        # Resolve <project root>/data/job_calls and make sure it exists
        target_dir = Path(__file__).parent.parent / "data" / "job_calls"
        target_dir.mkdir(parents=True, exist_ok=True)

        # File names have one-second resolution, so two saves within the
        # same second target the same file.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        destination = target_dir / f"job_call_extracted_{stamp}.json"

        # Serialize the summary as JSON
        with destination.open('w', encoding='utf-8') as handle:
            json.dump(extracted_summary, handle)

    except Exception as error:
        log.error("Error saving job call data: %s", str(error))