AutoMailAI / utils.py
krishbakshi's picture
Update utils.py
6032d94 verified
import json
import os
import fitz # PyMuPDF for PDF parsing
import docx
from google import genai
from google.genai import types
# The Gemini API key is read from the GOOGLE_API_KEY environment variable;
# it is None when that variable is not set.
api_key = os.getenv("GOOGLE_API_KEY")
# Function to extract text from PDFs
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*.

    The document is opened with ``fitz.open`` as a context manager, each page
    yielded by iterating it is converted with ``get_text()``, and the
    per-page strings are concatenated with no separator between them.
    """
    with fitz.open(file_path) as pdf:
        # Collect the page texts while the document is still open.
        page_texts = [page.get_text() for page in pdf]
    return "".join(page_texts)
# Function to extract text from DOCX files
def extract_text_from_docx(file_path):
    """Return the paragraph texts of the DOCX at *file_path*.

    Only the entries of the document's ``paragraphs`` collection are read;
    their ``text`` values are joined with a newline between consecutive
    paragraphs.
    """
    document = docx.Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
def process_resume(file_path):
    """Extract structured resume data from a PDF or DOCX file.

    The extractor is chosen from the file extension. The extension is
    compared case-insensitively, so names such as ``CV.PDF`` or
    ``Resume.Docx`` are handled like their lowercase forms instead of being
    skipped as unsupported.

    Args:
        file_path: Path of the resume file, as a string.

    Returns:
        The value returned by ``extract_resume_data`` for the extracted
        text, or ``None`` (after printing a notice) when the extension is
        neither ``.pdf`` nor ``.docx``.
    """
    # Lower-case only the copy used for matching; the original path is what
    # gets opened and what appears in the skip message.
    lowered_path = file_path.lower()
    if lowered_path.endswith(".pdf"):
        resume_text = extract_text_from_pdf(file_path)
    elif lowered_path.endswith(".docx"):
        resume_text = extract_text_from_docx(file_path)
    else:
        print(f"Skipping unsupported file: {file_path}")
        return None

    # Extract structured resume data
    return extract_resume_data(resume_text)
# Function to parse resume and extract structured data using Gemini
def extract_resume_data(resume_text):
    """Ask Gemini to extract structured details from raw resume text.

    The resume text is embedded in a fixed prompt that requests name, email,
    phone number, work experience, projects, education and skills, and the
    prompt is sent to the ``gemini-2.0-flash`` model.

    Args:
        resume_text: Plain text of the resume, interpolated into the prompt.

    Returns:
        The ``text`` attribute of the model response. The prompt asks for
        JSON, but this function neither parses nor validates the reply.
    """
    instructions = f"""
Extract the following details from the resume:
- Full Name(first word capital, eg: "Krish Bakshi", "Uzumaki Naruto")
- Email
- Phone Number
- Work Experience (Company, Role, Contribution and work)
- Projects(Project, Tech Stack/Tehnology)
- Education (Degree, University, Year) /(Only first word capital)/
- Skills
Provide the data in a structured JSON format.
Resume:
{resume_text}
"""
    gemini = genai.Client(api_key=api_key)
    reply = gemini.models.generate_content(
        contents=instructions,
        model="gemini-2.0-flash",
    )
    # Returned as-is: the caller receives the model's raw text.
    return reply.text
# Response Pipeline
def generate_email(prompt, template):
    """Generate text with Gemini from a prompt and a system-instruction template.

    Args:
        prompt: Sent as the single element of the request's ``contents``.
        template: Passed to the model as its system instruction.

    Returns:
        The ``text`` attribute of the ``gemini-2.0-flash`` model response.
    """
    gemini = genai.Client(api_key=api_key)
    generation_config = types.GenerateContentConfig(system_instruction=template)
    reply = gemini.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt],
        config=generation_config,
    )
    return reply.text