import json
import os
import fitz  # PyMuPDF for PDF parsing
import docx
from google import genai
from google.genai import types

# API key as an environment variable
api_key = os.environ.get("GOOGLE_API_KEY")

# Function to extract text from PDFs
def extract_text_from_pdf(file_path):
    text = ""
    with fitz.open(file_path) as doc:
        for page in doc:
            text += page.get_text()
    return text

# Function to extract text from DOCX files
def extract_text_from_docx(file_path):
    doc = docx.Document(file_path)
    return "\n".join([para.text for para in doc.paragraphs])

def process_resume(file_path):
    if file_path.endswith(".pdf"):
        resume_text = extract_text_from_pdf(file_path)
    elif file_path.endswith(".docx"):
        resume_text = extract_text_from_docx(file_path)
    else:
        print(f"Skipping unsupported file: {file_path}")
        return None
    # Extract structured resume data
    structured_data = extract_resume_data(resume_text)
    return structured_data

# Function to parse a resume and extract structured data using Gemini
def extract_resume_data(resume_text):
    prompt = f"""
    Extract the following details from the resume:
    - Full Name (first word capital, e.g. "Krish Bakshi", "Uzumaki Naruto")
    - Phone Number
    - Work Experience (Company, Role, Contribution and work)
    - Projects (Project, Tech Stack/Technology)
    - Education (Degree, University, Year) (only first word capital)
    - Skills
    Provide the data in a structured JSON format.

    Resume:
    {resume_text}
    """
    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
    )
    return response.text  # Gemini returns the structured JSON as text
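
# Minimal parsing sketch (an assumption, not part of the original Space): the
# model's reply is a JSON string and may arrive wrapped in Markdown code fences,
# so this hypothetical helper strips any fence before calling json.loads.
def parse_resume_json(raw_text):
    cleaned = raw_text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (e.g. ```json) and the closing fence
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
        cleaned = cleaned.rsplit("```", 1)[0]
    return json.loads(cleaned)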
# Response Pipeline
def generate_email(prompt, template):
    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=types.GenerateContentConfig(system_instruction=template),
        contents=[prompt],
    )
    return response.text
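
# Usage sketch (assumption: the "resumes/" folder and the email template below
# are illustrative names, not part of the original code). It walks a folder of
# resumes, extracts structured data from each, and drafts an outreach email.
if __name__ == "__main__":
    email_template = "You are a recruiter writing a short, polite outreach email."
    for file_name in os.listdir("resumes"):
        structured = process_resume(os.path.join("resumes", file_name))
        if structured is None:
            continue
        email = generate_email(
            f"Write an outreach email based on this candidate profile:\n{structured}",
            email_template,
        )
        print(email)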