| | import streamlit as st |
| | import google.generativeai as genai |
| | from PIL import Image |
| | import fitz |
| | from docx import Document |
| | import json |
| | from pathlib import Path |
| | from datetime import datetime |
| | import re |
| | import pytesseract |
| | import io |
| |
|
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        pdf_file: Binary file-like object exposing ``read()``; its full
            content is handed to PyMuPDF as an in-memory PDF stream.

    Returns:
        The text of all pages joined with newlines, or ``""`` when the
        document cannot be read (the error is reported via ``st.error``).
    """
    try:
        pdf_bytes = pdf_file.read()
        # The context manager closes the document on every exit path,
        # including a failure while extracting an individual page.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)
    except Exception as e:  # UI boundary: report the problem, do not crash
        st.error(f"Error in PDF extraction: {str(e)}")
        return ""
| |
|
def extract_text_from_docx(docx_file):
    """Return the paragraph text of an uploaded DOCX file, one paragraph per line.

    Any failure while opening or reading the document is reported through
    ``st.error`` and an empty string is returned instead.
    """
    try:
        document = Document(docx_file)
        return "\n".join(para.text for para in document.paragraphs)
    except Exception as exc:
        st.error(f"Error in DOCX extraction: {str(exc)}")
        return ""
| |
|
def parse_date(date_str):
    """Parse a month/year style date string into a naive ``datetime``.

    Recognised inputs:
      * open-ended keywords ("present", "current", "now", ...), which map
        to the current local time;
      * the explicit formats listed in ``formats`` below (year only,
        month name + year, numeric month/year in either order);
      * any other text containing a four-digit 19xx/20xx year, which is
        reduced to 1 January of that year.

    Args:
        date_str: The text to parse. Surrounding whitespace is ignored.

    Returns:
        A ``datetime`` on success, or ``None`` when ``date_str`` is not a
        string or no date can be recognised.
    """
    if not isinstance(date_str, str):
        return None

    # Strip first so that values such as "Present " (trailing space left by
    # splitting a duration) are still recognised as open-ended.
    cleaned = date_str.strip()
    if not cleaned:
        return None

    if cleaned.lower() in ('present', 'current', 'now', 'today',
                           'ongoing', 'till date'):
        return datetime.now()

    formats = (
        '%Y', '%b %Y', '%B %Y', '%m/%Y', '%m-%Y',
        '%Y/%m', '%Y-%m',
    )
    for fmt in formats:
        try:
            return datetime.strptime(cleaned, fmt)
        except ValueError:
            continue

    # Last resort: salvage the year from free text ("Summer 1998",
    # "since 2015"). Both centuries are accepted so pre-2000 jobs count.
    year_match = re.search(r'\b(?:19|20)\d{2}\b', cleaned)
    if year_match:
        return datetime.strptime(year_match.group(), '%Y')

    return None
| |
|
# Range separator with whitespace on both sides, e.g. "01-2020 - 03-2021",
# "Jan 2020 – Present" or "2018 to 2020". Tried first so that hyphens that
# belong to a date ("01-2020") are not mistaken for the range separator.
_SPACED_RANGE_SEP = re.compile(r'\s+(?:[-\u2013\u2014]|to)\s+')
# Fallback for compact ranges such as "2018-2020" or "2018–2020".
_COMPACT_RANGE_SEP = re.compile(r'\s*[-\u2013\u2014]\s*|\s+to\s+')


def calculate_experience(work_history):
    """Calculate total years of experience from a work history.

    Each entry's ``'duration'`` is expected to look like
    ``"<start> - <end>"`` or ``"<start> to <end>"`` (hyphen, en dash or
    em dash). Both ends are parsed with ``parse_date``. Entries that are
    malformed or unparseable are skipped rather than raising, and negative
    spans count as zero.

    Durations are summed per job; overlapping jobs are not merged, so
    concurrent positions are counted twice.

    Args:
        work_history: Iterable of dicts carrying a ``'duration'`` string.
            ``None`` is treated as an empty history.

    Returns:
        Total experience in years, rounded to one decimal place.
    """
    total_experience = 0

    for job in work_history or []:
        if not isinstance(job, dict):
            continue

        duration = job.get('duration', '')
        if not duration or not isinstance(duration, str):
            continue
        duration = duration.strip()

        parts = _SPACED_RANGE_SEP.split(duration)
        if len(parts) != 2:
            parts = _COMPACT_RANGE_SEP.split(duration)
        if len(parts) != 2:
            continue

        start_date = parse_date(parts[0].strip())
        end_date = parse_date(parts[1].strip())

        if start_date and end_date:
            # Month-granular difference expressed in fractional years.
            years = (end_date.year - start_date.year) + \
                (end_date.month - start_date.month) / 12
            total_experience += max(0, years)

    return round(total_experience, 1)
| |
|
# Instruction block sent to the model ahead of the resume text.
_RESUME_PROMPT = """Extract the following information from this resume:
1. Summarize the following resume in 100 words, focusing on key skills, experience, and qualifications
2. Full Name
3. Email Address
4. Phone Number
5. Education History (including degree, institution, graduation year, and field of study)
6. Companies worked at with positions and EXACT duration (e.g., "Jan 2020 - Present" or "2018-2020")
7. Skills
8. LinkedIn Profile URL
Return the information in this JSON format:
{
    "summary": "",
    "name": "",
    "email": "",
    "phone": "",
    "education": [
        {
            "degree": "",
            "institution": "",
            "year": "",
            "field": "",
            "gpa": ""
        }
    ],
    "work_experience": [
        {
            "company": "",
            "position": "",
            "duration": ""
        }
    ],
    "skills": [],
    "linkedin": ""
}
For skills include tools and technologies in output if present any in resume.
For work experience durations, please specify exact dates in format: "MMM YYYY - MMM YYYY" or "YYYY - Present" , please return in one order either in ascending or descending.
Only return the JSON object, nothing else. If any field is not found, leave it empty."""


def _read_resume_text(file_uploaded):
    """Return the upload's raw text, or None (after st.error) for an unsupported extension."""
    file_extension = Path(file_uploaded.name).suffix.lower()

    if file_extension == '.pdf':
        return extract_text_from_pdf(file_uploaded)
    if file_extension in ('.docx', '.doc'):
        return extract_text_from_docx(file_uploaded)
    if file_extension in ('.jpg', '.jpeg', '.png'):
        return pytesseract.image_to_string(Image.open(file_uploaded))

    st.error(f"Unsupported file format: {file_extension}")
    return None


def _load_json_object(response_text):
    """Decode the outermost ``{...}`` span of the model reply; raises json.JSONDecodeError."""
    # Slicing between the first '{' and the last '}' drops any prose or
    # markdown code fence the model may wrap around the object.
    json_start = response_text.find('{')
    json_end = response_text.rfind('}') + 1
    json_str = response_text[json_start:json_end] if json_start != -1 else ""
    return json.loads(json_str)


def parse_resume(file_uploaded, api_key):
    """Parse an uploaded resume into structured data using Gemini.

    The file is converted to text according to its extension (PDF, DOCX/DOC,
    or an image via OCR). The text is sent to the ``gemini-1.5-flash`` model
    together with the extraction prompt, and the JSON object in the reply is
    decoded. ``total_years_experience`` is then computed from the returned
    work history and added to the result.

    Args:
        file_uploaded: Uploaded file object with a ``name`` attribute and
            file-like read access.
        api_key: Gemini API key used to configure the client.

    Returns:
        A dict with the keys requested in the prompt plus
        ``'total_years_experience'``, or ``None`` when the format is
        unsupported, no text could be extracted, the reply is not valid
        JSON, or any other error occurs. Every failure is reported through
        ``st.error``; nothing is raised to the caller.
    """
    try:
        text_content = _read_resume_text(file_uploaded)
        if text_content is None:
            return None

        # Do not spend an API call (and risk an invented answer) when the
        # extractor produced nothing, e.g. an unreadable PDF or DOCX.
        if not text_content.strip():
            st.error("No text could be extracted from the uploaded file.")
            return None

        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(f"{_RESUME_PROMPT}\n\nResume Text:\n{text_content}")

        try:
            result = _load_json_object(response.text)
        except json.JSONDecodeError as e:
            st.error(f"Error parsing response: {str(e)}")
            return None

        # The model may emit null for a missing list; treat it as empty.
        total_exp = calculate_experience(result.get('work_experience') or [])
        result['total_years_experience'] = total_exp
        return result

    except Exception as e:  # UI boundary: surface the error instead of crashing
        st.error(f"Error processing resume: {str(e)}")
        return None
| |
|
def format_education(edu):
    """Format one education entry as a single display line.

    Produces ``"<degree> in <field> from <institution> (<year>) - GPA: <gpa>"``,
    leaving out every part whose value is missing or empty.

    Args:
        edu: Dict with optional keys ``'degree'``, ``'field'``,
            ``'institution'``, ``'year'`` and ``'gpa'``. Values may be
            strings or numbers. A non-dict entry is rendered with ``str()``.

    Returns:
        The formatted line; an empty string when nothing is available.
    """
    if not isinstance(edu, dict):
        return str(edu).strip() if edu else ""

    parts = []
    if edu.get('degree'):
        # str() keeps the final join safe if the value is not a string.
        parts.append(str(edu['degree']))
    if edu.get('field'):
        parts.append(f"in {edu['field']}")
    if edu.get('institution'):
        parts.append(f"from {edu['institution']}")
    if edu.get('year'):
        parts.append(f"({edu['year']})")

    # The GPA may arrive as a number (3.8) rather than a string, so convert
    # before stripping instead of calling .strip() on the raw value.
    gpa = edu.get('gpa')
    gpa_text = str(gpa).strip() if gpa else ""
    if gpa_text:
        parts.append(f"- GPA: {gpa_text}")

    return " ".join(parts)
| |
|
def main():
    """Render the Streamlit page: collect the API key and a file, then show the parsed resume.

    The Gemini API key is read from ``st.secrets["GEMINI_API_KEY"]`` when
    available; otherwise a password input is shown. Once both a key and an
    upload are present, the file is passed to ``parse_resume`` and the
    returned fields are displayed section by section.
    """
    st.title("Resume Parser")
    st.write("Upload a resume (PDF, DOCX, or Image) to extract information")

    # NOTE(review): Streamlit is expected to raise FileNotFoundError (or a
    # subclass of it) when no secrets file is configured -- confirm against
    # the deployed Streamlit version. Without this guard the manual-entry
    # fallback below could never be reached in that situation.
    try:
        api_key = st.secrets["GEMINI_API_KEY"] if "GEMINI_API_KEY" in st.secrets else None
    except FileNotFoundError:
        api_key = None
    if not api_key:
        api_key = st.text_input("Enter Gemini API Key", type="password")

    uploaded_file = st.file_uploader("Choose a resume file", type=["pdf", "docx", "doc", "jpg", "jpeg", "png"])

    if not (uploaded_file and api_key):
        return

    with st.spinner('Analyzing resume...'):
        result = parse_resume(uploaded_file, api_key)

    if not result:
        return

    st.subheader("Extracted Information")

    # The prompt asks the model to leave missing fields empty, so the keys
    # are usually present with "" values; `or` makes the placeholder show
    # for those as well as for absent keys.
    st.text_area("Summary", result.get('summary') or 'Not found', height=100)

    col1, col2, col3 = st.columns(3)
    with col1:
        st.write("**Name:**", result.get('name') or 'Not found')
    with col2:
        st.write("**Email:**", result.get('email') or 'Not found')
    with col3:
        st.write("**Phone:**", result.get('phone') or 'Not found')

    # Under one year is shown in months for readability.
    total_exp = result.get('total_years_experience') or 0
    exp_text = f"{total_exp:.1f} years" if total_exp >= 1 else f"{total_exp * 12:.0f} months"
    st.write("**Total Experience:**", exp_text)

    st.subheader("Education")
    if result.get('education'):
        for edu in result['education']:
            st.write(f"- {format_education(edu)}")
    else:
        st.write("No education information found")

    st.subheader("Work Experience")
    if result.get('work_experience'):
        for exp in result['work_experience']:
            if not isinstance(exp, dict):
                # Tolerate entries the model returned as plain text.
                st.write(f"- {exp}")
                continue
            duration = f" ({exp['duration']})" if exp.get('duration') else ""
            position = exp.get('position') or 'Role not found'
            company = exp.get('company') or 'Company not found'
            st.write(f"- {position} at {company}{duration}")
    else:
        st.write("No work experience found")

    st.subheader("Skills:")
    skills = result.get('skills')
    if isinstance(skills, str):
        # A single string would otherwise be listed character by character.
        skills = [skills]
    if skills:
        for skill in skills:
            st.write(f"- {skill}")
    else:
        st.write("- No skills found")

    st.write("**LinkedIn Profile:**", result.get('linkedin') or 'Not found')
| |
|
# Build the page only when this module is run as the main script, not when
# it is imported.
if __name__ == "__main__":
    main()