import requests from bs4 import BeautifulSoup import re import os from PyPDF2 import PdfReader def read_pdf_text(pdf_file): # Reading the pdf pdf_reader = PdfReader(pdf_file) all_text = "" # make it limited. min(5, len(pages)) for page in pdf_reader.pages: all_text += page.extract_text() return all_text def parse_linkedin_pdf(pdf_text): sections = re.split(r'\n(?=\b(?:Experience|Contact|Education|Top Skills|Languages|Honors-Awards)\b)', pdf_text) parsed_data = {} for section in sections: lines = section.split('\n') section_name = lines[0] section_text = '\n'.join(lines[1:]) parsed_data[section_name] = section_text return parsed_data