# cv_quality / cv_quality.py
# Nassiraaa's picture
# Update cv_quality.py
# c02a423 verified
import os
import tempfile
from io import BytesIO
from urllib.parse import urlsplit

import requests
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, UnstructuredFileLoader

from yolo_text_extraction import pdf_to_text
# Load variables from a .env file (if one is found) into the process
# environment at import time, before any CV is processed.
load_dotenv()
class CV:
    """A CV document identified by a URL, with text extraction and analysis.

    The file is downloaded on demand, its text is extracted with the
    langchain loader registered for its extension, and the project's
    ``pdf_to_text`` helper is used as a fallback whenever no loader applies
    or the loader yields no text.
    """

    # Seconds `requests` waits on the server before giving up; without a
    # timeout a stalled connection would block the caller indefinitely.
    DOWNLOAD_TIMEOUT = 30

    def __init__(self, file_url):
        """Remember the CV's URL and the loader class for each extension.

        Args:
            file_url: URL the CV file is downloaded from. Its query string
                and fragment are ignored when detecting the file type.
        """
        self.file_url = file_url
        self.doc_loader = {
            ".pdf": PyPDFLoader,
            ".docx": Docx2txtLoader,
            ".txt": UnstructuredFileLoader,
        }

    @staticmethod
    def _extension_of(url):
        """Return the lower-cased extension of the path component of *url*.

        Only the path is inspected, so neither ``?query`` nor ``#fragment``
        parts (nor dots in the host name) can leak into the result.
        """
        _, ext = os.path.splitext(urlsplit(url).path)
        return ext.lower()

    def _load_pages(self, loader_cls, payload, suffix):
        """Run *loader_cls* over *payload* and return the loaded documents.

        The loaders are constructed from a file path, so the bytes are
        written to a temporary file first. The file is always removed
        afterwards, even when the loader raises.
        """
        # delete=False: the file must outlive the `with` block so the loader
        # can reopen it by name (required on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(payload)
            temp_file_path = temp_file.name
        try:
            return loader_cls(temp_file_path).load()
        finally:
            os.unlink(temp_file_path)

    def get_cv_text(self):
        """Download the CV and return its extracted text.

        Returns:
            The page contents joined with newlines when a registered loader
            produces non-blank text; otherwise whatever
            ``pdf_to_text`` returns for the downloaded bytes.

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
        """
        response = requests.get(self.file_url, timeout=self.DOWNLOAD_TIMEOUT)
        # Fail loudly instead of feeding an HTML error page to the parsers.
        response.raise_for_status()
        payload = response.content

        ext = self._extension_of(self.file_url)
        loader_cls = self.doc_loader.get(ext)
        if loader_cls is not None:
            pages = self._load_pages(loader_cls, payload, ext)
            text = "\n".join(page.page_content for page in pages)
            if text.strip():
                return text

        # Unknown extension, no pages, or only whitespace extracted: hand the
        # raw bytes to the fallback extractor as an in-memory stream.
        return pdf_to_text(BytesIO(payload))

    def analyse_cv_quality(self):
        """Extract the CV text and return the result of ``analyze_cv`` on it."""
        # Imported here rather than at module level, as in the original code.
        from cv_analyzer import analyze_cv

        return analyze_cv(self.get_cv_text())