import gradio as gr from huggingface_hub import InferenceClient import PyPDF2 from langchain.prompts import PromptTemplate # Initialize the Hugging Face client with gemma2-9b-it client = InferenceClient("HuggingFaceH4/gemma2-9b-it") # Function to read text from a PDF file def pdf_to_text(pdf_path): with open(pdf_path, 'rb') as file: pdf_reader = PyPDF2.PdfReader(file) text = '' for page in range(len(pdf_reader.pages)): text += pdf_reader.pages[page].extract_text() return text # Function to analyze CV content def analyze_cv(cv_text): if not cv_text or not isinstance(cv_text, str): raise ValueError("The CV text must be a non-empty string.") prompt_template = PromptTemplate.from_template(''' You are an AI designed to extract structured information from unstructured text. Your task is to analyze the content of a candidate's resume or CV and extract the following details: **CV** {cv_text} **Information Extraction and Output Format** For the given resume, extract and present the following details in the specified format: 1. Candidate Information - Full Name - Contact Information (Phone, Email, Address, etc.) - Date of Birth (if available) - Habitat (if specified, e.g., location, region, or country of residence) 2. Education - Degree Name (e.g., Bachelor's, Master's, Ph.D.) - Field of Study (e.g., Computer Science, Business Administration) - Institution Name - Year(s) of Graduation 3. Professional Experience - For each job extract: - Job Title - Company Name - Duration (start and end dates, or years of experience) - Summary of Key Responsibilities and Achievements 4. Skills - List of Skills (include technical, soft, and industry-specific skills mentioned in the resume) 5. Certifications - Certification Name - Issuing Organization - Year of Issuance 6. Language Proficiency - Languages Mentioned (include proficiency levels if specified in the resume) Do not explain, comment or make up any more information that is not relative to the list of Information extraction. Respond in Vietnamese. Let's work this out in a step by step way to ensure the correct answer. [END]. ''') prompt = prompt_template.format(cv_text=cv_text) response = client.text_generation(prompt, max_tokens=2048, temperature=0.0) return response # Chatbot with PDF and CV analysis def chatbot_with_pdf(pdf_file, user_message, history, system_message, max_tokens, temperature, top_p): if pdf_file is not None: pdf_text = pdf_to_text(pdf_file.name) cv_analysis = analyze_cv(pdf_text) # Call analyze_cv with the extracted PDF text user_message = f"CV Analysis:\n{cv_analysis}\n\nUser Message:\n{user_message}" response_gen = respond( user_message, history, system_message, max_tokens, temperature, top_p ) return list(response_gen)[-1], history + [(user_message, "")] # Define Gradio interface interface = gr.Interface( fn=chatbot_with_pdf, inputs=[ gr.File(label="Upload a PDF File"), gr.Textbox(label="Your Message"), gr.State(label="Chat History"), gr.Textbox(label="System Message", value="You are an AI assistant."), gr.Slider(label="Max Tokens", minimum=1, maximum=1000, value=200), gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.7, step=0.1), gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1), ], outputs=[ gr.Textbox(label="Response"), gr.State(label="Chat History"), ], title="Chatbot with CV Analysis and PDF Integration", ) # Launch Gradio app interface.launch()