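"""Gradio app that drafts a Statement of Purpose (SOP) with a fine-tuned
GPT-Neo model pulled from the Hugging Face Hub."""
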
import gradio as gr
from transformers import GPT2Tokenizer, GPTNeoForCausalLM
import torch
import traceback
import gc
import psutil

# Make sure garbage collection is enabled (it is on by default)
gc.enable()

# Deterministic cuDNN settings for reproducible GPU runs
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Model and tokenizer, cached at module level and loaded lazily on first use
model = None
tokenizer = None

def log_memory_usage():
    process = psutil.Process()
    memory_info = process.memory_info()
    print(f"Memory usage: {memory_info.rss / 1024 / 1024:.2f} MB")

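# Fetch the fine-tuned checkpoint from the Hugging Face Hub and cache it in
# the module-level globals; returns True on success so callers can surface
# a friendly error on failure.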
def load_model():
    global model, tokenizer
    try:
        print("Starting model loading...")
        log_memory_usage()
        
        tokenizer = GPT2Tokenizer.from_pretrained(
            "harshagnihotri14/SOP_Generator",
            local_files_only=False
        )
        
        model = GPTNeoForCausalLM.from_pretrained(
            "harshagnihotri14/SOP_Generator",
            low_cpu_mem_usage=True,
            torch_dtype=torch.float32,
            device_map='auto'  # Requires accelerate; resolves to CPU when no GPU is present
        )
        
        print("Model loaded successfully")
        log_memory_usage()
        return True
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        return False
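
# Optional, separate sanity check of the checkpoint (a sketch, not used by the
# app; it assumes the Hub repo id above is reachable):
#
#   from transformers import pipeline
#   generator = pipeline("text-generation", model="harshagnihotri14/SOP_Generator")
#   print(generator("Write a Statement of Purpose for", max_new_tokens=40)[0]["generated_text"])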

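# Release Python objects and, when a GPU is present, the CUDA cache between requests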
def cleanup():
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    log_memory_usage()

def generate_sop_with_retry(name, course_name, university, university_location, academic_background, 
                          marks, subjects, work_experience, extracurricular_activities, 
                          interests, short_term_goals, long_term_goals, max_retries=2):
    """Attempt to generate SOP multiple times if output is too short"""
    for attempt in range(max_retries):
        sop = generate_sop_attempt(name, course_name, university, university_location, 
                                 academic_background, marks, subjects, work_experience,
                                 extracurricular_activities, interests, short_term_goals, 
                                 long_term_goals)
        
        if not isinstance(sop, str) or "Error:" in sop:
            continue
            
        word_count = len(sop.split())
        if word_count >= 800:  # Reduced from 1000 for better performance
            return sop
            
    return "Error: Unable to generate a sufficiently detailed SOP after multiple attempts. Please try again with more detailed input information."

def generate_sop_attempt(name, course_name, university, university_location, academic_background, 
                        marks, subjects, work_experience, extracurricular_activities, 
                        interests, short_term_goals, long_term_goals, progress=gr.Progress()):
    
    try:
        if model is None or tokenizer is None:
            progress(0.1, desc="Loading model...")
            success = load_model()
            if not success:
                return "Error: Failed to load the model. Please try again."
        
        progress(0.3, desc="Preparing prompt...")
        
        # Simplified prompt structure for better performance
        prompt = (
            f"Write a Statement of Purpose (SOP) for {name} applying to {course_name} "
            f"at {university}, {university_location}.\n\n"
            
            f"Academic Background: {academic_background}\n"
            f"Academic Performance: {marks}\n"
            f"Key Subjects: {subjects}\n"
            f"Work Experience: {work_experience}\n"
            f"Extracurricular Activities: {extracurricular_activities}\n"
            f"Interests: {interests}\n"
            f"Short-term Goals: {short_term_goals}\n"
            f"Long-term Goals: {long_term_goals}\n\n"
            
            "Write a comprehensive SOP (800-1000 words) that includes:\n"
            "1. Strong introduction with personal motivation\n"
            "2. Academic achievements and relevant coursework\n"
            "3. Professional experience and projects\n"
            "4. Research interests and activities\n"
            "5. Extracurricular activities and leadership\n"
            "6. Career goals and program fit\n"
            "7. Conclusion\n\n"
            
            "BEGIN SOP:\n"
        )
        
        progress(0.5, desc="Tokenizing input...")
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        
        progress(0.7, desc="Generating SOP...")
        
        # Generation settings tuned for CPU inference; beam sampling trades
        # speed for output quality
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=2048,     # Total cap (prompt + completion); reduced for performance
                min_length=500,      # Lower bound so drafts are not trivially short
                temperature=0.7,
                repetition_penalty=1.5,  # Discourage repeated phrases
                num_beams=4,         # Reduced beam count for better performance
                length_penalty=2.0,  # Values > 1.0 bias beams toward longer outputs
                top_p=0.85,
                top_k=50,
                do_sample=True,      # Sample within beams rather than pure beam search
                pad_token_id=tokenizer.eos_token_id  # GPT-2 tokenizer defines no pad token
            )
        
        progress(0.9, desc="Processing output...")
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        # Extract the SOP content (the decoded output echoes the prompt first)
        sop_start = generated_text.find("BEGIN SOP:")
        if sop_start != -1:
            sop_text = generated_text[sop_start + len("BEGIN SOP:"):].strip()
        else:
            # Fallback: drop the prompt prefix if the marker was not preserved
            sop_text = generated_text[len(prompt):].strip()
        
        # Format the final output
        formatted_sop = (
            "STATEMENT OF PURPOSE\n\n"
            f"Name: {name}\n"
            f"Program: {course_name}\n"
            f"University: {university}, {university_location}\n\n"
            f"{sop_text}"
        )
        
        cleanup()
        progress(1.0, desc="Done!")
        
        return formatted_sop

    except Exception as e:
        cleanup()
        error_msg = f"Error during SOP generation: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return f"An error occurred while generating the SOP. Please try again. Error: {str(e)}"

def generate_sop(*args):
    """Wrapper function to handle the retry logic"""
    return generate_sop_with_retry(*args)

# Gradio interface with optimized settings
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Professional SOP Generator
    Generate a comprehensive Statement of Purpose for your graduate school application.
    
    **Guidelines:**
    - Provide detailed information in all fields
    - Include specific examples and achievements
    - The generated SOP will be approximately 800-1000 words
    """)
    
    with gr.Row():
        with gr.Column():
            name = gr.Textbox(label="Full Name", placeholder="Enter your full name")
            course_name = gr.Textbox(label="Course Name", placeholder="Enter the program name")
            university = gr.Textbox(label="University", placeholder="Enter university name")
            university_location = gr.Textbox(label="University Location", placeholder="City, Country")
            academic_background = gr.Textbox(label="Academic Background", placeholder="Your current/previous degree")
            marks = gr.Textbox(label="Academic Marks/Grades", placeholder="Your GPA or percentage")
        
        with gr.Column():
            subjects = gr.Textbox(label="Major Subjects", placeholder="List your major subjects")
            work_experience = gr.Textbox(label="Work Experience", placeholder="Relevant work experience")
            extracurricular_activities = gr.Textbox(label="Extracurricular Activities", placeholder="List your activities")
            interests = gr.Textbox(label="Personal Interests", placeholder="Your interests and hobbies")
            short_term_goals = gr.Textbox(label="Short Term Goals", placeholder="Your immediate career goals")
            long_term_goals = gr.Textbox(label="Long Term Goals", placeholder="Your long-term career aspirations")
    
    submit = gr.Button("Generate SOP", variant="primary")
    output = gr.Textbox(label="Generated SOP", lines=30)
    
    # Wire the form fields to the generator
    submit.click(
        fn=generate_sop,
        inputs=[
            name, course_name, university, university_location, academic_background,
            marks, subjects, work_experience, extracurricular_activities,
            interests, short_term_goals, long_term_goals
        ],
        outputs=output
    )

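# Note: share=True only matters for local runs (it opens a temporary public
# tunnel); on Hugging Face Spaces the platform provides the public URL and the
# flag is effectively ignored.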
if __name__ == "__main__":
    # Configure Gradio queue for better resource management
    demo.queue(max_size=5)  # Limit concurrent requests
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
        debug=True,
    )