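"""Gradio app that drafts a Statement of Purpose (SOP) with a fine-tuned
GPT-Neo model pulled from the Hugging Face Hub."""
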
import gradio as gr
from transformers import GPT2Tokenizer, GPTNeoForCausalLM
import torch
import traceback
import gc
import psutil

# Make sure garbage collection is enabled (it is on by default)
gc.enable()

# Deterministic cuDNN settings for reproducible GPU runs
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Model and tokenizer, cached at module level and loaded lazily on first use
model = None
tokenizer = None

def log_memory_usage():
    process = psutil.Process()
    memory_info = process.memory_info()
    print(f"Memory usage: {memory_info.rss / 1024 / 1024:.2f} MB")

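# Fetch the fine-tuned checkpoint from the Hugging Face Hub and cache it in
# the module-level globals; returns True on success so callers can surface
# a friendly error on failure.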
def load_model():
    global model, tokenizer
    try:
        print("Starting model loading...")
        log_memory_usage()
        
        tokenizer = GPT2Tokenizer.from_pretrained(
            "harshagnihotri14/SOP_Generator",
            local_files_only=False
        )
        
        model = GPTNeoForCausalLM.from_pretrained(
            "harshagnihotri14/SOP_Generator",
            low_cpu_mem_usage=True,
            torch_dtype=torch.float32,
            device_map='auto'  # Requires accelerate; resolves to CPU when no GPU is present
        )
        
        print("Model loaded successfully")
        log_memory_usage()
        return True
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        return False
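
# Optional, separate sanity check of the checkpoint (a sketch, not used by the
# app; it assumes the Hub repo id above is reachable):
#
#   from transformers import pipeline
#   generator = pipeline("text-generation", model="harshagnihotri14/SOP_Generator")
#   print(generator("Write a Statement of Purpose for", max_new_tokens=40)[0]["generated_text"])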

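# Release Python objects and, when a GPU is present, the CUDA cache between requests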
def cleanup():
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    log_memory_usage()

def generate_sop_with_retry(name, course_name, university, university_location, academic_background, 
                          marks, subjects, work_experience, extracurricular_activities, 
                          interests, short_term_goals, long_term_goals, max_retries=2):
    """Attempt to generate SOP multiple times if output is too short"""
    for attempt in range(max_retries):
        sop = generate_sop_attempt(name, course_name, university, university_location, 
                                 academic_background, marks, subjects, work_experience,
                                 extracurricular_activities, interests, short_term_goals, 
                                 long_term_goals)
        
        if not isinstance(sop, str) or "Error:" in sop:
            continue
            
        word_count = len(sop.split())
        if word_count >= 800:  # Reduced from 1000 for better performance
            return sop
            
    return "Error: Unable to generate a sufficiently detailed SOP after multiple attempts. Please try again with more detailed input information."

def generate_sop_attempt(name, course_name, university, university_location, academic_background, 
                        marks, subjects, work_experience, extracurricular_activities, 
                        interests, short_term_goals, long_term_goals, progress=gr.Progress()):
    
    try:
        if model is None or tokenizer is None:
            progress(0.1, desc="Loading model...")
            success = load_model()
            if not success:
                return "Error: Failed to load the model. Please try again."
        
        progress(0.3, desc="Preparing prompt...")
        
        # Simplified prompt structure for better performance
        prompt = (
            f"Write a Statement of Purpose (SOP) for {name} applying to {course_name} "
            f"at {university}, {university_location}.\n\n"
            
            f"Academic Background: {academic_background}\n"
            f"Academic Performance: {marks}\n"
            f"Key Subjects: {subjects}\n"
            f"Work Experience: {work_experience}\n"
            f"Extracurricular Activities: {extracurricular_activities}\n"
            f"Interests: {interests}\n"
            f"Short-term Goals: {short_term_goals}\n"
            f"Long-term Goals: {long_term_goals}\n\n"
            
            "Write a comprehensive SOP (800-1000 words) that includes:\n"
            "1. Strong introduction with personal motivation\n"
            "2. Academic achievements and relevant coursework\n"
            "3. Professional experience and projects\n"
            "4. Research interests and activities\n"
            "5. Extracurricular activities and leadership\n"
            "6. Career goals and program fit\n"
            "7. Conclusion\n\n"
            
            "BEGIN SOP:\n"
        )
        
        progress(0.5, desc="Tokenizing input...")
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        
        progress(0.7, desc="Generating SOP...")
        
        # Generation settings tuned for CPU inference; beam sampling trades
        # speed for output quality
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=2048,     # Total cap (prompt + completion); reduced for performance
                min_length=500,      # Lower bound so drafts are not trivially short
                temperature=0.7,
                repetition_penalty=1.5,  # Discourage repeated phrases
                num_beams=4,         # Reduced beam count for better performance
                length_penalty=2.0,  # Values > 1.0 bias beams toward longer outputs
                top_p=0.85,
                top_k=50,
                do_sample=True,      # Sample within beams rather than pure beam search
                pad_token_id=tokenizer.eos_token_id  # GPT-2 tokenizer defines no pad token
            )
        
        progress(0.9, desc="Processing output...")
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        # Extract the SOP content (the decoded output echoes the prompt first)
        sop_start = generated_text.find("BEGIN SOP:")
        if sop_start != -1:
            sop_text = generated_text[sop_start + len("BEGIN SOP:"):].strip()
        else:
            # Fallback: drop the prompt prefix if the marker was not preserved
            sop_text = generated_text[len(prompt):].strip()
        
        # Format the final output
        formatted_sop = (
            "STATEMENT OF PURPOSE\n\n"
            f"Name: {name}\n"
            f"Program: {course_name}\n"
            f"University: {university}, {university_location}\n\n"
            f"{sop_text}"
        )
        
        cleanup()
        progress(1.0, desc="Done!")
        
        return formatted_sop

    except Exception as e:
        cleanup()
        error_msg = f"Error during SOP generation: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return f"An error occurred while generating the SOP. Please try again. Error: {str(e)}"

def generate_sop(*args):
    """Wrapper function to handle the retry logic"""
    return generate_sop_with_retry(*args)

# Gradio interface with optimized settings
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Professional SOP Generator
    Generate a comprehensive Statement of Purpose for your graduate school application.
    
    **Guidelines:**
    - Provide detailed information in all fields
    - Include specific examples and achievements
    - The generated SOP will be approximately 800-1000 words
    """)
    
    with gr.Row():
        with gr.Column():
            name = gr.Textbox(label="Full Name", placeholder="Enter your full name")
            course_name = gr.Textbox(label="Course Name", placeholder="Enter the program name")
            university = gr.Textbox(label="University", placeholder="Enter university name")
            university_location = gr.Textbox(label="University Location", placeholder="City, Country")
            academic_background = gr.Textbox(label="Academic Background", placeholder="Your current/previous degree")
            marks = gr.Textbox(label="Academic Marks/Grades", placeholder="Your GPA or percentage")
        
        with gr.Column():
            subjects = gr.Textbox(label="Major Subjects", placeholder="List your major subjects")
            work_experience = gr.Textbox(label="Work Experience", placeholder="Relevant work experience")
            extracurricular_activities = gr.Textbox(label="Extracurricular Activities", placeholder="List your activities")
            interests = gr.Textbox(label="Personal Interests", placeholder="Your interests and hobbies")
            short_term_goals = gr.Textbox(label="Short Term Goals", placeholder="Your immediate career goals")
            long_term_goals = gr.Textbox(label="Long Term Goals", placeholder="Your long-term career aspirations")
    
    submit = gr.Button("Generate SOP", variant="primary")
    output = gr.Textbox(label="Generated SOP", lines=30)
    
    # Wire the form fields to the generator
    submit.click(
        fn=generate_sop,
        inputs=[
            name, course_name, university, university_location, academic_background,
            marks, subjects, work_experience, extracurricular_activities,
            interests, short_term_goals, long_term_goals
        ],
        outputs=output
    )

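# Note: share=True only matters for local runs (it opens a temporary public
# tunnel); on Hugging Face Spaces the platform provides the public URL and the
# flag is effectively ignored.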
if __name__ == "__main__":
    # Configure Gradio queue for better resource management
    demo.queue(max_size=5)  # Limit concurrent requests
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
        debug=True,
    )