Spaces:
Sleeping
Sleeping
File size: 8,783 Bytes
719cf49 d0245d4 0891b5b 4c3cb67 430ede3 6512f81 719cf49 a13d626 52f876e a13d626 430ede3 36e77cf 52f876e 0891b5b 430ede3 4c3cb67 6512f81 430ede3 4c3cb67 430ede3 4c3cb67 430ede3 52f876e 430ede3 4c3cb67 52f876e 430ede3 52f876e f00b78f 430ede3 f00b78f 430ede3 f00b78f 430ede3 f00b78f 0eed4f9 4c3cb67 0eed4f9 4c3cb67 52f876e 4c3cb67 52f876e 4c3cb67 6512f81 430ede3 52f876e 430ede3 7e80be1 f00b78f 7e80be1 430ede3 f00b78f 52f876e 4c3cb67 6512f81 430ede3 52f876e 430ede3 f00b78f 430ede3 f00b78f 52f876e 430ede3 52f876e f00b78f 7e80be1 f00b78f 7e80be1 f00b78f 52f876e bfd8d3b 52f876e bfd8d3b 52f876e bfd8d3b 52f876e 6512f81 0eed4f9 4c3cb67 bcb9a31 52f876e 4c3cb67 52f876e 719cf49 f00b78f 430ede3 52f876e f00b78f 52f876e bfd8d3b f00b78f 430ede3 52f876e 3bf582d 52f876e 430ede3 52f876e 870b91d 52f876e 870b91d 52f876e 870b91d 52f876e 719cf49 4c3cb67 430ede3 3bf582d 36e77cf bfd8d3b 36e77cf 430ede3 bfd8d3b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
import gradio as gr
from transformers import GPT2Tokenizer, GPTNeoForCausalLM
import torch
import traceback
import gc
import psutil
import time
# Turn on automatic garbage collection so freed model tensors are reclaimed.
gc.enable()

# With a GPU present, prefer reproducible cuDNN kernels over autotuned ones.
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Module-level model handles; populated lazily by load_model() on first use.
model = None
tokenizer = None
def log_memory_usage():
    """Print this process's resident set size (RSS) in megabytes."""
    rss_bytes = psutil.Process().memory_info().rss
    print(f"Memory usage: {rss_bytes / 1024 / 1024:.2f} MB")
def load_model():
    """Populate the module-level ``tokenizer`` and ``model`` globals.

    Downloads/instantiates the SOP_Generator tokenizer and GPT-Neo model.
    Returns True on success; on any failure the error is printed and False
    is returned so the caller can surface a friendly message instead of a
    traceback.
    """
    global model, tokenizer
    try:
        print("Starting model loading...")
        log_memory_usage()

        # Tokenizer first; allow a network fetch if not cached locally.
        tokenizer = GPT2Tokenizer.from_pretrained(
            "harshagnihotri14/SOP_Generator",
            local_files_only=False,
        )

        # float32 on CPU; device_map='auto' places layers on GPU when present.
        model = GPTNeoForCausalLM.from_pretrained(
            "harshagnihotri14/SOP_Generator",
            low_cpu_mem_usage=True,
            torch_dtype=torch.float32,
            device_map='auto',
        )

        print("Model loaded successfully")
        log_memory_usage()
        return True
    except Exception as exc:
        print(f"Error loading model: {str(exc)}")
        return False
def cleanup():
    """Reclaim memory after a generation pass.

    Forces a garbage-collection cycle, empties the CUDA allocator cache
    when a GPU is available, and prints the resulting resident memory.
    """
    gc.collect()
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        torch.cuda.empty_cache()
    log_memory_usage()
def generate_sop_with_retry(name, course_name, university, university_location, academic_background,
                            marks, subjects, work_experience, extracurricular_activities,
                            interests, short_term_goals, long_term_goals, max_retries=2):
    """Run generate_sop_attempt up to ``max_retries`` times.

    A draft is accepted only if it is a non-error string of at least 800
    words; otherwise another attempt is made. When every attempt fails, a
    user-facing error string is returned.
    """
    attempts_left = max_retries
    while attempts_left > 0:
        attempts_left -= 1
        candidate = generate_sop_attempt(name, course_name, university, university_location,
                                         academic_background, marks, subjects, work_experience,
                                         extracurricular_activities, interests, short_term_goals,
                                         long_term_goals)
        # Skip drafts that errored out entirely.
        if not isinstance(candidate, str) or "Error:" in candidate:
            continue
        # Accept only sufficiently long drafts (800 words, relaxed from 1000).
        if len(candidate.split()) >= 800:
            return candidate
    return "Error: Unable to generate a sufficiently detailed SOP after multiple attempts. Please try again with more detailed input information."
def generate_sop_attempt(name, course_name, university, university_location, academic_background,
                         marks, subjects, work_experience, extracurricular_activities,
                         interests, short_term_goals, long_term_goals, progress=gr.Progress()):
    """Generate a single SOP draft from the applicant's details.

    Lazily loads the model on first use, builds a structured prompt, runs
    generation, and returns the formatted SOP. On failure a user-facing
    error string is returned — this function never raises, so the retry
    wrapper can keep control.

    The ``progress=gr.Progress()`` default is Gradio's dependency-injection
    idiom: Gradio replaces it with a live per-request progress tracker.
    """
    try:
        # Lazy-load so app startup stays fast and memory is only paid on demand.
        if model is None or tokenizer is None:
            progress(0.1, desc="Loading model...")
            success = load_model()
            if not success:
                return "Error: Failed to load the model. Please try again."

        progress(0.3, desc="Preparing prompt...")
        # Simplified prompt structure for better performance
        prompt = (
            f"Write a Statement of Purpose (SOP) for {name} applying to {course_name} "
            f"at {university}, {university_location}.\n\n"
            f"Academic Background: {academic_background}\n"
            f"Academic Performance: {marks}\n"
            f"Key Subjects: {subjects}\n"
            f"Work Experience: {work_experience}\n"
            f"Extracurricular Activities: {extracurricular_activities}\n"
            f"Interests: {interests}\n"
            f"Short-term Goals: {short_term_goals}\n"
            f"Long-term Goals: {long_term_goals}\n\n"
            "Write a comprehensive SOP (800-1000 words) that includes:\n"
            "1. Strong introduction with personal motivation\n"
            "2. Academic achievements and relevant coursework\n"
            "3. Professional experience and projects\n"
            "4. Research interests and activities\n"
            "5. Extracurricular activities and leadership\n"
            "6. Career goals and program fit\n"
            "7. Conclusion\n\n"
            "BEGIN SOP:\n"
        )

        progress(0.5, desc="Tokenizing input...")
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)

        progress(0.7, desc="Generating SOP...")
        # Optimized generation parameters for CPU
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=2048,  # total (prompt + completion) token budget
                min_length=500,
                temperature=0.7,
                repetition_penalty=1.5,
                num_beams=4,  # Reduced for better performance
                length_penalty=2.0,
                top_p=0.85,
                top_k=50,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id
            )

        progress(0.9, desc="Processing output...")
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract only the completion. Prefer the explicit "BEGIN SOP:" marker;
        # if the model dropped it, strip the echoed prompt prefix instead.
        # BUG FIX: the previous fallback split on "Now, write a detailed", a
        # phrase from an older prompt that no longer appears anywhere, so it
        # silently returned the FULL text (prompt included) to the user.
        sop_start = generated_text.find("BEGIN SOP:")
        if sop_start != -1:
            sop_text = generated_text[sop_start + len("BEGIN SOP:"):].strip()
        elif generated_text.startswith(prompt):
            sop_text = generated_text[len(prompt):].strip()
        else:
            sop_text = generated_text.strip()

        # Format the final output with a standard header.
        formatted_sop = (
            "STATEMENT OF PURPOSE\n\n"
            f"Name: {name}\n"
            f"Program: {course_name}\n"
            f"University: {university}, {university_location}\n\n"
            f"{sop_text}"
        )

        cleanup()
        progress(1.0, desc="Done!")
        return formatted_sop
    except Exception as e:
        cleanup()  # release memory even on the failure path
        error_msg = f"Error during SOP generation: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        return f"An error occurred while generating the SOP. Please try again. Error: {str(e)}"
def generate_sop(*args):
    """Thin UI-facing entry point; delegates all inputs to the retry helper."""
    return generate_sop_with_retry(*args)
# --- Gradio interface ---------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Professional SOP Generator
    Generate a comprehensive Statement of Purpose for your graduate school application.
    **Guidelines:**
    - Provide detailed information in all fields
    - Include specific examples and achievements
    - The generated SOP will be approximately 800-1000 words
    """)

    # Two-column input form: personal/academic details on the left,
    # experience and goals on the right.
    with gr.Row():
        with gr.Column():
            full_name = gr.Textbox(label="Full Name", placeholder="Enter your full name")
            program = gr.Textbox(label="Course Name", placeholder="Enter the program name")
            school = gr.Textbox(label="University", placeholder="Enter university name")
            school_location = gr.Textbox(label="University Location", placeholder="City, Country")
            background = gr.Textbox(label="Academic Background", placeholder="Your current/previous degree")
            grades = gr.Textbox(label="Academic Marks/Grades", placeholder="Your GPA or percentage")
        with gr.Column():
            majors = gr.Textbox(label="Major Subjects", placeholder="List your major subjects")
            experience = gr.Textbox(label="Work Experience", placeholder="Relevant work experience")
            activities = gr.Textbox(label="Extracurricular Activities", placeholder="List your activities")
            hobbies = gr.Textbox(label="Personal Interests", placeholder="Your interests and hobbies")
            near_goals = gr.Textbox(label="Short Term Goals", placeholder="Your immediate career goals")
            far_goals = gr.Textbox(label="Long Term Goals", placeholder="Your long-term career aspirations")

    submit = gr.Button("Generate SOP", variant="primary")
    output = gr.Textbox(label="Generated SOP", lines=30)

    # Wire the button. NOTE: the positional order of `inputs` must match the
    # parameter order of generate_sop_with_retry exactly.
    submit.click(
        fn=generate_sop,
        inputs=[
            full_name, program, school, school_location, background,
            grades, majors, experience, activities,
            hobbies, near_goals, far_goals,
        ],
        outputs=output,
    )
if __name__ == "__main__":
    # Bound the request queue so concurrent generations don't exhaust memory.
    demo.queue(max_size=5)
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all interfaces (container-friendly)
        "server_port": 7860,
        "share": True,       # also expose a public gradio.live link
        "show_error": True,  # surface tracebacks in the UI
        "debug": True,
    }
    demo.launch(**launch_options)