Spaces:

whyumesh
/

coder_v1

Paused

whyumesh committed on Oct 4

Commit

ea899f2

•

1 Parent(s): 579c265

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,29 +10,27 @@ def load_model():
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16,
-        device_map="auto"
     )
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     return model, tokenizer
 model, tokenizer = load_model()
-@spaces.GPU(duration=60)  # Adjust duration based on your needs
 def fix_code(input_code):
-    # Prepare the prompt
     messages = [
         {"role": "system", "content": "You are a helpful coding assistant. Please analyze the following code, identify any errors, and provide the corrected version."},
         {"role": "user", "content": f"Please fix this code:\n\n{input_code}"}
     ]
-    # Apply chat template
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
-    # Tokenize and generate
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     generated_ids = model.generate(
         **model_inputs,
@@ -41,7 +39,6 @@ def fix_code(input_code):
         top_p=0.95,
     )
-    # Decode only the new tokens
     generated_ids = [
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
     ]
@@ -49,7 +46,6 @@ def fix_code(input_code):
     return response
-# Create Gradio interface
 iface = gr.Interface(
     fn=fix_code,
     inputs=gr.Code(

     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16,
+        device_map="auto",
+        low_cpu_mem_usage=True  # This requires Accelerate
     )
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     return model, tokenizer
 model, tokenizer = load_model()
+@spaces.GPU(duration=60)
 def fix_code(input_code):
     messages = [
         {"role": "system", "content": "You are a helpful coding assistant. Please analyze the following code, identify any errors, and provide the corrected version."},
         {"role": "user", "content": f"Please fix this code:\n\n{input_code}"}
     ]
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     generated_ids = model.generate(
         **model_inputs,
         top_p=0.95,
     )
     generated_ids = [
         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
     ]
     return response
 iface = gr.Interface(
     fn=fix_code,
     inputs=gr.Code(