napoles3d committed (verified)
Commit b5f99ea · Parent: f4d00b3

Update app.py

Files changed (1):
  1. app.py +31 -20
app.py CHANGED
@@ -17,7 +17,6 @@ def vram_gb():
 
 def build_prompt(gene_list, species="Homo sapiens"):
     if isinstance(gene_list, str):
-        # allow a list separated by commas/spaces/newlines
         raw = [g.strip() for g in gene_list.replace("\n", ",").split(",") if g.strip()]
         genes = ", ".join(raw)
     else:
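For reference, the surviving parsing line accepts comma- and newline-separated input; note that plain spaces do not act as separators, since the code only splits on commas after replacing newlines. A quick sketch of its behavior (the gene names are arbitrary examples):

gene_list = "CD3D, CD3E\nTRAC, IL7R"
raw = [g.strip() for g in gene_list.replace("\n", ",").split(",") if g.strip()]
print(", ".join(raw))  # -> "CD3D, CD3E, TRAC, IL7R"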
@@ -41,7 +40,7 @@ def unload():
 def load_model(model_id, quantization):
     """
     Lazily loads the model. An A100 80GB is recommended for 27B.
-    quantization: 'none' or '8bit' (requires bitsandbytes).
+    quantization: 'none' or '8bit' (requires bitsandbytes if a GPU is available).
     """
     if MODEL_CACHE["id"] == model_id and MODEL_CACHE["model"] is not None:
         return MODEL_CACHE["tokenizer"], MODEL_CACHE["model"]
@@ -53,7 +52,7 @@ def load_model(model_id, quantization):
 
     kwargs = dict(torch_dtype=dtype, device_map=device_map, low_cpu_mem_usage=True)
 
-    if quantization == "8bit":
+    if quantization == "8bit" and torch.cuda.is_available():
         try:
             import bitsandbytes as bnb  # noqa: F401
             kwargs.update(dict(load_in_8bit=True))
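A side note on the 8-bit path: newer transformers releases deprecate passing load_in_8bit directly to from_pretrained in favor of an explicit quantization config. A minimal equivalent sketch, assuming bitsandbytes and a CUDA GPU are available and model_id is defined as above:

# Equivalent 8-bit load via an explicit quantization config (newer transformers API).
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

mdl = AutoModelForCausalLM.from_pretrained(
    model_id,  # e.g. "vandijklab/C2S-Scale-Gemma-2-2B"
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)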
@@ -69,7 +68,11 @@ def load_model(model_id, quantization):
     MODEL_CACHE["model"] = mdl
     return tok, mdl
 
-def infer(model_id, species, genes_text, max_new_tokens, temperature, top_p, top_k, repetition_penalty, quantization):
+def infer(model_id, species, species_custom, genes_text, prompt_manual,
+          max_new_tokens, temperature, top_p, top_k, repetition_penalty, quantization):
+    # effective species
+    species_eff = species_custom.strip() if (species == "Custom…" and species_custom.strip()) else species
+
     # simple VRAM check, with guidance for 27B
     mem = vram_gb()
     warn = ""
@@ -77,11 +80,17 @@ def infer(model_id, species, genes_text, max_new_tokens, temperature, top_p, top
     if mem < 60 and quantization != "8bit":
         warn = (
             f"⚠️ Detected VRAM ~{mem:.1f}GB. For 27B an A100 80GB is recommended "
-            f"or using 8-bit (which may still be insufficient on a T4)."
+            f"or trying 8-bit (a T4 may still not be enough)."
         )
 
     tok, mdl = load_model(model_id, quantization)
-    prompt = build_prompt(genes_text, species=species)
+
+    # prompt: use the manual one if provided; otherwise build it
+    if prompt_manual and str(prompt_manual).strip():
+        prompt = str(prompt_manual).strip()
+    else:
+        prompt = build_prompt(genes_text, species=species_eff)
+
     inputs = tok(prompt, return_tensors="pt")
     if torch.cuda.is_available():
         inputs = {k: v.to(mdl.device) for k, v in inputs.items()}
@@ -110,7 +119,7 @@ def infer(model_id, species, genes_text, max_new_tokens, temperature, top_p, top
     thread.start()
     for new_text in streamer:
         output_text += new_text
-        yield (warn, prompt, output_text)
+        yield (warn, output_text)
     thread.join()
 
 with gr.Blocks(title="C2S-Scale (Gemma-2) — Single-cell Biology") as demo:
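For context, the loop above is the standard TextIteratorStreamer pattern: generate() runs in a background thread while the streamer is consumed as an iterator. A self-contained sketch with a stand-in model:

# Streaming-generation pattern used by infer() (sketch; "gpt2" is a stand-in model).
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")
mdl = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("Hello", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
thread = Thread(target=mdl.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=32))
thread.start()
output_text = ""
for new_text in streamer:  # pieces of text arrive as they are generated
    output_text += new_text
thread.join()
print(output_text)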
@@ -118,19 +127,22 @@ with gr.Blocks(title="C2S-Scale (Gemma-2) — Single-cell Biology") as demo:
     """
     # C2S-Scale (Gemma-2) for single-cell biology
     Infers the **cell type** from a *cell sentence* (genes ranked by expression).
-    - Models: `vandijklab/C2S-Scale-Gemma-2-2B` (light), `vandijklab/C2S-Scale-Gemma-2-27B` (heavy).
-    - Select a GPU in the Space Settings for better performance.
 
-    **Note:** 27B requires a large GPU (ideally an A100 80GB). On a T4 it may not load even with 8-bit.
+    **Models**:
+    - `vandijklab/C2S-Scale-Gemma-2-2B` (light; CPU or GPU)
+    - `vandijklab/C2S-Scale-Gemma-2-27B` (heavy; ideally an A100 80GB)
+
+    **Note:** The *Effective prompt* field is editable. If you leave it empty, the app will generate one automatically.
     """
     )
+
     with gr.Row():
         model_id = gr.Dropdown(
             choices=[DEFAULT_MODEL_SMALL, DEFAULT_MODEL_LARGE],
             value=DEFAULT_MODEL_SMALL,
             label="Model"
         )
-        quantization = gr.Radio(["none", "8bit"], value="none", label="Quantization (experimental)")
+        quantization = gr.Radio(["none", "8bit"], value="none", label="Quantization (GPU optional)")
         species = gr.Dropdown(["Homo sapiens", "Mus musculus", "Danio rerio", "Custom…"], value="Homo sapiens", label="Species")
         species_custom = gr.Textbox(value="", label="Species (if you chose Custom…)", visible=False)
 
@@ -148,20 +160,19 @@ with gr.Blocks(title="C2S-Scale (Gemma-2) — Single-cell Biology") as demo:
         top_k = gr.Slider(1, 200, value=50, step=1, label="top_k")
         repetition_penalty = gr.Slider(0.8, 1.5, value=1.05, step=0.01, label="repetition_penalty")
 
+    # EFFECTIVE PROMPT (user-editable)
+    prompt_box = gr.Textbox(label="Effective prompt (optional; leave empty to auto-generate)", lines=8, interactive=True)
+
     warn_box = gr.Markdown("")
-    prompt_box = gr.Textbox(label="Effective prompt", lines=8)
     output_box = gr.Textbox(label="Model output (stream)")
 
-    def _species_value(sp, custom):
-        return custom if sp == "Custom…" and custom.strip() else sp
-
     run_btn = gr.Button("🚀 Infer cell type")
+
     run_btn.click(
-        fn=lambda mid, sp, spc, genes, mx, temp, tp, tk, rp, q: infer(
-            mid, _species_value(sp, spc), genes, mx, temp, tp, tk, rp, q
-        ),
-        inputs=[model_id, species, species_custom, genes_text, max_new_tokens, temperature, top_p, top_k, repetition_penalty, quantization],
-        outputs=[warn_box, prompt_box, output_box]
+        fn=infer,
+        inputs=[model_id, species, species_custom, genes_text, prompt_box,
+                max_new_tokens, temperature, top_p, top_k, repetition_penalty, quantization],
+        outputs=[warn_box, output_box]
     )
 
 if __name__ == "__main__":
 
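Worth noting about the new wiring: infer is now passed to click() directly, and because it is a generator, Gradio streams each yield of (warn, output_text) into warn_box and output_box live. A minimal sketch of that mechanism, with hypothetical names:

# Gradio streams generator callbacks: each yield updates the bound outputs.
import gradio as gr

def ticker(n):
    text = ""
    for i in range(int(n)):
        text += f"{i} "
        yield text  # each yield refreshes the output textbox

with gr.Blocks() as demo:
    n = gr.Number(value=5, label="steps")
    out = gr.Textbox(label="stream")
    gr.Button("Run").click(fn=ticker, inputs=[n], outputs=[out])

demo.launch()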