Spaces:

AdamNovotnyCom
/

llama2-gradio-huggingface

Running

AdamNovotnyCom committed on Oct 10, 2023

Commit

15a1b0b

1 Parent(s): f0a60ae

multiple models

Files changed (3) hide show

Dockerfile CHANGED Viewed

@@ -20,8 +20,6 @@ RUN pip install -r requirements.txt
 EXPOSE 7860
-ENV MODEL=llama
 RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true
 CMD ["python", "app.py"]

 EXPOSE 7860
 RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true
 CMD ["python", "app.py"]

Dockerfile_dev CHANGED Viewed

@@ -20,7 +20,5 @@ RUN pip install -r requirements.txt
 EXPOSE 7860
-ENV MODEL=googleflan
 # with reload
 CMD ["gradio", "app.py"]

 EXPOSE 7860
 # with reload
 CMD ["gradio", "app.py"]

app.py CHANGED Viewed

@@ -6,20 +6,15 @@ import transformers
 from transformers import AutoTokenizer
 logging.basicConfig(level=logging.INFO)
 if "googleflan" == os.environ.get("MODEL"):
     model = "google/flan-t5-small"
-    logging.info(f"APP startup. Model {model}")
     pipe_flan = transformers.pipeline("text2text-generation", model=model)
     def model_func(input_text, request: gr.Request):
-        print(f"Input request: {input_text}")
-        print(request.query_params)
-        print(os.environ.get("HF_TOKEN")[:5])
-        logging.info(os.environ.get("HF_TOKEN")[:5])
         return pipe_flan(input_text)
 elif "llama" == os.environ.get("MODEL"):
     model = "meta-llama/Llama-2-7b-chat-hf"
-    logging.info(f"APP startup. Model {model}")
     tokenizer = AutoTokenizer.from_pretrained(
         model,
         token=os.environ.get("HF_TOKEN"),
@@ -27,7 +22,7 @@ elif "llama" == os.environ.get("MODEL"):
     pipeline = transformers.pipeline(
         "text-generation",
         model=model,
-        torch_dtype=torch.float16,
         device_map="auto",
         token=os.environ.get("HF_TOKEN"),
     )
@@ -57,7 +52,7 @@ demo = gr.Interface(
             value="",
     ),
     outputs=gr.Textbox(
-            label="LLM",
             lines=5,
             value="",
     ),

 from transformers import AutoTokenizer
 logging.basicConfig(level=logging.INFO)
+logging.info(f"APP startup")
 if "googleflan" == os.environ.get("MODEL"):
     model = "google/flan-t5-small"
     pipe_flan = transformers.pipeline("text2text-generation", model=model)
     def model_func(input_text, request: gr.Request):
         return pipe_flan(input_text)
 elif "llama" == os.environ.get("MODEL"):
     model = "meta-llama/Llama-2-7b-chat-hf"
     tokenizer = AutoTokenizer.from_pretrained(
         model,
         token=os.environ.get("HF_TOKEN"),
     pipeline = transformers.pipeline(
         "text-generation",
         model=model,
+        torch_dtype=torch.float32,
         device_map="auto",
         token=os.environ.get("HF_TOKEN"),
     )
             value="",
     ),
     outputs=gr.Textbox(
+            label=f"Model: {os.environ.get('MODEL')}",
             lines=5,
             value="",
     ),