AdamNovotnyCom committed on
Commit
55c8e0a
1 Parent(s): 66f3bf5
Files changed (2) hide show
  1. app.py +3 -1
  2. docker-compose.yml +1 -2
app.py CHANGED
@@ -12,11 +12,13 @@ else:
12
  logging.info("Running on CPU")
13
 
14
  if "googleflan" == os.environ.get("MODEL"):
 
15
  model = "google/flan-t5-small"
16
  pipeline = transformers.pipeline("text2text-generation", model=model)
17
  def model_func(input_text, request: gr.Request):
18
  return pipeline(input_text)
19
  elif "llama" == os.environ.get("MODEL"):
 
20
  model = "meta-llama/Llama-2-7b-chat-hf"
21
  tokenizer = AutoTokenizer.from_pretrained(
22
  model,
@@ -50,7 +52,7 @@ elif "summary" == os.environ.get("MODEL"):
50
  model="facebook/bart-large-cnn"
51
  summarizer = transformers.pipeline("summarization", model=model)
52
  def model_func(input_text, request: gr.Request):
53
- return summarizer(input_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
54
 
55
  input_label = "How can I help?"
56
  if "summary" == os.environ.get("MODEL"):
 
12
  logging.info("Running on CPU")
13
 
14
  if "googleflan" == os.environ.get("MODEL"):
15
+ ### Fast/small model used to debug UI on local machine
16
  model = "google/flan-t5-small"
17
  pipeline = transformers.pipeline("text2text-generation", model=model)
18
  def model_func(input_text, request: gr.Request):
19
  return pipeline(input_text)
20
  elif "llama" == os.environ.get("MODEL"):
21
+ ### Works on CPU but runtime is > 4 minutes
22
  model = "meta-llama/Llama-2-7b-chat-hf"
23
  tokenizer = AutoTokenizer.from_pretrained(
24
  model,
 
52
  model="facebook/bart-large-cnn"
53
  summarizer = transformers.pipeline("summarization", model=model)
54
  def model_func(input_text, request: gr.Request):
55
+ return summarizer(input_text, max_length=300, min_length=30, do_sample=False)[0]["summary_text"]
56
 
57
  input_label = "How can I help?"
58
  if "summary" == os.environ.get("MODEL"):
docker-compose.yml CHANGED
@@ -11,8 +11,7 @@ services:
11
  working_dir: /home/user/app
12
  environment:
13
  - HF_TOKEN=${HF_TOKEN}
14
- - MODEL=summary
15
- # - MODEL=googleflan
16
  stdin_open: true
17
  tty: true
18
  restart: always
 
11
  working_dir: /home/user/app
12
  environment:
13
  - HF_TOKEN=${HF_TOKEN}
14
+ - MODEL=googleflan
 
15
  stdin_open: true
16
  tty: true
17
  restart: always