AdamNovotnyCom committed on
Commit
65c9805
Parent: 5574f70

new model pegasus-cnn_dailymail

Files changed (3):
  1. README.md +2 -2
  2. app.py +12 -5
  3. docker-compose.yml +1 -1
README.md CHANGED
@@ -17,7 +17,7 @@ See live app on [Hugging Face](https://huggingface.co/spaces/AdamNovotnyCom/llam
 
 Start
 
-export HF_TOKEN=paste_HF_token && docker-compose -f docker-compose.yml up llama2hf
+export HF_TOKEN=paste_HF_token && docker-compose -f docker-compose.yml up gradiohf
 
 View app in browser
 
@@ -25,7 +25,7 @@ View app in browser
 
 Exec command
 
-docker exec -it llama2hf bash -c 'pip install torch==2.1.*'
+docker exec -it gradiohf bash -c 'pip install torch==2.1.*'
 
 ## References
 - [huggingface.co/llama2](https://huggingface.co/blog/llama2)
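The start command above only works if HF_TOKEN is exported in the shell before docker-compose resolves it. A minimal sketch (not part of this commit) that can be run inside the container to confirm the token and model selection actually reached the app's environment:

```python
import os

# Fail fast if HF_TOKEN was not exported before `docker-compose up`;
# an empty token makes gated models such as Llama 2 fail much later.
if not os.environ.get("HF_TOKEN"):
    raise RuntimeError("HF_TOKEN is empty; run `export HF_TOKEN=...` first")
print("MODEL =", os.environ.get("MODEL"))
```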
app.py CHANGED
@@ -11,12 +11,23 @@ if torch.cuda.is_available():
 else:
     logging.info("Running on CPU")
 
+# Language model
 if "googleflan" == os.environ.get("MODEL"):
     ### Fast/small model used to debug UI on local machine
     model = "google/flan-t5-small"
     pipeline = transformers.pipeline("text2text-generation", model=model)
     def model_func(input_text, request: gr.Request):
         return pipeline(input_text)
+elif "summary_bart" == os.environ.get("MODEL"):
+    model = "facebook/bart-large-cnn"
+    summarizer = transformers.pipeline("summarization", model=model)
+    def model_func(input_text, request: gr.Request):
+        return summarizer(input_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
+elif "summary_pegasus" == os.environ.get("MODEL"):
+    model = "google/pegasus-cnn_dailymail"
+    summarizer = transformers.pipeline("summarization", model=model)
+    def model_func(input_text, request: gr.Request):
+        return summarizer(input_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
 elif "llama" == os.environ.get("MODEL"):
     ### Works on CPU but runtime is > 4 minutes
     model = "meta-llama/Llama-2-7b-chat-hf"
@@ -48,12 +59,8 @@ elif "llama" == os.environ.get("MODEL"):
         for seq in sequences:
             output_text += seq["generated_text"] + "\n"
         return output_text
-elif "summary" == os.environ.get("MODEL"):
-    model = "facebook/bart-large-cnn"
-    summarizer = transformers.pipeline("summarization", model=model)
-    def model_func(input_text, request: gr.Request):
-        return summarizer(input_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
 
+# UI: Gradio
 input_label = "How can I help?"
 if "summary" == os.environ.get("MODEL"):
     input_label = "Enter text to summarize"
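The new summary_pegasus branch mirrors the existing bart branch: a transformers summarization pipeline wrapped in a model_func that Gradio can call. A minimal standalone sketch of that branch (assuming transformers and torch are installed; the model's Hub id sits under the google/ namespace):

```python
import transformers

# Summarization pipeline as used by the summary_pegasus branch above.
summarizer = transformers.pipeline(
    "summarization", model="google/pegasus-cnn_dailymail"
)

article = "Paste a long news article here ..."  # placeholder input
result = summarizer(article, max_length=130, min_length=30, do_sample=False)
print(result[0]["summary_text"])
```

One caveat in the committed code: the input_label check below the model block compares MODEL against the exact string "summary", which matches neither summary_bart nor summary_pegasus; an os.environ.get("MODEL", "").startswith("summary") test would restore the summarization label.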
docker-compose.yml CHANGED
@@ -11,7 +11,7 @@ services:
     working_dir: /home/user/app
     environment:
       - HF_TOKEN=${HF_TOKEN}
-      - MODEL=googleflan
+      - MODEL=summary_bart
     stdin_open: true
     tty: true
     restart: always
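With docker-compose pinning MODEL=summary_bart, switching models is a one-line edit (e.g. to summary_pegasus or llama). Because app.py's if/elif chain only defines model_func for the values it recognizes, a typo here would surface later as a NameError; a small guard sketch (not in the commit) that could fail loudly instead:

```python
import os

# Hypothetical guard for app.py: reject unknown MODEL values up front
# rather than letting model_func stay undefined.
SUPPORTED = {"googleflan", "summary_bart", "summary_pegasus", "llama"}
choice = os.environ.get("MODEL")
if choice not in SUPPORTED:
    raise ValueError(f"Unsupported MODEL={choice!r}; expected one of {sorted(SUPPORTED)}")
```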