AdamNovotnyCom committed on
Commit
65c9805
Parent: 5574f70

new model pegasus-cnn_dailymail

Files changed (3):
  1. README.md +2 -2
  2. app.py +12 -5
  3. docker-compose.yml +1 -1
README.md CHANGED
@@ -17,7 +17,7 @@ See live app on [Hugging Face](https://huggingface.co/spaces/AdamNovotnyCom/llam
 
 Start
 
-export HF_TOKEN=paste_HF_token && docker-compose -f docker-compose.yml up llama2hf
+export HF_TOKEN=paste_HF_token && docker-compose -f docker-compose.yml up gradiohf
 
 View app in browser
 
@@ -25,7 +25,7 @@ View app in browser
 
 Exec command
 
-docker exec -it llama2hf bash -c 'pip install torch==2.1.*'
+docker exec -it gradiohf bash -c 'pip install torch==2.1.*'
 
 ## References
 - [huggingface.co/llama2](https://huggingface.co/blog/llama2)
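The start command above only works if HF_TOKEN is exported in the shell before docker-compose resolves it. A minimal sketch (not part of this commit) that can be run inside the container to confirm the token and model selection actually reached the app's environment:

```python
import os

# Fail fast if HF_TOKEN was not exported before `docker-compose up`;
# an empty token makes gated models such as Llama 2 fail much later.
if not os.environ.get("HF_TOKEN"):
    raise RuntimeError("HF_TOKEN is empty; run `export HF_TOKEN=...` first")
print("MODEL =", os.environ.get("MODEL"))
```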
app.py CHANGED
@@ -11,12 +11,23 @@ if torch.cuda.is_available():
 else:
     logging.info("Running on CPU")
 
+# Language model
 if "googleflan" == os.environ.get("MODEL"):
     ### Fast/small model used to debug UI on local machine
     model = "google/flan-t5-small"
     pipeline = transformers.pipeline("text2text-generation", model=model)
     def model_func(input_text, request: gr.Request):
         return pipeline(input_text)
+elif "summary_bart" == os.environ.get("MODEL"):
+    model = "facebook/bart-large-cnn"
+    summarizer = transformers.pipeline("summarization", model=model)
+    def model_func(input_text, request: gr.Request):
+        return summarizer(input_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
+elif "summary_pegasus" == os.environ.get("MODEL"):
+    model = "google/pegasus-cnn_dailymail"
+    summarizer = transformers.pipeline("summarization", model=model)
+    def model_func(input_text, request: gr.Request):
+        return summarizer(input_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
 elif "llama" == os.environ.get("MODEL"):
     ### Works on CPU but runtime is > 4 minutes
     model = "meta-llama/Llama-2-7b-chat-hf"
@@ -48,12 +59,8 @@ elif "llama" == os.environ.get("MODEL"):
         for seq in sequences:
             output_text += seq["generated_text"] + "\n"
         return output_text
-elif "summary" == os.environ.get("MODEL"):
-    model = "facebook/bart-large-cnn"
-    summarizer = transformers.pipeline("summarization", model=model)
-    def model_func(input_text, request: gr.Request):
-        return summarizer(input_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
 
+# UI: Gradio
 input_label = "How can I help?"
 if "summary" == os.environ.get("MODEL"):
     input_label = "Enter text to summarize"
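The new summary_pegasus branch mirrors the existing bart branch: a transformers summarization pipeline wrapped in a model_func that Gradio can call. A minimal standalone sketch of that branch (assuming transformers and torch are installed; the model's Hub id sits under the google/ namespace):

```python
import transformers

# Summarization pipeline as used by the summary_pegasus branch above.
summarizer = transformers.pipeline(
    "summarization", model="google/pegasus-cnn_dailymail"
)

article = "Paste a long news article here ..."  # placeholder input
result = summarizer(article, max_length=130, min_length=30, do_sample=False)
print(result[0]["summary_text"])
```

One caveat in the committed code: the input_label check below the model block compares MODEL against the exact string "summary", which matches neither summary_bart nor summary_pegasus; an os.environ.get("MODEL", "").startswith("summary") test would restore the summarization label.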
docker-compose.yml CHANGED
@@ -11,7 +11,7 @@ services:
     working_dir: /home/user/app
     environment:
       - HF_TOKEN=${HF_TOKEN}
-      - MODEL=googleflan
+      - MODEL=summary_bart
     stdin_open: true
     tty: true
     restart: always
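With docker-compose pinning MODEL=summary_bart, switching models is a one-line edit (e.g. to summary_pegasus or llama). Because app.py's if/elif chain only defines model_func for the values it recognizes, a typo here would surface later as a NameError; a small guard sketch (not in the commit) that could fail loudly instead:

```python
import os

# Hypothetical guard for app.py: reject unknown MODEL values up front
# rather than letting model_func stay undefined.
SUPPORTED = {"googleflan", "summary_bart", "summary_pegasus", "llama"}
choice = os.environ.get("MODEL")
if choice not in SUPPORTED:
    raise ValueError(f"Unsupported MODEL={choice!r}; expected one of {sorted(SUPPORTED)}")
```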