ActiveYixiao commited on
Commit
302476b
·
verified ·
1 Parent(s): ab5c129

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -4
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import logging
2
  import textwrap
3
  from typing import Literal, Optional
4
-
5
  import gradio as gr
6
  import outlines
7
  import pandas as pd
@@ -17,6 +17,8 @@ from transformers import (
17
  BitsAndBytesConfig,
18
  )
19
 
 
 
20
  logging.basicConfig(level=logging.INFO)
21
  logger = logging.getLogger(__name__)
22
 
@@ -70,10 +72,28 @@ PROMPT_TEMPLATE = textwrap.dedent("""
70
  </Answer>
71
  Score:""").strip()
72
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  def get_outlines_model(
75
- model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
76
  ):
 
 
 
 
 
 
 
77
  if quantization_bits == 4:
78
  quantization_config = BitsAndBytesConfig(
79
  load_in_4bit=True,
@@ -120,7 +140,7 @@ def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -
120
  return full_prompt
121
 
122
 
123
- @spaces.GPU
124
  def label_single_response_with_model(model_id, story, question, criteria, response):
125
  prompt = format_prompt(story, question, criteria, response)
126
  logger.info(f"Prompt: {prompt}")
@@ -143,7 +163,7 @@ def label_single_response_with_model(model_id, story, question, criteria, respon
143
  return result.score
144
 
145
 
146
- @spaces.GPU
147
  def label_multi_responses_with_model(
148
  model_id, story, question, criteria, response_file
149
  ):
 
1
  import logging
2
  import textwrap
3
  from typing import Literal, Optional
4
+ import os
5
  import gradio as gr
6
  import outlines
7
  import pandas as pd
 
17
  BitsAndBytesConfig,
18
  )
19
 
20
+
21
+
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
 
72
  </Answer>
73
  Score:""").strip()
74
 
75
def is_huggingface_space():
    """Return True when running inside a Hugging Face Space.

    Spaces set the SPACE_ID environment variable; locally it is absent.
    """
    return os.environ.get('SPACE_ID') is not None


# NOTE(review): fixed a NameError — the original called is_huggingface_space()
# at module level *before* the function was defined, so importing the module
# crashed. The function definition now precedes its first use.
if is_huggingface_space():
    # Spaces (CPU hardware): no GPU, so skip bitsandbytes quantization.
    DEVICE_MAP = "cpu"
    QUANTIZATION_BITS = None
else:
    DEVICE_MAP = "auto"
    QUANTIZATION_BITS = 4  # or whatever you prefer for local deployment
84
+
85
+
86
 
87
  def get_outlines_model(
88
+ model_id: str, device_map: str = "cpu", quantization_bits: Optional[int] = None
89
  ):
90
+ # Skip quantization on CPU
91
+ if device_map == "cpu":
92
+ quantization_config = None
93
+ else:
94
+ # Your existing quantization logic
95
+ pass
96
+
97
  if quantization_bits == 4:
98
  quantization_config = BitsAndBytesConfig(
99
  load_in_4bit=True,
 
140
  return full_prompt
141
 
142
 
143
+ # @spaces.GPU
144
  def label_single_response_with_model(model_id, story, question, criteria, response):
145
  prompt = format_prompt(story, question, criteria, response)
146
  logger.info(f"Prompt: {prompt}")
 
163
  return result.score
164
 
165
 
166
+ # @spaces.GPU
167
  def label_multi_responses_with_model(
168
  model_id, story, question, criteria, response_file
169
  ):