import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
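# When running locally, the token can come from a .env file in the project root,
# e.g. a line like: HF_TOKEN=hf_xxxxxxxxxxxxxxxx  (placeholder value for illustration).
# On Hugging Face Spaces, set it as a repository secret named HF_TOKEN instead.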
# Set page configuration
st.set_page_config(
page_title="GemmaTextAppeal",
page_icon="✨",
layout="wide",
)
# App title and description
st.title("✨ GemmaTextAppeal")
st.markdown("""
### Interactive Demo of Google's Gemma 2-2B-IT Model
This app demonstrates the text generation capabilities of Google's Gemma 2-2B-IT model.
Enter a prompt below and see the model generate text in real-time!
""")
# Function to load model
@st.cache_resource(show_spinner=False)
def load_model():
    try:
        # Get API token
        huggingface_token = os.getenv("HF_TOKEN")
        if not huggingface_token:
            return None, None, "No Hugging Face API token found. Please add your token as a secret named 'HF_TOKEN'."

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            "google/gemma-2-2b-it",
            token=huggingface_token
        )

        # Load model with appropriate configuration
        model_kwargs = {
            "token": huggingface_token,
            "device_map": "auto" if torch.cuda.is_available() else None,
            "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
        }
        model = AutoModelForCausalLM.from_pretrained(
            "google/gemma-2-2b-it",
            **model_kwargs
        )
        return tokenizer, model, None
    except Exception as e:
        return None, None, str(e)
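# Note: st.cache_resource keeps the returned tokenizer/model in memory across
# Streamlit reruns and sessions, so the download/initialization above runs only
# once per container rather than on every interaction.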
# Try to load the model at startup
with st.spinner("Initializing the Gemma model... this may take a minute."):
    tokenizer, model, load_error = load_model()

if load_error:
    st.error(f"Error loading model: {load_error}")
else:
    if tokenizer and model:
        st.success("✅ Gemma model loaded successfully! Ready to generate text.")
    else:
        st.warning("⚠️ Model not loaded. Please check your Hugging Face token.")
# Check for Hugging Face Token
huggingface_token = os.getenv("HF_TOKEN")
if not huggingface_token:
st.warning("""
⚠️ **No Hugging Face API token detected**
The Gemma models require accepting a license and authentication to use.
To make this app work:
1. Create a Hugging Face account
2. Accept the model license at: https://huggingface.co/google/gemma-2-2b-it
3. Create a HF token at: https://huggingface.co/settings/tokens
4. Add your token as a secret named 'HF_TOKEN' in your Space settings
""")
# Sidebar with information
with st.sidebar:
st.header("About Gemma")
st.markdown("""
[Gemma 2-2B-IT](https://huggingface.co/google/gemma-2-2b-it) is a lightweight 2B parameter instruction-tuned model from Google's Gemma family.
Key features:
- Efficient text generation
- Strong instruction following
- 2 billion parameters - fast enough to run on consumer hardware
- Trained on a mixture of text and code
This demo runs directly on Hugging Face Spaces!
""")
st.header("Usage Tips")
st.markdown("""
- Be specific in your prompts
- You can ask for creative content, summaries, or answers to questions
- The model performs best when given clear instructions
- Try different temperatures to vary creativity vs. coherence
""")
st.header("Sample Prompts")
sample_prompts = [
"Write a short story about a robot discovering emotions",
"Explain quantum computing to a 10-year old",
"Create a recipe for vegan chocolate chip cookies",
"Write a haiku about artificial intelligence",
"Describe the benefits and risks of generative AI"
]
for i, prompt in enumerate(sample_prompts):
if st.button(f"Example {i+1}", key=f"sample_{i}"):
st.session_state.user_prompt = prompt
# Initialize session state variables
if 'user_prompt' not in st.session_state:
    st.session_state.user_prompt = ""
if 'generation_complete' not in st.session_state:
    st.session_state.generation_complete = False
if 'generated_text' not in st.session_state:
    st.session_state.generated_text = ""
if 'error_message' not in st.session_state:
    st.session_state.error_message = None
# Model parameters
col1, col2 = st.columns(2)
with col1:
    max_length = st.slider("Maximum Length", min_value=50, max_value=1000, value=300, step=50,
                           help="Maximum number of tokens to generate")
with col2:
    temperature = st.slider("Temperature", min_value=0.1, max_value=2.0, value=0.7, step=0.1,
                            help="Higher values make output more random, lower values more deterministic")
# User input
user_input = st.text_area("Enter your prompt:",
value=st.session_state.user_prompt,
height=100,
placeholder="e.g., Write a short story about a robot discovering emotions")
def generate_text_streaming(prompt, max_new_tokens=300, temperature=0.7):
    if not tokenizer or not model:
        st.session_state.error_message = "Model not properly loaded. Please check your Hugging Face token."
        return None
    try:
        # Format the prompt according to Gemma's chat format; the tokenizer adds the
        # <bos> token itself, so it is not included here to avoid a duplicate.
        formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"

        # Create the output area
        output_container = st.empty()
        response_area = st.container()
        with response_area:
            st.markdown("**Generated Response:**")
            response_text = st.empty()

        # Tokenize the input
        encoding = tokenizer(formatted_prompt, return_tensors="pt")

        # Move to the appropriate device
        if torch.cuda.is_available():
            encoding = {k: v.to("cuda") for k, v in encoding.items()}

        # Store the length of the input to track new tokens
        input_length = encoding["input_ids"].shape[1]

        # Initialize generated text container
        generated_text = ""

        # Generate tokens one at a time so the partial response can be shown as it grows
        generated_ids = []
        for _ in range(max_new_tokens):
            with torch.no_grad():
                if len(generated_ids) == 0:
                    # First token generation
                    outputs = model.generate(
                        **encoding,
                        max_new_tokens=1,
                        do_sample=True,
                        temperature=temperature,
                        pad_token_id=tokenizer.eos_token_id,
                        return_dict_in_generate=True,
                        output_scores=False
                    )
                    next_token_id = outputs.sequences[0, input_length:input_length + 1]
                else:
                    # Subsequent tokens: re-run generation on the prompt plus everything produced so far
                    current_input_ids = torch.cat(
                        [encoding["input_ids"], torch.tensor([generated_ids], device=encoding["input_ids"].device)],
                        dim=1
                    )
                    outputs = model.generate(
                        input_ids=current_input_ids,
                        max_new_tokens=1,
                        do_sample=True,
                        temperature=temperature,
                        pad_token_id=tokenizer.eos_token_id,
                        return_dict_in_generate=True,
                        output_scores=False
                    )
                    next_token_id = outputs.sequences[0, -1].unsqueeze(0)

            # Convert to Python list and append
            next_token_id_list = next_token_id.tolist()
            generated_ids.extend(next_token_id_list)

            # Check for EOS token
            if tokenizer.eos_token_id in next_token_id_list:
                break

            # Decode the tokens generated so far and update the displayed text
            current_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
            generated_text = current_text
            response_text.markdown(generated_text)

        return generated_text
    except Exception as e:
        st.session_state.error_message = f"Error during generation: {str(e)}"
        st.error(f"Error during generation: {str(e)}")
        return None
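# Optional alternative, left here as a minimal sketch rather than wired into the UI:
# transformers provides TextIteratorStreamer, which streams tokens from a single
# generate() call instead of re-running generation from scratch for every token as
# the loop above does. It assumes the same `tokenizer` and `model` globals loaded by
# load_model(); the function name and its usage here are illustrative only.
def generate_text_with_streamer(prompt, max_new_tokens=300, temperature=0.7):
    from threading import Thread
    from transformers import TextIteratorStreamer

    formatted_prompt = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

    # skip_prompt=True so only newly generated text is yielded
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Run generation in a background thread; the streamer yields text chunks as they arrive
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    placeholder = st.empty()
    generated_text = ""
    for chunk in streamer:
        generated_text += chunk
        placeholder.markdown(generated_text)
    thread.join()
    return generated_text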
# Show any existing error
if st.session_state.error_message:
st.error(f"Error: {st.session_state.error_message}")
# Add troubleshooting information
with st.expander("Troubleshooting Information"):
st.markdown("""
### Common Issues:
1. **Missing Hugging Face Token**: The Gemma model requires authentication. Add your token as a secret named 'HF_TOKEN' in the Space settings.
2. **License Acceptance**: You need to accept the model license on the [Gemma model page](https://huggingface.co/google/gemma-2-2b-it).
3. **Internet Connection**: The model needs to be downloaded the first time the app runs. Ensure your Space has internet access.
4. **Resource Constraints**: The Gemma model requires significant resources. Consider upgrading your Space's hardware if you're encountering memory issues.
### How to Fix:
1. Create a [Hugging Face account](https://huggingface.co/join)
2. Visit the [Gemma model page](https://huggingface.co/google/gemma-2-2b-it) and accept the license
3. Create a token at https://huggingface.co/settings/tokens
4. Add your token to the Space: Settings → Secrets → New Secret (HF_TOKEN)
""")
# Add a debug section
with st.expander("Debug Information"):
st.write(f"Model loaded: {model is not None}")
st.write(f"Tokenizer loaded: {tokenizer is not None}")
st.write(f"Device: {model.device if model else 'N/A'}")
st.write(f"Hugging Face token set: {huggingface_token is not None}")
if torch.cuda.is_available():
st.write(f"CUDA available: True (Device count: {torch.cuda.device_count()})")
else:
st.write("CUDA available: False")
# Generate button
if st.button("Generate Text"):
    # Reset any previous errors
    st.session_state.error_message = None

    if not huggingface_token:
        st.error("Hugging Face token is required! Please add your token as described above.")
    elif user_input:
        st.session_state.user_prompt = user_input
        result = generate_text_streaming(user_input, max_length, temperature)
        if result is not None:  # Only set if no error occurred
            st.session_state.generated_text = result
            st.session_state.generation_complete = True
    else:
        st.error("Please enter a prompt first!")
# Analysis section (only show after generation is complete)
if st.session_state.generation_complete and not st.session_state.error_message and st.session_state.generated_text:
    with st.expander("Text Analysis"):
        col1, col2 = st.columns(2)
        with col1:
            st.metric("Character Count", len(st.session_state.generated_text))
            st.metric("Word Count", len(st.session_state.generated_text.split()))
        with col2:
            st.metric("Sentence Count", st.session_state.generated_text.count('.') +
                      st.session_state.generated_text.count('!') +
                      st.session_state.generated_text.count('?'))
            st.metric("Paragraph Count", st.session_state.generated_text.count('\n\n') + 1)
# Footer
st.markdown("---")
st.markdown("""
<div style="text-align: center">
<p>Created with ❤️ | Powered by Gemma 2-2B-IT and Hugging Face</p>
<p>Code available on <a href="https://huggingface.co/spaces" target="_blank">Hugging Face Spaces</a></p>
</div>
""", unsafe_allow_html=True) |