import gradio as gr
import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define the BLOOM model name
model_name = "CreitinGameplays/bloom-3b-conversational"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
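
# Optional (an assumption, not part of the original Space): on a CUDA device
# the 3B model fits more comfortably in half precision, e.g.
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name, torch_dtype=torch.float16
#   ).to(device)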

@spaces.GPU(duration=70)
def generate_text(user_prompt):
  """Generates text using the BLOOM model from Hugging Face Transformers and removes the user prompt."""
  # Construct the full prompt with system introduction, user prompt, and assistant role
  prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"

  # Encode the entire prompt into tokens
  prompt_encoded = tokenizer.encode(prompt, return_tensors="pt").to(device)
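
  # Note (assumption, not in the original Space): calling the tokenizer
  # directly also returns an attention_mask, which silences a transformers
  # warning during generate(). A minimal alternative sketch:
  #   inputs = tokenizer(prompt, return_tensors="pt").to(device)
  #   output = model.generate(**inputs, max_length=1900, ...)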

  # Generate from the full prompt; max_length caps prompt + completion at 1900 tokens
  output = model.generate(
      input_ids=prompt_encoded,
      max_length=1900,
      num_beams=1,
      num_return_sequences=1,
      do_sample=True,
      top_k=0,          # 0 disables top-k filtering
      top_p=1.0,        # no nucleus truncation
      temperature=0.2,  # low temperature keeps responses focused
      repetition_penalty=1.1
  )

  # Decode the generated token sequence back to text
  generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

  # Extract the assistant's response. This assumes "<|assistant|>" is not a
  # registered special token, so it survives skip_special_tokens=True above.
  assistant_response = generated_text.split("<|assistant|>")[-1]
  assistant_response = assistant_response.replace(user_prompt, "").strip()
  assistant_response = assistant_response.replace("You are a helpful AI assistant.", "").strip()
  
  return assistant_response
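
# Quick local smoke test (hypothetical usage, bypassing the Gradio UI):
#   print(generate_text("What's an AI?"))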

# Define the Gradio interface
interface = gr.Interface(
  fn=generate_text,
  inputs=[
      gr.Textbox(label="Text Prompt", value="What's an AI?"),
  ],
  outputs="text",
  description="Interact with BLOOM-3b-conversational (Loaded with Hugging Face Transformers)",
)


# Launch the Gradio interface
interface.launch()