testing streaming pipeline

#14
No description provided.

How to use

import gradio as gr
from transformers import pipeline

# Load the text-generation pipeline from the PR branch of the model repo.
# trust_remote_code must be enabled here.
pipe = pipeline(
    "text-generation",
    model="google/gemma-1.1-2b-it",
    revision="refs/pr/14",
    trust_remote_code=True,
)

def talk(prompt, history=None):
    """Stream the pipeline's output for ``prompt``.

    Args:
        prompt: The user message; passed to ``pipe`` as its first positional
            argument.
        history: Prior conversation turns, forwarded to ``pipe`` as the
            ``history`` keyword. Defaults to a new empty list on each call.

    Yields:
        Each item produced by ``pipe`` when called with ``stream=True`` and
        ``max_new_tokens=50``.
    """
    # Use a None sentinel rather than a `[]` default: a list default is
    # created once and would be shared (and possibly mutated) across calls.
    if history is None:
        history = []
    # `yield from` re-yields every streamed item to the caller unchanged.
    yield from pipe(prompt, history=history, stream=True, max_new_tokens=50)

# Wire the streaming generator `talk` into a Gradio chat UI.
demo = gr.ChatInterface(
    fn=talk,
    chatbot=gr.Chatbot(),
    examples=[["hi", []]],
    cache_examples=False,
    title="Streaming",
)
# Start the app with debug mode enabled.
demo.launch(debug=True)

Alternatively, you can format the prompt yourself outside the pipeline and pass the result to it.

Examples without Gradio

from transformers import pipeline

# Load the text-generation pipeline from the PR branch of the model repo.
pipe = pipeline(
    "text-generation",
    model="google/gemma-1.1-2b-it",
    revision="refs/pr/14",
    trust_remote_code=True,
)
# A plain prompt; alternatively, apply your chat template yourself and feed
# everything as the single parameter named `prompt`.
text = "hello"
s = pipe(prompt=text, stream=True, max_new_tokens=50)
# Print each item as the stream yields it.
for i in s:
    print(i)
from transformers import pipeline

# Non-streaming example: `stream=True` is not passed, so the call result is
# printed directly instead of being iterated.
pipe = pipeline(
    "text-generation",
    model="google/gemma-1.1-2b-it",
    revision="refs/pr/14",
    trust_remote_code=True,
)
# The original snippet used `prompt` without defining it (NameError when run
# on its own); define it here so the example is self-contained.
prompt = "hello"
text = pipe(prompt, max_new_tokens=50)
print(text)
Ready to merge
This branch is ready to get merged automatically.

Sign up or log in to comment