testing streaming pipeline
#14
by
not-lain
- opened
No description provided.
How to use:
import gradio as gr
from transformers import pipeline

# Streaming-capable text-generation pipeline. trust_remote_code=True is
# required because the custom pipeline code lives in the model repository
# under revision refs/pr/14.
pipe = pipeline(
    "text-generation",
    model="google/gemma-1.1-2b-it",
    revision="refs/pr/14",
    trust_remote_code=True,
)


def talk(prompt, history=None):
    """Stream generated tokens for *prompt*.

    Parameters
    ----------
    prompt : str
        The user message to generate a reply for.
    history : list | None
        Prior chat turns. Defaults to an empty list; ``None`` is used as
        the default instead of ``[]`` so one list is not shared across calls.

    Yields
    ------
    Partial generation chunks as the pipeline streams them.
    """
    if history is None:
        history = []
    # yield from forwards each streamed chunk straight to Gradio.
    yield from pipe(prompt, history=history, stream=True, max_new_tokens=50)


demo = gr.ChatInterface(
    fn=talk,
    chatbot=gr.Chatbot(),
    examples=[["hi", []]],
    cache_examples=False,
    title="Streaming",
)
demo.launch(debug=True)
Alternatively, you can format the prompt yourself and pass the rendered text to the pipeline.
Example without Gradio:
from transformers import pipeline

# Streaming text-generation pipeline; the custom pipeline implementation
# lives in the model repo under revision refs/pr/14, hence trust_remote_code.
pipe = pipeline(
    "text-generation",
    model="google/gemma-1.1-2b-it",
    revision="refs/pr/14",
    trust_remote_code=True,
)

# You may also apply your chat template first and pass the rendered string
# as the single keyword argument named `prompt`.
text = "hello"
stream = pipe(prompt=text, stream=True, max_new_tokens=50)
for chunk in stream:
    print(chunk)
from transformers import pipeline

# Non-streaming usage: same pipeline, but the full generation is returned
# at once instead of being streamed chunk by chunk.
pipe = pipeline(
    "text-generation",
    model="google/gemma-1.1-2b-it",
    revision="refs/pr/14",
    trust_remote_code=True,
)

# Fix: the original snippet referenced `prompt` without ever defining it,
# which raises NameError. Define the input text before calling the pipeline.
prompt = "hello"
text = pipe(prompt, max_new_tokens=50)
print(text)