Spaces:

RamAnanth1
/

Dolly-v2

Sleeping

App Files Files Community

Dolly-v2 / app.py

RamAnanth1

Modify response to use streaming

ec187f6 over 1 year ago

raw

history blame

3.28 kB

	import gradio as gr
	from transformers import pipeline
	import torch

	# Monochrome Gradio theme with custom hues, the small corner-radius preset
	# and an Open Sans font stack with generic sans-serif fallbacks.
	theme = gr.themes.Monochrome(
	    primary_hue="indigo",
	    secondary_hue="blue",
	    neutral_hue="slate",
	    radius_size=gr.themes.sizes.radius_sm,
	    font=[
	        gr.themes.GoogleFont("Open Sans"),
	        "ui-sans-serif",
	        "system-ui",
	        "sans-serif",
	    ],
	)

	# Module-level text-generation pipeline for Dolly v2 12B, loaded once at
	# import time in bfloat16. trust_remote_code=True is passed so the
	# pipeline code shipped with the model repository is used.
	instruct_pipeline = pipeline(
	    model="databricks/dolly-v2-12b",
	    torch_dtype=torch.bfloat16,
	    trust_remote_code=True,
	    device_map="auto",
	)
	def generate(instruction):
	    """Stream the model's answer to ``instruction`` word by word.

	    The full answer is produced by ``instruct_pipeline`` in one call and is
	    then replayed as a sequence of growing prefixes so the UI can render it
	    incrementally.

	    Args:
	        instruction: The user's prompt text.

	    Yields:
	        The answer accumulated so far; each yielded string extends the
	        previous one by one space-separated word followed by a space.
	    """
	    response = instruct_pipeline(instruction)

	    # The pipeline code is loaded from the model repo (trust_remote_code),
	    # so its return shape depends on the repo revision: either a bare
	    # string or a list of {"generated_text": ...} records. Normalise both
	    # to a plain string so the split below cannot fail on a list.
	    if isinstance(response, (list, tuple)):
	        response = response[0] if response else ""
	    if isinstance(response, dict):
	        response = response.get("generated_text", "")

	    result = ""
	    for word in response.split(" "):
	        result += word + " "
	        yield result


	# Sample prompts handed to gr.Examples as inputs for the question textbox.
	examples = [
	    "Instead of making a peanut butter and jelly sandwich, what else could I combine peanut butter with in a sandwich? Give five ideas",
	    "How do I make a campfire?",
	    "Write me a tweet about the release of Dolly 2.0, a new LLM",
	]


	def process_example(args):
	    """Run ``generate`` to completion and return its final yielded value.

	    Used as the ``fn`` of ``gr.Examples``: the intermediate streamed
	    prefixes are discarded and only the last (complete) string is returned.
	    """
	    for latest in generate(args):
	        continue
	    return latest

	# Custom CSS: hide any element carrying the "generating" class.
	# NOTE(review): presumably this suppresses Gradio's in-progress styling
	# while the answer streams — confirm against the Gradio version in use.
	css = ".generating {visibility: hidden}"

	# Build the Blocks UI with the theme and CSS defined above.
	# NOTE(review): the nesting depth of the `with` blocks below is not
	# recoverable from this copy of the file — confirm the layout against the
	# original app.py before re-indenting.
	with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
	with gr.Column():
	# Header: model description and a link to the model card.
	gr.Markdown(
	""" ## Dolly 2.0
	Dolly 2.0 is a 12B parameter language model based on the EleutherAI pythia model family and fine-tuned exclusively on a new, high-quality human generated instruction following dataset, crowdsourced among Databricks employees. For more details, please refer to the [model card](https://huggingface.co/databricks/dolly-v2-12b)

	Type in the box below and click the button to generate answers to your most pressing questions!

	"""
	)
	# Badge linking to this Space's "?duplicate=true" URL.
	gr.HTML("<p>You can duplicate this Space to run it privately without a queue for shorter queue times : <a style='display:inline-block' href='https://huggingface.co/spaces/RamAnanth1/Dolly-v2?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14' alt='Duplicate Space'></a> </p>")

	with gr.Row():
	with gr.Column(scale=3):
	# Text input for the user's question.
	instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input")

	with gr.Box():
	gr.Markdown("Answer")
	# Markdown component that receives the generated answer.
	output = gr.Markdown(elem_id="q-output")
	submit = gr.Button("Generate", variant="primary")
	# Example prompts wired to the question box; process_example produces
	# the output for an example and cache_examples is turned off.
	gr.Examples(
	examples=examples,
	inputs=[instruction],
	cache_examples=False,
	fn=process_example,
	outputs=[output],
	)


	# Both the button's click event and the textbox's submit event call
	# generate with the question and write its yielded values to the output.
	submit.click(generate, inputs=[instruction], outputs=[output])
	instruction.submit(generate, inputs=[instruction], outputs=[output])

	# Enable the request queue with concurrency_count=16, then launch the app
	# with debug=True.
	# NOTE(review): concurrency_count looks like the Gradio 3.x queue
	# parameter — confirm the pinned Gradio version before upgrading.
	demo.queue(concurrency_count=16).launch(debug=True)