# Hugging Face Space app (Tonic's Genstruct-7B demo).
# NOTE: the original paste included HF file-viewer residue here
# ("Spaces: / Runtime error", commit hashes, line-number gutter);
# it was not Python and has been reduced to this comment.
import spaces
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig
import torch
from gradio_rich_textbox import RichTextbox
title = """# Welcome to 🌟Tonic's🧬📏💪🏻Genstruct 7B !
🧬📏💪🏻[Genstruct 7B](https://huggingface.co/NousResearch/Genstruct-7B) is an instruction-generation model, designed to create valid instructions given a raw text corpus. This enables the creation of new, partially synthetic instruction finetuning datasets from any raw-text corpus. You can build with this endpoint using🧬📏💪🏻[Genstruct 7B](https://huggingface.co/NousResearch/Genstruct-7B) available here : [NousResearch/Genstruct-7B](https://huggingface.co/NousResearch/Genstruct-7B). You can also use ✨StarCoder by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/starcoder2?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></h3>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) Math 🔍 [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [SciTonic](https://github.com/Tonic-AI/multitonic)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""
examplecofee = """A cortado is a Spanish beverage consisting of espresso mixed with a roughly equal amount of warm milk to reduce the acidity,[1][2] although the exact ratios have considerable regional variation.[3] The milk in a cortado is steamed, but not frothy and "texturized" as in many Italian coffee drinks.[4] The cortado is commonly served all over Spain.[5] The word cortado is the past participle of the Spanish verb cortar (to cut), in the sense of "dilute", and can refer variously to either coffee or espresso drinks throughout Spanish and Portuguese speaking countries."""
# --- Model setup (runs once at module import; requires a CUDA device) ---
model_path = "NousResearch/Genstruct-7B"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# 8-bit quantization keeps the 7B generator within a single-GPU memory budget.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map='cuda', quantization_config=quantization_config)
# Reward model used to rank generated candidates (higher logit = better text).
rm_tokenizer = AutoTokenizer.from_pretrained('OpenAssistant/reward-model-deberta-v3-large-v2')
rm_model = AutoModelForSequenceClassification.from_pretrained('OpenAssistant/reward-model-deberta-v3-large-v2', device_map='cuda', torch_dtype=torch.bfloat16)
@spaces.GPU
def generate_text(usertitle, content, temperature, max_length, N=3):
    """Generate N candidate texts with Genstruct-7B and return the one the
    reward model scores highest.

    Args:
        usertitle: Title inserted into the Genstruct prompt template.
        content: Raw text corpus inserted into the prompt template.
        temperature: Sampling temperature passed to ``model.generate``.
        max_length: Maximum total sequence length for generation.
        N: Number of candidate sequences to sample and rank (default 3).

    Returns:
        The decoded candidate string with the highest reward-model logit.
    """
    input_text = f"[[[title:]]] {usertitle}\n[[[content:]]]{content}\n\n"
    inputs = tokenizer(input_text, return_tensors='pt').to('cuda')
    # Use the tokenizer's own attention mask rather than rebuilding an
    # all-ones tensor by hand (identical here since there is no padding).
    generated_sequences = model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        temperature=temperature,
        max_length=max_length,
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=N,
        do_sample=True,
    )
    decoded_sequences = [tokenizer.decode(g, skip_special_tokens=True) for g in generated_sequences]

    def score(sequence):
        """Return the reward model's scalar score for one candidate."""
        # `.to('cuda')` on the BatchEncoding moves every tensor once;
        # the original moved them twice.
        rm_inputs = rm_tokenizer(sequence, return_tensors='pt', padding=True,
                                 truncation=True, max_length=512).to('cuda')
        with torch.no_grad():
            logits = rm_model(**rm_inputs).logits
        # `.item()` converts the single-label logit to a plain float so
        # `max(..., key=score)` compares Python numbers, not tensors.
        return logits[0].item()

    return max(decoded_sequences, key=score)
def gradio_app():
    """Assemble the Gradio Blocks UI for the demo and launch it."""
    with gr.Blocks() as demo:
        # Banner / intro markdown defined at module level.
        gr.Markdown(title)
        # Input widgets, declared in display order.
        title_box = gr.Textbox(label="Title", value="Cortado", lines=1)
        content_box = gr.Textbox(label="WordPhrases", value=examplecofee, lines=5)
        with gr.Row():
            temp_slider = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="Temperature")
            length_slider = gr.Slider(minimum=250, maximum=1024, step=10, value=450, label="Generate Length")
        run_button = gr.Button("Try 🧬📏💪🏻 Genstruct")
        result_box = gr.Textbox(label="🧬📏💪🏻Genstruct 7B:")
        # Wire the button to the generation function.
        run_button.click(
            fn=generate_text,
            inputs=[title_box, content_box, temp_slider, length_slider],
            outputs=result_box,
        )
    demo.launch()
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    gradio_app()