import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from huggingface_hub import login
import os
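
# Launch locally with: streamlit run app.py  (assuming this file is saved as app.py)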

# Authenticate to Hugging Face; the access token is read from an environment
# variable named "key" (e.g. a Space secret) rather than hard-coded here
hugging_face_token = os.getenv('key')
login(hugging_face_token)

# Load the dataset
ds = load_dataset("Vezora/Open-Critic-GPT")
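
# Each training example carries a "Human" prompt and an "Assistant" reference
# answer; those two fields are rendered in the loop at the bottom of the app.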

st.write("Open-Critic-GPT dataset")

# Load the gated google/gemma-2-2b model and tokenizer once; st.cache_resource
# caches them across Streamlit reruns instead of reloading on every interaction
@st.cache_resource
def load_model(name):
    return AutoModelForCausalLM.from_pretrained(name), AutoTokenizer.from_pretrained(name)

model, tokenizer = load_model("google/gemma-2-2b")

# Generate a response from the model using beam search
def generate_response(human_text):
    inputs = tokenizer(human_text, return_tensors='pt')
    # max_new_tokens caps only the generated tokens; passing max_length as well
    # would conflict with it, since max_length also counts the prompt tokens
    outputs = model.generate(**inputs, max_new_tokens=200, num_beams=5, early_stopping=True)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
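
# Example call (hypothetical prompt, for illustration only):
# generate_response("Find the bug in this function: def add(a, b): return a - b")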

# Display the first three training examples side by side with the model's response
for x in ds["train"].select(range(3)):
    col1, col2, col3 = st.columns(3)
    with col1:
        st.code(x["Human"])

    with col2:
        st.write(x["Assistant"])

    with col3:
        # Generate and display the model's response to the same prompt
        response = generate_response(x["Human"])
        st.write(response)