File size: 6,723 Bytes
e042aad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f56fb77
 
 
e042aad
 
 
f56fb77
e042aad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55dda53
e042aad
 
55dda53
e042aad
 
 
55dda53
b219742
e042aad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aba94a7
e042aad
 
aba94a7
e042aad
 
 
aba94a7
b219742
e042aad
 
 
 
 
 
 
 
 
 
 
 
f56fb77
e042aad
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import streamlit as st
import requests

# Module-level flag, initialised to False.
# NOTE(review): no function in this file declares it `global`, so it looks
# unused at module scope - confirm before removing.
BAD_WORD = False
# API token read from Streamlit secrets; rev_generate() sends it as the
# Bearer token in the Authorization header.
my_api = st.secrets["my_api"]
# Blocklist read from Streamlit secrets; generated reviews are checked against
# it in review(). Presumably a list of words - TODO confirm the secret's shape.
bad_words = st.secrets["bad_words"]
    
def rev_generate(text, max_length=500, temperature=0.5, top_k=5, do_sample=False, use_cache=True):
    """Ask the hosted ReviewTrainingBot model to continue a prompt.

    Args:
        text: Prompt; sent as the ``inputs`` field of the request.
        max_length: Sent to the API as ``max_new_tokens``.
        temperature: Sampling temperature forwarded to the API.
        top_k: Top-k cutoff forwarded to the API.
        do_sample: Whether the API should sample. When true, ``use_cache`` is
            forced to False (presumably so repeated prompts are regenerated
            rather than served from the cache).
        use_cache: Value for the ``use_cache`` request option; ignored when
            ``do_sample`` is true.

    Returns:
        The decoded JSON body of the response, unmodified. Callers in this
        file read ``output[0]["generated_text"]`` from it.
    """
    API_URL = "https://api-inference.huggingface.co/models/uripper/ReviewTrainingBot"
    headers = {"Authorization": f"Bearer {my_api}"}

    if do_sample:
        use_cache = False

    payload = {
        "inputs": f"{text}",
        "parameters": {
            "max_new_tokens": max_length,
            "temperature": temperature,
            # Previously accepted by this function but never sent, so the
            # caller's top-k setting had no effect on generation.
            "top_k": top_k,
            "top_p": .95,
            "do_sample": do_sample,
            "no_repeat_ngram_size": 2,
        },
        "options": {"wait_for_model": True, "use_cache": use_cache},
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
    

# Create each chat-history list in the session state the first time the
# script runs for a session; existing lists are left untouched.
for history_key in ("persona_chat_history", "gordon_chat_history"):
    if history_key not in st.session_state:
        st.session_state[history_key] = []
    

def main_page():
    """Render the landing page: an introduction to AVA and its limitations."""
    st.title("AVA")
    st.write("This model generates reviews of films and can be accessed on the drop down menu on the left. \n\nThe model is named after Ava from the movie Ex Machina. To use the model you can enter the name of a movie and generate a review for it or have Ava randomly generate a review. This was created by finetuning a GPT-2 model on a dataset of movie reviews. The dataset was created via scraping around 500,000 letterboxd reviews.")

    st.title("Limitations and biases")
    st.write("The main limitations of the review feature are that it is unable to find links between the movie title and the review itself, and struggles to determine positive and negative sentiment based on the score that is given. It however gives consistently plausible reviews, if not very plausible. It is unable to determine fact, and cannot give truthful reviews or reliably determine actors/directors for any given movie. Its main, and only, use case is for entertainment.")
    st.write("The review bot also has social biases. Due to its underlying model, it has many of the same biases as GPT-2. These biases can be found here: https://huggingface.co/gpt2. In addition to these biases, it also struggles with some of the unique examples of this training dataset. For a concrete example of this, it is fairly common for a review of a movie with gay or lesbian characters to be described as being 'very gay' on letterboxd.com. This is almost always used as a positive thing, but the bot itself is incapable of determining that this is a positive sentiment, and will describe random films this way in a manner that seems more like a slur. This language can likely be extended to other ways that have not been discovered yet, and the model should be handled with care.")


def _blocked_words():
    """Return the configured blocklist as a set of lower-cased words."""
    # A bare string would otherwise be iterated character by character, so it
    # is treated as a comma/whitespace separated list instead.
    # NOTE(review): assumes the secret is a list of words or such a string - confirm.
    if isinstance(bad_words, str):
        entries = bad_words.replace(",", " ").split()
    else:
        entries = bad_words
    blocked = {str(entry).lower() for entry in entries}
    blocked.discard("")
    return blocked


def _contains_bad_word(generated_text):
    """Return True when any token of the generated text is on the blocklist."""
    import string

    blocked = _blocked_words()
    # Compare by value (the old `is` test compared object identity and so
    # effectively never matched). Each token is also checked with surrounding
    # punctuation removed so that e.g. "word." or "(word)" is still caught.
    return any(
        token in blocked or token.strip(string.punctuation) in blocked
        for token in generated_text.lower().split()
    )


def _split_generated_review(generated_text):
    """Split "Movie: ... Score: ... Review: ..." text into (movie, score, review).

    A section whose marker is missing from the text comes back as an empty
    string instead of raising IndexError.
    """
    movie = generated_text.split("Score:")[0]
    movie = movie.replace("Movie: ", "").replace("|", "")

    review_sections = generated_text.split("Review:")
    score_sections = review_sections[0].split("Score:")
    score = score_sections[1] if len(score_sections) > 1 else ""
    score = score.replace("|", "")

    review_text = review_sections[1] if len(review_sections) > 1 else ""
    review_text = review_text.replace("…", ".")
    review_text = review_text.replace("...", ".").replace("|", "")
    # "<br/>" becomes a Markdown hard line break (two spaces + newline); the
    # old code inserted the literal text "/n". Only tag fragments are removed
    # afterwards - a bare "br" is no longer stripped, because that also ate
    # the letters out of ordinary words such as "brilliant".
    review_text = (
        review_text.replace("<br/>", "  \n")
        .replace("br/>", "")
        .replace("<br", "")
        .replace("<", "")
        .replace(">", "")
    )
    return movie, score, review_text


def _show_review(prompt, max_length, temperature, top_k, do_sample):
    """Generate a review for the prompt and write it (or an error) to the page."""
    output = rev_generate(prompt, max_length=max_length, temperature=temperature, top_k=top_k, do_sample=do_sample)
    print(output)

    try:
        generated_text = output[0]["generated_text"]
    except (KeyError, IndexError, TypeError):
        # The response was not a non-empty list of {"generated_text": ...}
        # objects (for example an error object from the API).
        st.error("The model did not return a review, please try again.")
        return

    if _contains_bad_word(generated_text):
        st.write("The bot generated a slur, please try again.")
        return

    movie, score, review_text = _split_generated_review(generated_text)
    for label, value in (("Movie:", movie), ("Score:", score), ("Review:", review_text)):
        st.write(label)
        st.write(value)


def review():
    """Render the review page: generation controls plus the generated review.

    "Generate Review" prompts the model with the entered movie title;
    "Random Review" prompts it with just "Movie:" so the model invents the
    title as well. Output containing a blocklisted word is withheld.
    """
    st.title("Review")

    temperature = st.slider("Temperature", 0.1, 1.0, 0.8, 0.01)
    top_k = st.slider("Top K", 1, 100, 15, 1)
    max_length = st.slider("Max Length", 1, 250, 100, 1)
    do_sample = st.checkbox("Do Sample (If unchecked, will use greedy decoding, not recommended for review due to repetition)", True)

    st.write("Please enter the name of the movie you would like to review. First generation may take up to a minute or more, as the model is loading. Latter generations should load faster.")
    in_movie = st.text_input("Movie")
    review_button = st.button("Generate Review")
    random_review = st.button("Random Review")
    st.write("Please only press Generate Review or Random Review once, it will take a short amount of time to load during the first generation.")

    if review_button:
        _show_review("Movie: " + in_movie + " Score:", max_length, temperature, top_k, do_sample)

    if random_review:
        _show_review("Movie:", max_length, temperature, top_k, do_sample)
        


# Sidebar navigation: maps each page label shown in the select box to the
# function that renders that page.
page_names_to_funcs = {"Main Page": main_page, "Ava": review}

selected_page = st.sidebar.selectbox(
    "Select a page",
    page_names_to_funcs.keys(),
)
# Look up the renderer for the chosen label and draw the page.
render_selected_page = page_names_to_funcs[selected_page]
render_selected_page()