isimorfizam committed
Commit 30bf6ab · 1 Parent(s): 65c23ab

Add application file

Files changed (1)
  1. app.py +162 -0
app.py ADDED
@@ -0,0 +1,162 @@
+ import streamlit as st
+
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from transformers.utils import is_flash_attn_2_available
+ from transformers import BitsAndBytesConfig
+ import pandas as pd
+ import os
+ import torch
+ import numpy as np
+ from scipy import sparse
+ from sklearn.metrics.pairwise import cosine_similarity
+
+
+ # CHOOSE DEVICE
+
+ model_id = 'google/gemma-2b-it'
+ HF_TOKEN = os.environ['HF_TOKEN']
+
+ @st.cache_resource
+ def load_model(model_id) :
+     print(torch.backends.mps.is_available())
+     device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
+     print(device)
+
+     if device.type == 'cpu' :
+         print('Warning! No GPU available')
+
+     # IMPORT MODEL
+
+     print(model_id)
+
+     quantization_config = BitsAndBytesConfig(load_in_4bit=True,
+                                              bnb_4bit_compute_dtype=torch.float16)
+
+     # if (is_flash_attn_2_available()) and (torch.cuda.get_device_capability(0)[0] >= 8):
+     #     attn_implementation = "flash_attention_2"
+     # else:
+     #     attn_implementation = "sdpa"
+     # print(f"[INFO] Using attention implementation: {attn_implementation}")
+
+     tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id, token=HF_TOKEN)
+
+     llm_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_id,
+                                                      token=HF_TOKEN,
+                                                      torch_dtype=torch.float16,
+                                                      #quantization_config=quantization_config if quantization_config else None,
+                                                      low_cpu_mem_usage=False) # use full memory
+                                                      #attn_implementation=attn_implementation) # which attention version to use
+     llm_model.to(device)
+     return llm_model, tokenizer, device
+
+ # Create a text element and let the reader know the model is loading.
+ model_load_state = st.text('Loading model...')
+ # Load the model and tokenizer (cached across reruns).
+ llm_model, tokenizer, device = load_model(model_id)
+ # Notify the reader that the model was successfully loaded.
+ model_load_state.text('Loading model...done!')
+
+ # INFERENCE
+ # def prompt_formatter(reviews, type_of_doc):
+ #     return f"""You are a summarization bot.
+ #     You will receive {type_of_doc} and you will extract all relevant information from {type_of_doc} and return one paragraph in which you will summarize what was said.
+ #     {type_of_doc} are listed below under inputs.
+ #     Inputs: {reviews}
+ #     Answer :
+ #     """
+ def prompt_formatter(reviews, type_of_doc):
+     return f"""You are a summarization bot.
+     You will receive {type_of_doc} and you will summarize what was said in the input.
+     {type_of_doc} are listed below under inputs.
+     Inputs: {reviews}
+     Answer :
+     """
+ def mirror_mirror(inputs, prompt_formatter, tokenizer, type_of_doc):
+     # Generate one candidate summary and return the prompt and the generated text with the prompt stripped.
+     prompt = prompt_formatter(inputs, type_of_doc)
+     input_ids = tokenizer(prompt, return_tensors="pt").to(device)
+     outputs = llm_model.generate(**input_ids,
+                                  temperature=0.3,
+                                  do_sample=True,
+                                  max_new_tokens=275)
+     output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return prompt, output_text.replace(prompt, '')
+
+
+
+ def summarization(example : str, type_of_doc : str, results_df : pd.DataFrame = pd.DataFrame()) -> pd.DataFrame :
+
+     print(type_of_doc)
+     # INFERENCE
+     results = []
+     for cnt in range(0,5) :
+         print(cnt)
+         prompt, result = mirror_mirror(example, prompt_formatter, tokenizer, type_of_doc)
+         list_temp = [result, example]
+         tokenized = tokenizer(list_temp, return_tensors="pt", padding = True)
+         A = tokenized.input_ids.numpy()
+         A = sparse.csr_matrix(A)
+         # cosine similarity between the candidate summary and the input
+         score = cosine_similarity(A)[0,1]
+         #print(cosine_similarity(A)[0,1])
+         #print(cosine_similarity(A)[1,0])
+
+         if score>0.1 :
+             fin_result = result
+             max_score = score
+             break
+
+         results.append(result)
+         #print(result+'\n\n')
+
+     # tokenize results and example together
+     try :
+         fin_result
+     except NameError :
+         # if fin_result is not already defined, use the best of the available results
+         # add example to results so tokenization is done together (due to padding limitations)
+         results.append(example)
+         tokenized = tokenizer(results, return_tensors="pt", padding = True)
+         A = tokenized.input_ids.numpy()
+         A = sparse.csr_matrix(A)
+         # calculate cosine similarity of each pair
+         # keep only the example X result column, dropping the example's similarity with itself
+         scores = cosine_similarity(A)[:-1, -1]
+         # final result is the one with the greatest cos_score
+         fin_result = results[np.argmax(scores)]
+         max_score = max(scores)
+
+     #print(fin_result)
+     # save final result and its attributes
+     row = pd.DataFrame({'model' : model_id, 'prompt' : prompt, 'reviews' : example, 'summarization' : fin_result, 'score' : [max_score] })
+     results_df = pd.concat([results_df, row], ignore_index = True)
+
+     return results_df
+
+
+
+
+ # adding the text that will show in the text box as default
+ default_value = "I am a summarization bot! Let me summarize your reading for you!"
+ st.title("Mirror, mirror, on the cloud, what do Clockify users say aloud?")
+ st.subheader("--Clockify review summarizer--")
+
+
+
+ inputs = st.text_area("Your text", default_value, height = 275)
+ type_of_doc = st.text_area("Type of text", 'text', height = 25)
+ button = st.button('Summon the summarizer!')
+ result = ''
+ score = ''
+ if button :
+     results_df = summarization(inputs, type_of_doc)
+     # only one input
+     result = results_df.summarization[0]
+     score = results_df.score[0]
+
+ outputs = st.text_area("Summarized text", result)
+ score = st.text_area("Cosine similarity score", score)
+ # max_length = st.sidebar.slider("Max Length", min_value = 10, max_value=30)
+ # temperature = st.sidebar.slider("Temperature", value = 1.0, min_value = 0.0, max_value=1.0, step=0.05)
+ # top_k = st.sidebar.slider("Top-k", min_value = 0, max_value=5, value = 0)
+ # top_p = st.sidebar.slider("Top-p", min_value = 0.0, max_value=1.0, step = 0.05, value = 0.9)
+ # num_return_sequences = st.sidebar.number_input('Number of Return Sequences', min_value=1, max_value=5, value=1, step=1)