isimorfizam committed · Commit 30bf6ab · 1 Parent(s): 65c23ab
Add application file
app.py
ADDED
@@ -0,0 +1,162 @@
import streamlit as st

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.utils import is_flash_attn_2_available
from transformers import BitsAndBytesConfig
import pandas as pd
import os
import torch
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity


# CHOOSE DEVICE

model_id = 'google/gemma-2b-it'
HF_TOKEN = os.environ['HF_TOKEN']
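# NOTE: HF_TOKEN is read from the environment (e.g. a Space secret);
# the os.environ lookup above raises KeyError if it is not set.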

@st.cache_resource
def load_model(model_id):
    print(torch.backends.mps.is_available())
    device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
    print(device)

    if device.type == 'cpu':
        print('Warning! No GPU available')

    # IMPORT MODEL

    print(model_id)

    quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                             bnb_4bit_compute_dtype=torch.float16)

    # if is_flash_attn_2_available() and (torch.cuda.get_device_capability(0)[0] >= 8):
    #     attn_implementation = "flash_attention_2"
    # else:
    #     attn_implementation = "sdpa"
    # print(f"[INFO] Using attention implementation: {attn_implementation}")

    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id, token=HF_TOKEN)

    llm_model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_id,
        token=HF_TOKEN,
        torch_dtype=torch.float16,
        # quantization_config=quantization_config,  # enable to load in 4-bit
        low_cpu_mem_usage=False,  # use full memory
        # attn_implementation=attn_implementation,  # which attention version to use
    )
    llm_model.to(device)
    return llm_model, tokenizer, device

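# st.cache_resource keeps one loaded copy of the model per process, so
# Streamlit reruns reuse it instead of reloading on every interaction.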
# Create a text element and let the reader know the model is loading.
model_load_state = st.text('Loading model...')
# Load the model and tokenizer.
llm_model, tokenizer, device = load_model(model_id)
# Notify the reader that the model was successfully loaded.
model_load_state.text('Loading model...done!')

# INFERENCE
# def prompt_formatter(reviews, type_of_doc):
#     return f"""You are a summarization bot.
# You will receive {type_of_doc} and you will extract all relevant information from {type_of_doc} and return one paragraph in which you will summarize what was said.
# {type_of_doc} are listed below under inputs.
# Inputs: {reviews}
# Answer :
# """
def prompt_formatter(reviews, type_of_doc):
    return f"""You are a summarization bot.
You will receive {type_of_doc} and you will summarize what was said in the input.
{type_of_doc} are listed below under inputs.
Inputs: {reviews}
Answer :
"""
def mirror_mirror(inputs, prompt_formatter, tokenizer, type_of_doc):
    prompt = prompt_formatter(inputs, type_of_doc)
    input_ids = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = llm_model.generate(**input_ids,
                                 temperature=0.3,
                                 do_sample=True,
                                 max_new_tokens=275)
    output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Strip the echoed prompt so only the generated summary remains.
    return prompt, output_text.replace(prompt, '')

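# Hypothetical usage sketch (illustrative, not part of the app flow):
#   prompt, summary = mirror_mirror("Great tool, but the reports are slow.",
#                                   prompt_formatter, tokenizer, 'reviews')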


def summarization(example: str, type_of_doc: str, results_df: pd.DataFrame = pd.DataFrame()) -> pd.DataFrame:

    print(type_of_doc)
    # INFERENCE
    results = []
    for cnt in range(0, 5):
        print(cnt)
        prompt, result = mirror_mirror(example, prompt_formatter, tokenizer, type_of_doc)
        # Tokenize the candidate summary and the input together and score
        # their similarity over the padded token-id matrix.
        list_temp = [result, example]
        tokenized = tokenizer(list_temp, return_tensors="pt", padding=True)
        A = tokenized.input_ids.numpy()
        A = sparse.csr_matrix(A)
        score = cosine_similarity(A)[0, 1]

        # Keep the first candidate that clears the similarity threshold.
        if score > 0.1:
            fin_result = result
            max_score = score
            break

        results.append(result)

    # If no candidate cleared the threshold, fall back to the best of the
    # available results.
    try:
        fin_result
    except NameError:
        # Add the example to the results so tokenization is done together
        # (due to padding limitations).
        results.append(example)
        tokenized = tokenizer(results, return_tensors="pt", padding=True)
        A = tokenized.input_ids.numpy()
        A = sparse.csr_matrix(A)
        # Calculate the cosine similarity of each candidate against the
        # example (the last row), dropping the example's self-similarity so
        # the argmax cannot pick the input itself.
        scores = cosine_similarity(A)[:-1, -1]
        # The final result is the one with the greatest cosine score.
        fin_result = results[np.argmax(scores)]
        max_score = max(scores)

    # Save the final result and its attributes.
    row = pd.DataFrame({'model': model_id, 'prompt': prompt, 'reviews': example,
                        'summarization': fin_result, 'score': [max_score]})
    results_df = pd.concat([results_df, row], ignore_index=True)

    return results_df

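# The score above is cosine similarity computed over rows of raw token ids,
# a rough lexical-overlap proxy rather than a semantic similarity measure.
# A minimal standalone sketch of the same computation (assumed inputs):
#   ids = tokenizer(["candidate summary", "original text"],
#                   return_tensors="pt", padding=True).input_ids.numpy()
#   sim = cosine_similarity(sparse.csr_matrix(ids))[0, 1]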


# Text that will show in the text box as default.
default_value = "I am a summarization bot! Let me summarize your reading for you!"
st.title("Mirror, mirror, on the cloud, what do Clockify users say aloud?")
st.subheader("--Clockify review summarizer--")


inputs = st.text_area("Your text", default_value, height=275)
type_of_doc = st.text_area("Type of text", 'text', height=25)
button = st.button('Summon the summarizer!')
result = ''
score = ''
if button:
    results_df = summarization(inputs, type_of_doc)
    # Only one input, so read the first row.
    result = results_df.summarization[0]
    score = results_df.score[0]

outputs = st.text_area("Summarized text", result)
score = st.text_area("Cosine similarity score", score)
# max_length = st.sidebar.slider("Max Length", min_value=10, max_value=30)
# temperature = st.sidebar.slider("Temperature", value=1.0, min_value=0.0, max_value=1.0, step=0.05)
# top_k = st.sidebar.slider("Top-k", min_value=0, max_value=5, value=0)
# top_p = st.sidebar.slider("Top-p", min_value=0.0, max_value=1.0, step=0.05, value=0.9)
# num_return_sequences = st.sidebar.number_input('Number of Return Sequences', min_value=1, max_value=5, value=1, step=1)