isimorfizam committed
Commit 30bf6ab · 1 Parent(s): 65c23ab

Add application file

Files changed (1)
  1. app.py +162 -0
app.py ADDED
@@ -0,0 +1,162 @@
+ import streamlit as st
+
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from transformers.utils import is_flash_attn_2_available
+ from transformers import BitsAndBytesConfig
+ import pandas as pd
+ import os
+ import torch
+ import numpy as np
+ from scipy import sparse
+ from sklearn.metrics.pairwise import cosine_similarity
+
+
+ # CHOOSE DEVICE
+
+ model_id = 'google/gemma-2b-it'
+ HF_TOKEN = os.environ['HF_TOKEN']
+
+ @st.cache_resource
+ def load_model(model_id) :
+     print(torch.backends.mps.is_available())
+     device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
+     print(device)
+
+     if device.type == 'cpu' :
+         print('Warning! No GPU available')
+
+     # IMPORT MODEL
+
+     print(model_id)
+
+     quantization_config = BitsAndBytesConfig(load_in_4bit=True,
+                                              bnb_4bit_compute_dtype=torch.float16)
+
+     # if (is_flash_attn_2_available()) and (torch.cuda.get_device_capability(0)[0] >= 8):
+     #     attn_implementation = "flash_attention_2"
+     # else:
+     #     attn_implementation = "sdpa"
+     # print(f"[INFO] Using attention implementation: {attn_implementation}")
+
+     tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id, token=HF_TOKEN)
+
+     llm_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_id,
+                                                      token=HF_TOKEN,
+                                                      torch_dtype=torch.float16,
+                                                      #quantization_config=quantization_config if quantization_config else None,
+                                                      low_cpu_mem_usage=False) # use full memory
+                                                      #attn_implementation=attn_implementation) # which attention version to use
+     llm_model.to(device)
+     return llm_model, tokenizer, device
+
+ # Create a text element and let the reader know the model is loading.
+ model_load_state = st.text('Loading model...')
+ # Load the model and tokenizer (cached across reruns).
+ llm_model, tokenizer, device = load_model(model_id)
+ # Notify the reader that the model was successfully loaded.
+ model_load_state.text('Loading model...done!')
+
+ # INFERENCE
+ # def prompt_formatter(reviews, type_of_doc):
+ #     return f"""You are a summarization bot.
+ #     You will receive {type_of_doc} and you will extract all relevant information from {type_of_doc} and return one paragraph in which you will summarize what was said.
+ #     {type_of_doc} are listed below under inputs.
+ #     Inputs: {reviews}
+ #     Answer :
+ #     """
+ def prompt_formatter(reviews, type_of_doc):
+     return f"""You are a summarization bot.
+     You will receive {type_of_doc} and you will summarize what was said in the input.
+     {type_of_doc} are listed below under inputs.
+     Inputs: {reviews}
+     Answer :
+     """
+ def mirror_mirror(inputs, prompt_formatter, tokenizer, type_of_doc):
+     # Generate one candidate summary and return the prompt and the generated text with the prompt stripped.
+     prompt = prompt_formatter(inputs, type_of_doc)
+     input_ids = tokenizer(prompt, return_tensors="pt").to(device)
+     outputs = llm_model.generate(**input_ids,
+                                  temperature=0.3,
+                                  do_sample=True,
+                                  max_new_tokens=275)
+     output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return prompt, output_text.replace(prompt, '')
+
+
+
+ def summarization(example : str, type_of_doc : str, results_df : pd.DataFrame = pd.DataFrame()) -> pd.DataFrame :
+
+     print(type_of_doc)
+     # INFERENCE
+     results = []
+     for cnt in range(0,5) :
+         print(cnt)
+         prompt, result = mirror_mirror(example, prompt_formatter, tokenizer, type_of_doc)
+         list_temp = [result, example]
+         tokenized = tokenizer(list_temp, return_tensors="pt", padding = True)
+         A = tokenized.input_ids.numpy()
+         A = sparse.csr_matrix(A)
+         # cosine similarity between the candidate summary and the input
+         score = cosine_similarity(A)[0,1]
+         #print(cosine_similarity(A)[0,1])
+         #print(cosine_similarity(A)[1,0])
+
+         if score>0.1 :
+             fin_result = result
+             max_score = score
+             break
+
+         results.append(result)
+         #print(result+'\n\n')
+
+     # tokenize results and example together
+     try :
+         fin_result
+     except NameError :
+         # if fin_result is not already defined, use the best of the available results
+         # add example to results so tokenization is done together (due to padding limitations)
+         results.append(example)
+         tokenized = tokenizer(results, return_tensors="pt", padding = True)
+         A = tokenized.input_ids.numpy()
+         A = sparse.csr_matrix(A)
+         # calculate cosine similarity of each pair
+         # keep only the example X result column, dropping the example's similarity with itself
+         scores = cosine_similarity(A)[:-1, -1]
+         # final result is the one with the greatest cos_score
+         fin_result = results[np.argmax(scores)]
+         max_score = max(scores)
+
+     #print(fin_result)
+     # save final result and its attributes
+     row = pd.DataFrame({'model' : model_id, 'prompt' : prompt, 'reviews' : example, 'summarization' : fin_result, 'score' : [max_score] })
+     results_df = pd.concat([results_df, row], ignore_index = True)
+
+     return results_df
+
+
+
+
+ # adding the text that will show in the text box as default
+ default_value = "I am a summarization bot! Let me summarize your reading for you!"
+ st.title("Mirror, mirror, on the cloud, what do Clockify users say aloud?")
+ st.subheader("--Clockify review summarizer--")
+
+
+
+ inputs = st.text_area("Your text", default_value, height = 275)
+ type_of_doc = st.text_area("Type of text", 'text', height = 25)
+ button = st.button('Summon the summarizer!')
+ result = ''
+ score = ''
+ if button :
+     results_df = summarization(inputs, type_of_doc)
+     # only one input
+     result = results_df.summarization[0]
+     score = results_df.score[0]
+
+ outputs = st.text_area("Summarized text", result)
+ score = st.text_area("Cosine similarity score", score)
+ # max_length = st.sidebar.slider("Max Length", min_value = 10, max_value=30)
+ # temperature = st.sidebar.slider("Temperature", value = 1.0, min_value = 0.0, max_value=1.0, step=0.05)
+ # top_k = st.sidebar.slider("Top-k", min_value = 0, max_value=5, value = 0)
+ # top_p = st.sidebar.slider("Top-p", min_value = 0.0, max_value=1.0, step = 0.05, value = 0.9)
+ # num_return_sequences = st.sidebar.number_input('Number of Return Sequences', min_value=1, max_value=5, value=1, step=1)