llamazookeeper committed on
Commit
3d803ed
•
1 Parent(s): d36b7c3
Files changed (2)
  1. app.py +68 -115
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,116 +1,69 @@
- # Import streamlit for app dev
  import streamlit as st
-
- # Import transformer classes for generation
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
- # Import torch for datatype attributes
- import torch
- # Import the prompt wrapper...but for llama index
- from llama_index.prompts.prompts import SimpleInputPrompt
- # Import the llama index HF Wrapper
- from llama_index.llms import HuggingFaceLLM
- # Bring in embeddings wrapper
- from llama_index.embeddings import LangchainEmbedding
- # Bring in HF embeddings - need these to represent document chunks
- from langchain.embeddings.huggingface import HuggingFaceEmbeddings
- # Bring in stuff to change service context
- from llama_index import set_global_service_context
- from llama_index import ServiceContext
- # Import deps to load documents
- from llama_index import VectorStoreIndex, download_loader
- from pathlib import Path
-
-
-
- # Define variable to hold llama2 weights naming
- #name = "meta-llama/Llama-2-70b-chat-hf"
- name = "mistralai/Mistral-7B-v0.1"
- # Set auth token variable from hugging face
- #auth_token = "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" #llamazookeeper
- auth_token = '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'
-
-
-
- @st.cache_resource
- def get_tokenizer_model():
-     # Create tokenizer
-     tokenizer = AutoTokenizer.from_pretrained(name, cache_dir='./model/', use_auth_token=auth_token)
-
-     # Create model
-     model = AutoModelForCausalLM.from_pretrained(name, cache_dir='./model/',
-                                                  use_auth_token=auth_token, torch_dtype=torch.float16,
-                                                  rope_scaling={"type": "dynamic", "factor": 2}, load_in_8bit=True)
-
-     return tokenizer, model
- tokenizer, model = get_tokenizer_model()
-
- # Create a system prompt
- system_prompt = """<s>[INST] <<SYS>>
- You are a helpful, respectful and honest assistant. Always answer as
- helpfully as possible, while being safe. Your answers should not include
- any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
- Please ensure that your responses are socially unbiased and positive in nature.
-
- If a question does not make any sense, or is not factually coherent, explain
- why instead of answering something not correct. If you don't know the answer
- to a question, please don't share false information.
-
- Your goal is to provide answers relating to the financial performance of
- the company.<</SYS>>
- """
- # Throw together the query wrapper
- query_wrapper_prompt = SimpleInputPrompt("{query_str} [/INST]")
-
- # Create a HF LLM using the llama index wrapper
- llm = HuggingFaceLLM(context_window=4096,
-                      max_new_tokens=256,
-                      system_prompt=system_prompt,
-                      query_wrapper_prompt=query_wrapper_prompt,
-                      model=model,
-                      tokenizer=tokenizer)
-
- # Create and dl embeddings instance
- embeddings = LangchainEmbedding(
-     HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
- )
-
- # Create new service context instance
- service_context = ServiceContext.from_defaults(
-     chunk_size=1024,
-     llm=llm,
-     embed_model=embeddings
- )
- # And set the service context
- set_global_service_context(service_context)
-
- # Download PDF Loader
- PyMuPDFReader = download_loader("PyMuPDFReader")
- # Create PDF Loader
- loader = PyMuPDFReader()
- # Load documents
- documents = loader.load(file_path=Path('/content/*.pdf'), metadata=True)
-
- # Create an index - we'll be able to query this in a sec
- index = VectorStoreIndex.from_documents(documents)
- # Setup index query engine using LLM
- query_engine = index.as_query_engine()
-
-
- # Create centered main title
- #st.title('🦙 Llama Banker')
- # Create a text input box for the user
- prompt = st.text_input('Input your prompt here')
-
- # If the user hits enter
- if prompt:
-     response = query_engine.query(prompt)
-     # ...and write it out to the screen
-     st.write(response)
-
-     # Display raw response object
-     with st.expander('Response Object'):
-         st.write(response)
-     # Display source text
-     with st.expander('Source Text'):
-         st.write(response.get_formatted_sources())
-
 
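Review note on the removed file: it hardcoded two Hugging Face access tokens in source. Any token committed to a repo's history should be treated as compromised and revoked; the usual pattern is to read it from the environment instead. A minimal sketch, assuming the token is stored under a hypothetical secret name `HF_TOKEN`:

import os

# Read the Hugging Face token from the environment instead of hardcoding it.
# On Spaces, secrets configured in the repo settings are exposed as env vars.
auth_token = os.environ.get("HF_TOKEN")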
+ # imports
  import streamlit as st
+ from newspaper import Article
+ from transformers import pipeline
+
+ # set config
+ st.set_page_config(layout="wide", page_title="SummarizeLink")
+
+ # load the summarization model (cached for faster reloads)
+ @st.cache(allow_output_mutation=True)
+ def load_summarize_model():
+     # model = pipeline("summarization", model='sshleifer/distilbart-cnn-12-6')
+     model = pipeline("summarization")
+     return model
+
+ # load the model
+ summ = load_summarize_model()
+
+ # define the download helper
+ def download_and_parse_article(url):
+     """Download and parse an article from a URL.
+
+     Parameters
+     ----------
+     url : str
+         The URL of the article to download and parse.
+
+     Returns
+     -------
+     text : str
+         The full text of the downloaded and parsed article.
+     """
+     # build the article object
+     article = Article(url)
+     # download and parse the article
+     article.download()
+     article.parse()
+     # return the article text
+     return article.text
+
+ # APP
+ # set title and subtitle
+ st.title("SummarizeLink")
+ st.markdown("Paste any article link below and click on the 'Summarize' button.")
+ st.markdown("*Note:* We truncate the text in case the article is lengthy! 🖖")
+ # create the input text box and settings panel
+ link = st.text_area('Paste your link here...', "https://towardsdatascience.com/a-guide-to-the-knowledge-graphs-bfb5c40272f1", height=50)
+ button = st.button("Summarize")
+ min_length = st.sidebar.slider('Min summary length', min_value=10, max_value=100, value=50, step=10)
+ max_length = st.sidebar.slider('Max summary length', min_value=30, max_value=700, value=100, step=10)
+ num_beams = st.sidebar.slider('Beam length', min_value=1, max_value=10, value=5, step=1)
+
+ # if the button is clicked, fetch and summarize
+ if button and link:
+     with st.spinner("Parsing article and summarizing..."):
+         # get the text
+         text = download_and_parse_article(link)
+         # summarize the text
+         summary = summ(text,
+                        truncation=True,
+                        max_length=max_length,
+                        min_length=min_length,
+                        num_beams=num_beams,
+                        do_sample=True,
+                        early_stopping=True,
+                        repetition_penalty=1.5,
+                        length_penalty=1.5)[0]
+         # display the summary
+         st.markdown("**Summary:**")
+         st.write(summary['summary_text'])
 
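Review note: `@st.cache(allow_output_mutation=True)` still works on older Streamlit releases, but Streamlit 1.18+ deprecates it in favor of `st.cache_resource`, the decorator the removed file already used. A minimal sketch of the same loader on a newer Streamlit; only the decorator changes:

import streamlit as st
from transformers import pipeline

@st.cache_resource  # replaces st.cache(allow_output_mutation=True) on Streamlit >= 1.18
def load_summarize_model():
    # A bare pipeline("summarization") resolves to the distilbart checkpoint
    # that the commented-out line in the diff pins explicitly.
    return pipeline("summarization")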
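Review note: with `truncation=True`, the pipeline silently drops everything past the model's input limit (1024 tokens for the BART-family summarizers), so long articles are summarized from their opening passage only, exactly as the app's note warns. A sketch of one workaround, splitting on characters; `chunk_size` here is a hypothetical rough proxy for the token limit, and token-aware splitting would be more precise:

def summarize_long(summ, text, chunk_size=3500):
    # Split the article into roughly chunk_size-character pieces,
    # summarize each piece, then join the partial summaries.
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    parts = [summ(c, truncation=True, max_length=100, min_length=30)[0]['summary_text']
             for c in chunks]
    return " ".join(parts)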
requirements.txt CHANGED
@@ -5,3 +5,4 @@ transformers
  accelerate
  bitsandbytes
  requests
+ newspaper3k
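Review note: on PyPI the maintained Python 3 release of this library is published as `newspaper3k`; the name `newspaper` points at the old Python 2 package, hence the corrected requirement above. A quick smoke test after installing, reusing the app's default URL:

from newspaper import Article  # the import name stays `newspaper` even though the package is newspaper3k

article = Article("https://towardsdatascience.com/a-guide-to-the-knowledge-graphs-bfb5c40272f1")
article.download()
article.parse()
print(article.title, len(article.text))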