Spaces:

Tuana
/

hackernews-summaries

Running

App Files Files Community

Tuana committed on Feb 2, 2024

Commit

d61b6cb

1 Parent(s): 3979058

first working app

Browse files

Files changed (9) hide show

.github/workflows/hf_sync.yml +20 -0
.gitignore +1 -0
README.md +10 -1
app.py +58 -0
requirements.txt +5 -0
utils/config.py +5 -0
utils/hackernews_fetcher.py +27 -0
utils/haystack.py +44 -0
utils/ui.py +56 -0

.github/workflows/hf_sync.yml ADDED Viewed

	@@ -0,0 +1,20 @@

+# Mirrors this repository to the Hugging Face Space on every push to main.
+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          # fetch the full history instead of a shallow clone, so complete commits can be pushed
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          # repository secret, handed to the step as an environment variable
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        # --force overwrites whatever is currently on the Space's main branch
+        run: git push --force https://Tuana:$HF_TOKEN@huggingface.co/spaces/Tuana/hackernews-summaries main

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__

README.md CHANGED Viewed

	@@ -1 +1,10 @@
1	- ~~# hackernews-summaries~~

+---
+title: Should I follow?
+emoji: 🧡
+colorFrom: orange
+colorTo: yellow
+sdk: streamlit
+# Must match the streamlit pin in requirements.txt; the app relies on st.cache_data, which Streamlit 1.5.0 lacks.
+sdk_version: 1.25.0
+app_file: app.py
+pinned: false
+---

app.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from annotated_text import annotation
+from json import JSONDecodeError
+import logging
+from markdown import markdown
+import requests
+import streamlit as st
+from utils.haystack import query, start_haystack
+from utils.ui import reset_results, set_initial_state, sidebar
+# Script body. Streamlit executes this file top to bottom on every user interaction,
+# so everything that must survive a rerun lives in st.session_state.
+set_initial_state()
+sidebar()
+st.write("# Get the summaries of latest top Hacker News posts 🧡")
+hf_token = st.session_state.get("HF_TGI_TOKEN")
+if hf_token:
+    pipeline = start_haystack(hf_token)
+    st.session_state["api_key_configured"] = True
+    search_bar, button = st.columns(2)
+    # Slider selecting how many top posts to summarize; 0 (the default) means "nothing yet"
+    with search_bar:
+        top_k = st.slider('How many of the top posts should I summarize?', 0, 5, 0)
+    with button:
+        # Presumably spacer lines so the button lines up with the slider
+        st.write("")
+        st.write("")
+        run_pressed = st.button("Get summaries")
+    # Everything below needs `top_k`, `run_pressed` and `pipeline`, which only exist
+    # in this branch, so it is kept inside it instead of behind a second flag check.
+    run_query = run_pressed or top_k != st.session_state.top_k
+    # Get results for query
+    if run_query and top_k:
+        reset_results()
+        # Remember the count that was queried (was stored under "username" by mistake,
+        # which left top_k at None and re-triggered the query on every rerun).
+        st.session_state.top_k = top_k
+        with st.spinner("🔎"):
+            try:
+                st.session_state.result = query(top_k, pipeline)
+            except JSONDecodeError:
+                st.error(
+                    "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
+                )
+            except Exception as e:
+                logging.exception(e)
+                st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
+    if st.session_state.result:
+        summaries = st.session_state.result
+        # Only the first entry of the result list is displayed
+        st.write(summaries[0])
+else:
+    st.write("Please provide your Hugging Face Token to start using the application")
+    st.write("If you are using a smaller screen, open the sidebar from the top left to provide your token 🙌")

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+haystack-ai==2.0.0b5
+streamlit==1.25.0
+markdown
+python-dotenv
+newspaper3k

utils/config.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import os
+from dotenv import load_dotenv
+# Load variables from a .env file, if one is found, into the process environment.
+load_dotenv()
+# Hugging Face TGI token taken from the environment; None when the variable is unset.
+HF_TGI_TOKEN = os.getenv('HF_TGI_TOKEN')
+# Misleading legacy name for the same value, kept so existing imports keep working.
+TWITTER_BEARER = HF_TGI_TOKEN

utils/hackernews_fetcher.py ADDED Viewed

	@@ -0,0 +1,27 @@

+from typing import List
+from haystack import component, Document
+from newspaper import Article
+import requests
+@component
+class HackernewsFetcher():
+  """Haystack component that loads the articles linked by the top Hacker News stories."""
+  @component.output_types(articles=List[Document])
+  def run(self, top_k: int):
+    """Fetch the first ``top_k`` top stories and download the pages they link to.
+
+    Stories without a ``url`` field and articles that cannot be downloaded or
+    parsed are skipped, so fewer than ``top_k`` documents may come back.
+
+    Returns a dict ``{'articles': [Document, ...]}`` where each Document holds the
+    article text as content and ``title`` / ``url`` in its meta.
+    """
+    # requests has no default timeout; without one a stalled connection blocks the app
+    timeout = 10
+    top_stories = requests.get(url='https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty', timeout=timeout)
+    urls = []
+    for story_id in top_stories.json()[0:top_k]:
+      response = requests.get(url=f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json?print=pretty", timeout=timeout)
+      # A JSON `null` body parses to None; treat it like an item without a URL
+      item = response.json() or {}
+      if 'url' in item:
+        urls.append(item['url'])
+    docs = []
+    for url in urls:
+      try:
+        article = Article(url)
+        article.download()
+        article.parse()
+        docs.append(Document(content=article.text, meta={'title': article.title, 'url': url}))
+      except Exception:
+        # Best effort: one unreachable article must not fail the whole batch.
+        # `Exception` rather than a bare except, so Ctrl-C / SystemExit still propagate.
+        print(f"Couldn't download {url}, skipped")
+    return {'articles': docs}

utils/haystack.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import streamlit as st
+from haystack import Pipeline
+from haystack.components.builders.prompt_builder import PromptBuilder
+from haystack.components.generators import HuggingFaceTGIGenerator
+from .hackernews_fetcher import HackernewsFetcher
+def start_haystack(hf_token):
+    """Build the Hacker News summarization pipeline.
+
+    The pipeline chains three components: ``HackernewsFetcher`` produces the
+    articles, ``PromptBuilder`` renders them into the prompt below, and
+    ``HuggingFaceTGIGenerator`` (Mixtral-8x7B-Instruct) generates the summaries.
+
+    ``hf_token`` is passed to the generator as its ``token``.
+    Returns the connected ``Pipeline``; nothing is run here.
+    """
+    prompt_template = """
+You will be provided one or more top Hacker News posts, followed by their URL.
+For the posts you have, provide a short summary followed by the URL that the post can be found at.
+Posts:
+{% for article in articles %}
+  Post content: {{article.content}}
+  Post URL: {{article.meta['url']}}
+{% endfor %}
+Summaries:
+"""
+    prompt_builder = PromptBuilder(template=prompt_template)
+    llm = HuggingFaceTGIGenerator("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
+    fetcher = HackernewsFetcher()
+    pipe = Pipeline()
+    pipe.add_component("hackernews_fetcher", fetcher)
+    pipe.add_component("prompt_builder", prompt_builder)
+    pipe.add_component("llm", llm)
+    # fetcher output -> template variable `articles`; rendered prompt -> generator input
+    pipe.connect("hackernews_fetcher.articles", "prompt_builder.articles")
+    pipe.connect("prompt_builder.prompt", "llm.prompt")
+    return pipe
+@st.cache_data(show_spinner=True)
+def _run_pipeline(top_k, _pipeline):
+    """Run the pipeline and return the generator's replies; raises on failure.
+
+    Kept separate from ``query`` so that only successful runs are cached:
+    st.cache_data stores return values, not raised exceptions. The leading
+    underscore on ``_pipeline`` tells Streamlit not to hash that argument,
+    so the cache is keyed on ``top_k`` alone.
+    """
+    replies = _pipeline.run(data={"hackernews_fetcher": {"top_k": top_k},
+                                  "llm": {"generation_kwargs": {"max_new_tokens": 600}}
+                                  })
+    return replies['llm']['replies']
+def query(top_k, _pipeline):
+    """Return the summaries for the ``top_k`` top posts as a list of strings.
+
+    Never raises: on any pipeline error the failure is logged and a
+    one-element list with an apology message is returned instead. That
+    fallback is no longer cached, so a later call with the same ``top_k``
+    retries the pipeline.
+    """
+    import logging  # local import: this block cannot rely on the module's import list
+    try:
+        return _run_pipeline(top_k, _pipeline)
+    except Exception:
+        logging.exception("Hacker News summary pipeline failed for top_k=%s", top_k)
+        return ["Sorry, there seems to be an issue here 😔"]

utils/ui.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import streamlit as st
+from PIL import Image
+def set_state_if_absent(key, value):
+    """Store ``value`` under ``key`` in the session state unless the key already exists."""
+    if key in st.session_state:
+        return
+    st.session_state[key] = value
+def set_initial_state():
+    """Seed the session-state keys the app reads, without overwriting existing values."""
+    # None = "no query has run yet"; this is the same value reset_results() writes.
+    # (Was a UI label string, although the slot is compared against the slider's int.)
+    set_state_if_absent("top_k", None)
+    set_state_if_absent("result", None)
+    set_state_if_absent("haystack_started", False)
+def reset_results(*args):
+    """Clear the stored result and the remembered ``top_k``; positional arguments are ignored."""
+    for key in ("result", "top_k"):
+        st.session_state[key] = None
+def set_openai_api_key(api_key: str):
+    """Save ``api_key`` in the session state under ``HF_TGI_TOKEN``.
+
+    Despite the function name, the stored value is the Hugging Face token.
+    """
+    st.session_state.HF_TGI_TOKEN = api_key
+def sidebar():
+    """Render the sidebar: intro text, usage steps, the token input and credits.
+
+    A non-empty token typed into the input is stored in the session state via
+    ``set_openai_api_key``; an empty input leaves any stored token untouched.
+    """
+    with st.sidebar:
+        # image = Image.open('logo/haystack-logo-colored.png')
+        st.markdown("Thanks for coming to this 🤗 Space.\n\n"
+        "This is a project for fun, and is not a final product."
+        " There's a lot that can be improved to make this app better.\n\n"
+        "**Take results with a grain of** 🧂\n\n"
+        "For more on how this was built, instructions to run locally and to contribute: [visit GitHub](https://github.com/TuanaCelik/hackernews-summaries#readme)")
+        st.markdown(
+            "## How to use\n"
+            "1. Enter your Hugging Face Token below\n"
+            "2. Select the number of summaries you want\n"
+            "3. Enjoy 🤗\n"
+        )
+        api_key_input = st.text_input(
+            "Hugging Face Token",
+            type="password",
+            placeholder="Paste your Hugging Face TGI Token",
+            # Help text used to point at the OpenAI API-keys page, which is the wrong service
+            help="You can get your token from https://huggingface.co/settings/tokens.",
+            value=st.session_state.get("HF_TGI_TOKEN", ""),
+        )
+        if api_key_input:
+            set_openai_api_key(api_key_input)
+        st.markdown("---")
+        # The generator link was missing its closing parenthesis and misspelled the docs slug
+        st.markdown(
+            "## How this works\n"
+            "This app was built with [Haystack 2.0-Beta](https://haystack.deepset.ai) using the"
+            " [`HuggingFaceTGIGenerator`](https://docs.haystack.deepset.ai/v2.0/docs/huggingfacetgigenerator) and [`PromptBuilder`](https://docs.haystack.deepset.ai/v2.0/docs/promptbuilder).\n\n"
+            " The source code is also on [GitHub](https://github.com/TuanaCelik/hackernews-summaries)"
+            " with instructions to run locally.\n"
+            "You can see how the `PromptBuilder` was set up [here](https://github.com/TuanaCelik/hackernews-summaries/blob/main/utils/haystack.py)")
+        st.markdown("---")
+        st.markdown("Made by [tuanacelik](https://twitter.com/tuanacelik)")