Tuana committed on
Commit
d61b6cb
1 Parent(s): 3979058

first working app

Browse files
.github/workflows/hf_sync.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Pushes the repository to the Hugging Face Space whenever main is updated.
name: Sync to Hugging Face hub
on:
  push:
    branches: [main]

  # to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      # checkout@v2 targets the retired Node 12 runtime; v4 is the maintained release.
      - uses: actions/checkout@v4
        with:
          # Fetch the complete history (not a shallow clone) and LFS objects before pushing.
          fetch-depth: 0
          lfs: true
      - name: Push to hub
        env:
          # Token is supplied through repository secrets, never committed.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: git push --force https://Tuana:$HF_TOKEN@huggingface.co/spaces/Tuana/hackernews-summaries main
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
README.md CHANGED
@@ -1 +1,10 @@
1
- # hackernews-summaries
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Should I follow?
3
+ emoji: 🧡
4
+ colorFrom: orange
5
+ colorTo: yellow
6
+ sdk: streamlit
7
+ sdk_version: 1.25.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from annotated_text import annotation
3
+ from json import JSONDecodeError
4
+ import logging
5
+ from markdown import markdown
6
+ import requests
7
+
8
+ import streamlit as st
9
+
10
+ from utils.haystack import query, start_haystack
11
+ from utils.ui import reset_results, set_initial_state, sidebar
12
+
13
# Streamlit entry point: the whole script re-runs on every user interaction,
# so anything that must survive a rerun lives in st.session_state.
set_initial_state()

sidebar()

st.write("# Get the summaries of latest top Hacker News posts 🧡")

# The query controls are only rendered once a Hugging Face token is available.
if st.session_state.get("HF_TGI_TOKEN"):
    pipeline = start_haystack(st.session_state.get("HF_TGI_TOKEN"))
    st.session_state["api_key_configured"] = True
    search_bar, button = st.columns(2)
    # Search bar
    with search_bar:
        top_k = st.slider('How many of the top posts should I summarize?', 0, 5, 0)

    with button:
        # Two empty writes act as spacers above the button
        # (presumably to line it up with the slider).
        st.write("")
        st.write("")
        run_pressed = st.button("Get summaries")
else:
    st.write("Please provide your Hugging Face Token to start using the application")
    st.write("If you are using a smaller screen, open the sidebar from the top left to provide your token 🙌")

if st.session_state.get("api_key_configured"):
    # Run when the button is pressed or the slider moved away from the
    # value that produced the currently stored result.
    run_query = (
        run_pressed or top_k != st.session_state.top_k
    )

    # Get results for query
    if run_query and top_k:
        reset_results()
        # Remember the value just queried. The original wrote this to
        # `username`, so the comparison above was always true and every
        # rerun triggered a new query.
        st.session_state.top_k = top_k
        with st.spinner("🔎"):
            try:
                st.session_state.result = query(top_k, pipeline)
            except JSONDecodeError:
                st.error(
                    "👓    An error occurred reading the results. Is the document store working?"
                )
            except Exception as e:
                logging.exception(e)
                st.error("🐞    An error occurred during the request.")

    if st.session_state.result:
        summaries = st.session_state.result
        # Only the first reply returned by the generator is displayed.
        st.write(summaries[0])
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ haystack-ai==2.0.0b5
2
+ streamlit==1.25.0
3
+ markdown
4
+ python-dotenv
5
+ newspaper3k
utils/config.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Hugging Face TGI token taken from the environment; None when the variable is unset.
HF_TGI_TOKEN = os.getenv('HF_TGI_TOKEN')

# Kept for backward compatibility: the name is misleading, since the value is
# the Hugging Face token read above. Prefer HF_TGI_TOKEN in new code.
TWITTER_BEARER = HF_TGI_TOKEN
utils/hackernews_fetcher.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from haystack import component, Document
3
+ from newspaper import Article
4
+ import requests
5
+
6
@component
class HackernewsFetcher():
    """Haystack component that loads the current top Hacker News stories as Documents."""

    @component.output_types(articles=List[Document])
    def run(self, top_k: int):
        """Fetch the first ``top_k`` top stories and extract their article text.

        Args:
            top_k: Number of story ids to take from the front of the
                top-stories list.

        Returns:
            A dict ``{'articles': docs}`` holding one Document per article that
            could be downloaded and parsed. ``content`` is the article text and
            ``meta`` carries its ``title`` and ``url``.

        Raises:
            requests.RequestException: If a Hacker News API call fails or
                exceeds the timeout.
        """
        # Without a timeout a stalled connection would block the app indefinitely.
        timeout_seconds = 10

        top_stories = requests.get(
            url='https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty',
            timeout=timeout_seconds,
        )

        urls = []
        for story_id in top_stories.json()[0:top_k]:
            response = requests.get(
                url=f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json?print=pretty",
                timeout=timeout_seconds,
            )
            # Decode once; the payload may be null or have no 'url'
            # (e.g. text-only posts), in which case the story is skipped.
            item = response.json()
            if item and 'url' in item:
                urls.append(item['url'])

        docs = []
        for url in urls:
            try:
                article = Article(url)
                article.download()
                article.parse()
                docs.append(Document(content=article.text, meta={'title': article.title, 'url': url}))
            except Exception as err:
                # Best effort: one unreachable article must not abort the
                # batch. `Exception` (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                print(f"Couldn't download {url}, skipped: {err}")
        return {'articles': docs}
utils/haystack.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from haystack import Pipeline
3
+ from haystack.components.builders.prompt_builder import PromptBuilder
4
+ from haystack.components.generators import HuggingFaceTGIGenerator
5
+ from .hackernews_fetcher import HackernewsFetcher
6
+
7
def start_haystack(hf_token):
    """Build the fetch -> prompt -> generate pipeline.

    Args:
        hf_token: Hugging Face token passed to ``HuggingFaceTGIGenerator``.

    Returns:
        A ``Pipeline`` with three connected components:
        ``hackernews_fetcher`` (outputs ``articles``), ``prompt_builder``
        (renders them into a prompt) and ``llm`` (generates the summaries).
    """
    # Jinja template: each fetched article contributes its content and URL.
    prompt_template = """
You will be provided one or more top Hacker News posts, followed by their URL.
For the posts you have, provide a short summary followed by the URL that the post can be found at.

Posts:
{% for article in articles %}
Post content: {{article.content}}
Post URL: {{article.meta['url']}}
{% endfor %}
Summaries:
"""

    prompt_builder = PromptBuilder(template=prompt_template)
    llm = HuggingFaceTGIGenerator("mistralai/Mixtral-8x7B-Instruct-v0.1", token=hf_token)
    fetcher = HackernewsFetcher()

    pipe = Pipeline()
    pipe.add_component("hackernews_fetcher", fetcher)
    pipe.add_component("prompt_builder", prompt_builder)
    pipe.add_component("llm", llm)

    # Wire outputs to inputs: articles feed the template, the prompt feeds the LLM.
    pipe.connect("hackernews_fetcher.articles", "prompt_builder.articles")
    pipe.connect("prompt_builder.prompt", "llm.prompt")
    return pipe
32
+
33
+
34
@st.cache_data(show_spinner=True)
def _run_pipeline(top_k, _pipeline):
    """Run the pipeline for ``top_k`` posts and return the LLM replies.

    Only successful runs are cached because failures raise instead of
    returning a value. The leading underscore on ``_pipeline`` tells
    ``st.cache_data`` not to hash that argument.
    """
    replies = _pipeline.run(data={"hackernews_fetcher": {"top_k": top_k},
                                  "llm": {"generation_kwargs": {"max_new_tokens": 600}}
                                  })
    return replies['llm']['replies']


def query(top_k, _pipeline):
    """Return the generated summaries for the ``top_k`` top posts.

    Args:
        top_k: Number of top posts to fetch and summarize.
        _pipeline: Pipeline exposing ``run(data=...)``, as built by
            ``start_haystack``.

    Returns:
        The list of replies produced by the ``llm`` component, or a
        single-element list with an apology message when the run fails.
    """
    # Local import: this module does not import logging at file level.
    import logging

    try:
        return _run_pipeline(top_k, _pipeline)
    except Exception:
        # Log the failure instead of discarding it. The fallback is built
        # outside the cached helper so a transient error is not replayed
        # from the cache on the next identical request.
        logging.exception("Pipeline run failed for top_k=%s", top_k)
        return ["Sorry, there seems to be an issue here 😔"]
utils/ui.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+
4
def set_state_if_absent(key, value):
    """Store ``value`` under ``key`` in the session state unless the key already exists."""
    if key in st.session_state:
        return
    st.session_state[key] = value
7
+
8
def set_initial_state():
    """Give ``top_k``, ``result`` and ``haystack_started`` default values.

    Keys that are already present in the session state are left untouched.
    """
    defaults = (
        ("top_k", "How many of the top posts would you like a summary for?"),
        ("result", None),
        ("haystack_started", False),
    )
    for state_key, default in defaults:
        set_state_if_absent(state_key, default)
12
+
13
def reset_results(*args):
    """Set the stored ``result`` and ``top_k`` back to ``None``.

    Any positional arguments are accepted and ignored.
    """
    for state_key in ("result", "top_k"):
        st.session_state[state_key] = None
16
+
17
def set_openai_api_key(api_key: str):
    """Save ``api_key`` in the session state under ``HF_TGI_TOKEN``.

    Despite the function's name, the slot it writes is the Hugging Face token.
    """
    st.session_state.HF_TGI_TOKEN = api_key
19
+
20
def sidebar():
    """Render the sidebar: intro, usage steps, token input and project credits.

    A non-empty value entered in the token field is stored in the session
    state via ``set_openai_api_key`` (key ``HF_TGI_TOKEN``).
    """
    with st.sidebar:
        st.markdown("Thanks for coming to this 🤗 Space.\n\n"
                    "This is a project for fun, and is not a final product."
                    " There's a lot that can be improved to make this app better.\n\n"
                    "**Take results with a grain of** 🧂\n\n"
                    "For more on how this was built, instructions to run locally and to contribute: [visit GitHub](https://github.com/TuanaCelik/hackernews-summaries#readme)")

        st.markdown(
            "## How to use\n"
            "1. Enter your Hugging Face Token below\n"
            "2. Select the number of summaries you want\n"
            "3. Enjoy 🤗\n"
        )

        # Pre-fill with any token already stored so reruns keep the field populated.
        api_key_input = st.text_input(
            "Hugging Face Token",
            type="password",
            placeholder="Paste your Hugging Face TGI Token",
            # The original help text pointed at the OpenAI API-key page.
            help="You can get your token from https://huggingface.co/settings/tokens.",
            value=st.session_state.get("HF_TGI_TOKEN", ""),
        )

        if api_key_input:
            set_openai_api_key(api_key_input)

        st.markdown("---")
        # The first link originally lacked its closing parenthesis (and the
        # slug was misspelled), which broke the rendered markdown.
        st.markdown(
            "## How this works\n"
            "This app was built with [Haystack 2.0-Beta](https://haystack.deepset.ai) using the"
            " [`HuggingFaceTGIGenerator`](https://docs.haystack.deepset.ai/v2.0/docs/huggingfacetgigenerator) and [`PromptBuilder`](https://docs.haystack.deepset.ai/v2.0/docs/promptbuilder).\n\n"
            " The source code is also on [GitHub](https://github.com/TuanaCelik/hackernews-summaries)"
            " with instructions to run locally.\n"
            "You can see how the `PromptBuilder` was set up [here](https://github.com/TuanaCelik/hackernews-summaries/blob/main/utils/haystack.py)")
        st.markdown("---")
        st.markdown("Made by [tuanacelik](https://twitter.com/tuanacelik)")