# Spaces: Runtime error (page-status banner captured together with the source; not part of the program)
from datetime import datetime | |
import os | |
import uuid | |
import hmac | |
import openai | |
import requests | |
import streamlit as st | |
from azure.cosmos import ContainerProxy, CosmosClient | |
from bs4 import BeautifulSoup, NavigableString | |
from dotenv import load_dotenv | |
from st_copy_to_clipboard import st_copy_to_clipboard | |
from pytrends.request import TrendReq | |
import pytz | |
import xml.etree.ElementTree as ET | |
import re | |
# Load variables from a .env file into the process environment so that the
# os.environ / os.getenv lookups used throughout this file can see them.
load_dotenv()
# Render the page with the sidebar collapsed initially.
st.set_page_config(initial_sidebar_state="collapsed")
def get_related_studies(article: str):
    """Search Google Scholar (via SerpApi) for studies related to *article*.

    A search query is derived from the article with ``extract_scholar_query``
    and sent to the SerpApi ``google_scholar`` engine, restricted to results
    from 2018 onwards. Every organic result that has both a title and a link
    is stored as ``{"title": ..., "link": ...}`` in
    ``st.session_state["studie_links"]``; a non-200 response or a response
    without organic results stores an empty list.

    Errors are printed and shown with ``st.error``; in that case
    ``studie_links`` is left untouched.
    """
    with st.spinner("Extrahiere Studien..."):
        query = extract_scholar_query(article).replace('"', "")
        try:
            # Pass the query through ``params`` so that characters such as
            # '&', '#' or '+' in the generated query are URL-encoded instead
            # of corrupting the request URL.
            response = requests.get(
                "https://serpapi.com/search.json",
                params={
                    "engine": "google_scholar",
                    "api_key": os.getenv("SERP_API_KEY"),
                    "as_ylo": "2018",
                    "q": query,
                },
                timeout=30,  # never block the UI forever on a hanging request
            )
            organic_results = []
            if response.status_code == 200:
                organic_results = response.json().get("organic_results") or []
            st.session_state["studie_links"] = [
                {"title": result["title"], "link": result["link"]}
                for result in organic_results
                if result.get("title") and result.get("link")
            ]
        except Exception as e:
            print(f"Fehler beim extrahieren der Studien: {str(e)}")
            st.error(f"Something went wrong: {str(e)}", icon="🚨")
def get_takeaways(article: str):
    """Generate key takeaways for *article* and store them in session state.

    The OpenAI client is configured from the ``OPEN_API_*`` environment
    variables; the rules for the takeaways are read from the ``takeaway``
    environment variable. On success the model output is written to
    ``st.session_state["takeaways"]``. Errors are printed and shown with
    ``st.error``.
    """
    openai.api_key = os.environ.get("OPEN_API_KEY")
    openai.api_base = os.environ.get("OPEN_API_BASE")
    openai.api_type = os.environ.get("OPEN_API_TYPE")
    openai.api_version = os.environ.get("OPEN_API_VERSION")
    takeaway_query = os.environ.get("takeaway")
    with st.spinner("Creating Takeaways"):
        try:
            res = openai.ChatCompletion.create(
                engine="gpt-4-1106",
                temperature=0.2,
                messages=[
                    {
                        "role": "system",
                        "content": f" The article you have written is as follows: {article}.",
                    },
                    {
                        "role": "system",
                        "content": f"Schreibe mir zu diesen Artikel Key Takeaways nach folgenden Regeln {takeaway_query}.",
                    },
                ],
            )
            st.session_state["takeaways"] = res["choices"][0]["message"]["content"]
        except Exception as e:
            # The log line previously claimed a "Query" extraction failure
            # (copied from extract_scholar_query), which made logs misleading.
            print(f"Fehler beim erstellen der Takeaways: {str(e)}")
            st.error(f"Something went wrong: {str(e)}", icon="🚨")
def get_faq(article: str):
    """Generate frequently asked questions for *article* and store them.

    The OpenAI client is configured from the ``OPEN_API_*`` environment
    variables; the rules for the FAQ are read from the ``faq`` environment
    variable. On success the model output is written to
    ``st.session_state["faq"]``. Errors are printed and shown with
    ``st.error``.
    """
    openai.api_key = os.environ.get("OPEN_API_KEY")
    openai.api_base = os.environ.get("OPEN_API_BASE")
    openai.api_type = os.environ.get("OPEN_API_TYPE")
    openai.api_version = os.environ.get("OPEN_API_VERSION")
    faq_query = os.environ.get("faq")
    with st.spinner("Creating FAQ"):
        try:
            res = openai.ChatCompletion.create(
                engine="gpt-4-1106",
                temperature=0.2,
                messages=[
                    {
                        "role": "system",
                        "content": f" The article you have written is as follows: {article}.",
                    },
                    {
                        "role": "system",
                        "content": f"Schreibe mir zu diesen Artikel Frequently Asked Questions nach folgenden Regeln {faq_query}.",
                    },
                ],
            )
            st.session_state["faq"] = res["choices"][0]["message"]["content"]
        except Exception as e:
            # The log line previously claimed a "Query" extraction failure
            # (copied from extract_scholar_query), which made logs misleading.
            print(f"Fehler beim erstellen der FAQ: {str(e)}")
            st.error(f"Something went wrong: {str(e)}", icon="🚨")
def extract_scholar_query(article: str):
    """Ask the chat model for a short Google Scholar query matching *article*.

    Returns the model's answer as a string. If the request fails, the error
    is printed and shown with ``st.error`` and an empty string is returned.
    """
    # Configure the OpenAI client from the environment.
    for attribute, env_name in (
        ("api_key", "OPEN_API_KEY"),
        ("api_base", "OPEN_API_BASE"),
        ("api_type", "OPEN_API_TYPE"),
        ("api_version", "OPEN_API_VERSION"),
    ):
        setattr(openai, attribute, os.environ.get(env_name))

    try:
        prompt = f"You are a professional journalist whose task is to find related studies based on an article you have written. Please write a query that you would use to search for related studies on Google Scholar. Please make sure that the query is specific enough and cotains a maximum of 4 words. Only include one query in your output. Do not write multiple querys with an AND or OR. The article you have written is as follows: {article}."
        completion = openai.ChatCompletion.create(
            engine="gpt-4-1106",
            temperature=0.2,
            messages=[{"role": "system", "content": prompt}],
        )
        first_choice = completion["choices"][0]
        return first_choice["message"]["content"]
    except Exception as e:
        print(f"Fehler beim extrahieren der Query: {str(e)}")
        st.error(f"Something went wrong: {str(e)}", icon="🚨")
        return ""
def create_article(length_option, articles, params, web_page_option):
    """Generate a German article with the chat model.

    Args:
        length_option: One of "Kurz", "Mittel", "Lang", "SEO", "SEO Plus";
            selects the environment variable holding the target word count.
        articles: Source article texts. When empty (``[]`` or ``""``) the
            article is written from ``params`` alone.
        params: Additional user instructions, or the user's notes when no
            source articles are given.
        web_page_option: Writing-style label as offered in the UI; selects
            the environment variable holding the style description.

    Returns:
        The generated article text, or ``None`` when generation failed or an
        unknown option was passed (the error is printed and shown with
        ``st.error``).
    """
    length_env_by_option = {
        "Kurz": "SHORT_LENGTH",
        "Mittel": "MEDIUM_LENGTH",
        "Lang": "LONG_LENGTH",
        "SEO": "SEO_LENGTH",
        "SEO Plus": "SEO_PLUS_LENGTH",
    }
    style_env_by_page = {
        "Boulevard": "WRITING_STYLE_HEUTE",
        "Health Blog": "WRITING_STYLE_GESUND",
        "Newspaper": "WRITING_STYLE_NEWSPAPER",
        "Tech/Lifestyle Blog": "WRITING_STYLE_TECH_BLOG",
        "Public Relations": "WRITING_STYLE_PR",
        "Sales": "WRITING_STYLE_SALES",
        "Lifestyle Blog": "WRITING_STYLE_LIFESTYLE",
    }

    openai.api_key = os.environ.get("OPEN_API_KEY")
    openai.api_base = os.environ.get("OPEN_API_BASE")
    openai.api_type = os.environ.get("OPEN_API_TYPE")
    openai.api_version = os.environ.get("OPEN_API_VERSION")

    try:
        # Unknown options used to leave `length` / `writing_style` unbound and
        # surfaced as an opaque UnboundLocalError; report them explicitly.
        if length_option not in length_env_by_option:
            raise ValueError(f"Unknown length option: {length_option!r}")
        if web_page_option not in style_env_by_page:
            raise ValueError(f"Unknown writing style option: {web_page_option!r}")
        length = os.environ.get(length_env_by_option[length_option])
        writing_style = os.environ.get(style_env_by_page[web_page_option])

        if articles:
            article_string = "; ".join(
                f"Artikel {index + 1}: {artikel}"
                for index, artikel in enumerate(articles)
            )
            messages = [
                {
                    "role": "system",
                    "content": f"You are a professional journalist whose task is to write your own article based on one or more articles. This article should combine the content of the original articles, but have its own writing style, which is as follows: {writing_style} Do not use unusual phrases or neologisms from the original articles.",
                },
                {"role": "system", "content": f"Source articles: {article_string}"},
                {
                    "role": "system",
                    "content": f"Please also note the following instructions defined by the user: {params}",
                },
                {
                    "role": "system",
                    "content": f" It is very important that the length of your article you generate should be {length} words long.",
                },
                {
                    "role": "system",
                    "content": "Schreibe den Artikel immer in deutscher Sprache.",
                },
            ]
        else:
            messages = [
                {
                    "role": "system",
                    "content": f"You are a professional journalist whose task is to write an article based on your own notes. This article should be written in the following writing style: {writing_style} .It is important that the length of your article should be {length} words long.",
                },
                {
                    "role": "system",
                    "content": f"Please write the article based on the following user input: {params}",
                },
                {
                    "role": "system",
                    "content": "Schreibe den Artikel immer in deutscher Sprache.",
                },
            ]
        res = openai.ChatCompletion.create(
            engine="gpt-35-16k",
            temperature=0.4,
            max_tokens=8000,
            messages=messages,
        )
        return res["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"Fehler beim erstellen des artikels: {str(e)}")
        st.error(f"Something went wrong: {str(e)}", icon="🚨")
        return None
def create_headline(article, web_page_option):
    """Generate a German headline (max. 10 words) for *article*.

    Args:
        article: The article text the headline is generated for.
        web_page_option: Writing-style label as offered in the UI. It selects
            the same ``WRITING_STYLE_*`` environment variable that
            ``create_article`` uses for that label; unknown labels fall back
            to ``WRITING_STYLE_GESUND`` (the previous catch-all).

    Returns:
        The headline text, or ``None`` when the request failed (the error is
        printed and shown with ``st.error``).
    """
    openai.api_key = os.environ.get("OPEN_API_KEY")
    openai.api_base = os.environ.get("OPEN_API_BASE")
    openai.api_type = os.environ.get("OPEN_API_TYPE")
    openai.api_version = os.environ.get("OPEN_API_VERSION")

    # Previously every option other than "Boulevard" used the health-blog
    # style, so headlines for e.g. "Newspaper" or "Sales" articles were
    # written against a different style than the article itself.
    style_env_by_page = {
        "Boulevard": "WRITING_STYLE_HEUTE",
        "Health Blog": "WRITING_STYLE_GESUND",
        "Newspaper": "WRITING_STYLE_NEWSPAPER",
        "Tech/Lifestyle Blog": "WRITING_STYLE_TECH_BLOG",
        "Public Relations": "WRITING_STYLE_PR",
        "Sales": "WRITING_STYLE_SALES",
        "Lifestyle Blog": "WRITING_STYLE_LIFESTYLE",
    }
    writing_style = os.environ.get(
        style_env_by_page.get(web_page_option, "WRITING_STYLE_GESUND")
    )

    try:
        res = openai.ChatCompletion.create(
            engine="gpt-4-1106",
            temperature=0.4,
            messages=[
                {
                    "role": "system",
                    "content": f"You are a professional journalist and have the task of generating a headline for an article you have written. I will give you the writing style that was used to create the article as info. Writing style: {writing_style} The headline should be as short as possible, but still capture the essence of the article. It should be a maximum of 10 words long",
                },
                {"role": "system", "content": f"Source article: {article}"},
                {
                    "role": "system",
                    "content": "Schreibe die Headline immer in deutscher Sprache.",
                },
            ],
        )
        return res["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"Fehler beim erstellen der headline: {str(e)}")
        st.error(f"Something went wrong: {str(e)}", icon="🚨")
        return None
def extract_text_from_element(element):
    """Collect the text of all <p>, <ul> and <ol> elements below *element*.

    Each matching element contributes its full text (including the text of
    its descendants) followed by a newline. Text nodes and elements outside
    of <p>/<ul>/<ol> contribute nothing on their own.
    """
    if element.name in ("p", "ul", "ol"):
        # get_text() already contains the text of every descendant. Stop here:
        # recursing further would emit nested lists/paragraphs (e.g. a <p> or
        # <ul> inside an <li>) a second time.
        return element.get_text() + "\n"
    if isinstance(element, NavigableString):
        # Text nodes have no children to descend into.
        return ""
    return "".join(extract_text_from_element(child) for child in element.children)
def get_article_summary(article: str) -> str:
    """Request a summary of *article* from the summary API.

    The endpoint is read from ``SUMMARY_API`` and the bearer token from
    ``SUMMARY_API_KEY``. Returns the ``"summary"`` field of the JSON
    response, or an empty string when anything goes wrong (the error is
    printed).
    """
    try:
        response = requests.post(
            os.environ.get("SUMMARY_API"),
            headers={
                "Content-Type": "application/json",
                "Authorization": "Bearer " + os.environ.get("SUMMARY_API_KEY"),
                "azureml-model-deployment": "heute-summary-api",
            },
            # `data=<dict>` would form-encode the body although the request
            # declares JSON; `json=` serialises the payload as JSON.
            json={"article": article},
            timeout=60,  # do not hang the UI on an unresponsive endpoint
        )
        response.raise_for_status()
        return response.json()["summary"]
    except Exception as e:
        print(f"Fehler beim erstellen der Zusammenfassung: {str(e)}")
        return ""
def extract_article(url):
    """Download *url* and return the text of its <article> element.

    Returns the extracted text with empty lines removed, or ``None`` when the
    page could not be fetched, did not answer with status 200, or contains no
    <article> tag. Failures are printed.
    """
    # Browser-like user agent for the download request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as e:
        # Connection problems, invalid URLs and timeouts are reported the
        # same way as every other failure of this function: print + None.
        print(f"Fehler: {str(e)}")
        return None

    if response.status_code != 200:
        print(f"Fehler: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    article_tag = soup.find("article")
    if not article_tag:
        print("Kein <article>-Tag gefunden.")
        return None

    extracted_text = extract_text_from_element(article_tag)
    return filter_empty_lines(extracted_text)
def filter_empty_lines(text):
    """Return *text* without empty or whitespace-only lines."""
    kept_rows = [row for row in text.split("\n") if row.strip()]
    return "\n".join(kept_rows)
def extract_article_links(**kwargs):
    """Extract every article in ``kwargs["links"]`` and move to page 1.

    The extraction results (one entry per link, as returned by
    ``extract_article``) are stored in
    ``st.session_state["extracted_articles"]``.
    """
    with st.spinner("Extrahiere..."):
        st.session_state["extracted_articles"] = [
            extract_article(url) for url in kwargs["links"]
        ]
        # Only advance the reached process step the first time.
        if st.session_state["process_step"] < 1:
            st.session_state["process_step"] += 1
        # NOTE(review): nesting reconstructed from a whitespace-stripped
        # source; the page switch is assumed to be unconditional - confirm.
        st.session_state["selected_page"] = 1
def extract_article_links_for_heading(**kwargs):
    """Run ``extract_article`` on ``kwargs["link"]``; the result is not used."""
    extract_article(kwargs["link"])
def finalize_articles():
    """Collect the edited articles from session state and go to the next page.

    For every extracted article the value stored under ``final_article_<n>``
    (1-based) is copied into ``st.session_state["final_articles"]``.
    """
    article_count = len(st.session_state["extracted_articles"])
    st.session_state["final_articles"] = [
        st.session_state["final_article_" + str(number)]
        for number in range(1, article_count + 1)
    ]
    # Only advance the reached process step the first time.
    if st.session_state["process_step"] < 2:
        st.session_state["process_step"] += 1
    # NOTE(review): nesting reconstructed from a whitespace-stripped source;
    # the page increment is assumed to be unconditional - confirm.
    st.session_state["selected_page"] += 1
def increase_page():
    """Go one page forward unless the selected page is already past the reached step."""
    if st.session_state["selected_page"] > st.session_state["process_step"]:
        return
    st.session_state["selected_page"] += 1
def decrease_page():
    """Go one page back; page 0 is the lower bound."""
    if st.session_state["selected_page"] <= 0:
        return
    st.session_state["selected_page"] -= 1
def on_click_handler_generate_article(**kwargs):
    """Generate article, headline and summary from the finalized articles.

    Expects the keyword arguments ``length_option``, ``final_articles``,
    ``add_info`` and ``webpage_option``. On success an analytics item is
    written to the database container, the results are stored in session
    state and the UI moves to the next page.

    If article generation fails (``create_article`` returns nothing and has
    already shown the error), the handler stops: no headline/summary request
    is made for a missing article and the page is not advanced.
    """
    with st.spinner("Generiere Artikel..."):
        created_article = create_article(
            kwargs["length_option"],
            kwargs["final_articles"],
            kwargs["add_info"],
            kwargs["webpage_option"],
        )
        if not created_article:
            return
        headline = create_headline(created_article, kwargs["webpage_option"])
        print(headline)
        print(created_article)
        db_analytics_item = {
            "id": str(uuid.uuid4()),
            "oparation": "article_generation",  # key spelling is what is stored in the DB
            "timestamp": str(datetime.now()),
        }
        client: ContainerProxy = st.session_state["db_container"]
        client.create_item(body=db_analytics_item)
        st.session_state["generated_article"] = created_article
        st.session_state["generated_headline"] = headline
        st.session_state["article_summary"] = get_article_summary(created_article)
        # Only advance the reached process step the first time.
        if st.session_state["process_step"] < 3:
            st.session_state["process_step"] += 1
        st.session_state["selected_page"] += 1
def on_click_handler_generate_generate_article_keywords(**kwargs):
    """Generate article, headline and summary from the user's notes.

    Expects the keyword arguments ``length_option``, ``artikel_input`` and
    ``webpage_option``. No source articles are passed to ``create_article``,
    so the article is written from ``artikel_input`` alone. On success an
    analytics item is written to the database container and the results are
    stored in session state.

    If article generation fails (``create_article`` returns nothing and has
    already shown the error), the handler stops without requesting a
    headline or summary and without overwriting previous results.
    """
    with st.spinner("Generiere Artikel..."):
        created_article = create_article(
            kwargs["length_option"],
            "",
            kwargs["artikel_input"],
            kwargs["webpage_option"],
        )
        if not created_article:
            return
        headline = create_headline(created_article, kwargs["webpage_option"])
        summary = get_article_summary(created_article)
        db_analytics_item = {
            "id": str(uuid.uuid4()),
            "oparation": "article_generation",  # key spelling is what is stored in the DB
            "timestamp": str(datetime.now()),
        }
        client: ContainerProxy = st.session_state["db_container"]
        client.create_item(body=db_analytics_item)
        st.session_state["generated_article"] = created_article
        st.session_state["generated_headline"] = headline
        st.session_state["article_summary"] = summary
def reset_session_state():
    """Put the workflow-related session-state entries back to their initial values."""
    # Built on every call so each reset gets fresh list/dict objects.
    initial_values = {
        "extracted_articles": [],
        "article_links": [],
        "final_articles": [],
        "process_step": 0,
        "selected_page": 0,
        "generated_article": "",
        "studie_links": [],
        "article_summary": "",
        "selection_content_trends_ressort": "Alle",
        "trends_realtime_all": {},
        "trends_today": {},
        "trends_yesterday": [],
        "content_trend_articles_extracted": [],
        "content_trend_article_links": [],
        "webpage_option": "Boulevard",
    }
    for state_key, initial_value in initial_values.items():
        st.session_state[state_key] = initial_value
## Trends | |
def fetch_trends(**kwargs):
    """Load the trends for ``kwargs["timespan"]``.

    "Echtzeit", "Heute" and "Gestern" trigger the matching loader; any other
    value does nothing.
    """
    timespan = kwargs["timespan"]
    if timespan == "Echtzeit":
        fetch_trends_realtime()
    elif timespan == "Heute":
        fetch_trends_today()
    elif timespan == "Gestern":
        fetch_trends_yesterday()
def fetch_trends_realtime():
    """Fetch the realtime trends of every ressort and cache them in session state.

    The result for each category code is stored under
    ``"trends_realtime_<code>"``.
    """
    trend_client = TrendReq(hl='de-AT', tz=360, timeout=(10, 50))
    for category_code in RESSORTS.values():
        st.session_state["trends_realtime_" + category_code] = (
            trend_client.realtime_trending_searches(
                pn='AT', cat=category_code, count=15
            )
        )
def fetch_trends_today():
    """Fetch today's searches for "AT" and store them under ``"trends_today"``."""
    trend_client = TrendReq(hl='de-AT', tz=360, timeout=(10, 50))
    st.session_state["trends_today"] = trend_client.today_searches(pn="AT")
def fetch_trends_yesterday():
    """Load the daily-trends RSS feed and keep every entry not published today.

    "Today" is the current date in the Europe/Vienna time zone. Each kept
    entry is a dict with ``title``, ``pubDate``, ``approx_traffic`` and a
    list of ``news_items`` (``title``, ``snippet``, ``url``, ``source``).
    Fields missing in the feed are stored as ``None``; items without a
    ``pubDate`` are skipped because they cannot be date-filtered. The result
    is stored in ``st.session_state["trends_yesterday"]``.
    """
    today = datetime.now(pytz.timezone('Europe/Vienna')).date()
    # Bounded wait so a hanging feed request cannot freeze the UI.
    response = requests.get(TRENDS_YESTERDAY_FEED_URL, timeout=30)
    feed = ET.fromstring(response.content)
    ns = {'ht': 'https://trends.google.de/trends/trendingsearches/daily'}  # feed namespace

    def text_of(parent, path):
        """Return the text of the first child matching *path*, or None."""
        node = parent.find(path, ns)
        return node.text if node is not None else None

    trends = []
    for item in feed.findall(".//item"):
        pub_date_text = text_of(item, 'pubDate')
        if pub_date_text is None:
            continue
        pub_date = datetime.strptime(pub_date_text, '%a, %d %b %Y %H:%M:%S %z').date()
        # Skip entries published today.
        if pub_date == today:
            continue
        trends.append(
            {
                'title': text_of(item, 'title'),
                'pubDate': pub_date_text,
                'approx_traffic': text_of(item, 'ht:approx_traffic'),
                'news_items': [
                    {
                        'title': text_of(news_item, 'ht:news_item_title'),
                        'snippet': text_of(news_item, 'ht:news_item_snippet'),
                        'url': text_of(news_item, 'ht:news_item_url'),
                        'source': text_of(news_item, 'ht:news_item_source'),
                    }
                    for news_item in item.findall('ht:news_item', ns)
                ],
            }
        )
    st.session_state["trends_yesterday"] = trends
def render_trends_realtime(container):
    """Render the cached realtime trends of the selected ressort into *container*.

    Shows an info box when nothing has been loaded for the ressort yet.
    Otherwise renders the ressort selector and one expander per trend with a
    checkbox per article; checkbox changes are forwarded to
    ``update_trend_article_list``.
    """
    ressort = st.session_state["selection_content_trends_ressort"]
    # Only "trends_realtime_all" is pre-initialised; the other ressort keys
    # exist only after a fetch, so read with a default instead of raising.
    trends_realtime = st.session_state.get(
        "trends_realtime_" + RESSORTS[ressort], {}
    )
    if trends_realtime == {}:
        container.info(
            body="Die Echtzeit-Trends wurden noch nicht geladen. Bitte verwende zunächst die Suche auf der rechten Seite!",
            icon="ℹ️"
        )
        return

    container.selectbox(
        label="Ressort auswählen",
        options=RESSORTS,
        placeholder="Bitte auswählen",
        key="selection_content_trends_ressort",
    )
    for trend_count, trend in enumerate(trends_realtime, start=1):
        with container.expander(f"{trend_count} -- {trend['title']}"):
            articles = extract_article_details_realtime(trend['articles'])
            for article_count, article in enumerate(articles, start=1):
                key = f"selection_trends_realtime_{ressort}_{trend_count}_{article_count}"
                st.checkbox(
                    f"{article_count} -- {article['articleTitle']} [Go To →]({article['url']})",
                    key=key,
                    disabled=disable_checkbox(f"selection_trends_realtime_{ressort}", key),
                    # Register the callback instead of calling it while
                    # rendering (which passed its None result as on_change).
                    on_change=update_trend_article_list,
                    args=(key, article['url']),
                )
def render_trends_today(container):
    """Render today's cached trends into *container*.

    Shows an info box when nothing has been loaded yet. Otherwise renders
    one expander per trend with a checkbox per article; checkbox changes are
    forwarded to ``update_trend_article_list``.
    """
    trends_today = st.session_state["trends_today"]
    if trends_today == {}:
        container.info(
            body="Die heutigen Trends wurden noch nicht geladen. Bitte verwende zunächst die Suche auf der rechten Seite!",
            icon="ℹ️"
        )
        return

    for trend_count, trend in enumerate(trends_today, start=1):
        with container.expander(f"{trend_count} -- {trend['title']['query']} | Generated Traffic: {trend['formattedTraffic']}"):
            articles = extract_article_details_today(trend['articles'])
            for article_count, article in enumerate(articles, start=1):
                key = f"selection_trends_today_{trend_count}_{article_count}"
                st.checkbox(
                    f"{article_count} -- {article['articleTitle']} [Go To →]({article['url']})",
                    key=key,
                    disabled=disable_checkbox("selection_trends_today", key),
                    # Register the callback instead of calling it while
                    # rendering (which passed its None result as on_change).
                    on_change=update_trend_article_list,
                    args=(key, article['url']),
                )
def render_trends_yesterday(container):
    """Render yesterday's cached trends into *container*.

    Shows an info box when nothing has been loaded yet. Otherwise renders
    one expander per trend (with its publication date) and a checkbox per
    news item; checkbox changes are forwarded to
    ``update_trend_article_list``.
    """
    trends_yesterday = st.session_state["trends_yesterday"]
    if trends_yesterday == []:
        container.info(
            body="Die gestrigen Trends wurden noch nicht geladen. Bitte verwende zunächst die Suche auf der rechten Seite!",
            icon="ℹ️"
        )
        return

    for trend_count, trend in enumerate(trends_yesterday, start=1):
        with container.expander(f"{trend_count}• {trend['title']} | Generated Traffic: {trend['approx_traffic']}"):
            st.write(f"Veröffentlichungsdatum : {trend['pubDate']}")
            for article_count, article in enumerate(trend['news_items'], start=1):
                key = f"selection_trends_yesterday_{trend_count}_{article_count}"
                st.checkbox(
                    label=f"{article_count} -- {article['title']} [Go To →]({article['url']})",
                    key=key,
                    disabled=disable_checkbox("selection_trends_yesterday", key),
                    # Register the callback instead of calling it while
                    # rendering (which passed its None result as on_change).
                    on_change=update_trend_article_list,
                    args=(key, article['url']),
                )
def get_checkbox_states(pattern: str):
    """Return all session-state entries whose key matches the regex *pattern*."""
    key_matcher = re.compile(pattern)
    matching_states = {}
    for state_key, state_value in st.session_state.items():
        if key_matcher.search(state_key):
            matching_states[state_key] = state_value
    return matching_states
def disable_checkbox(pattern: str, session_key: str):
    """Tell whether the checkbox stored under *session_key* must be disabled.

    Args:
        pattern: Regex selecting the session-state keys of the checkbox group.
        session_key: Session-state key of the checkbox in question (a string
            key, not a bool as previously annotated).

    Returns:
        ``True`` when the checkbox is unchecked and the group already has
        ``LINKS_MAX_CHECKED`` or more checked boxes; ``False`` otherwise,
        including when the checkbox has no session-state entry yet.
    """
    if session_key not in st.session_state:
        return False
    cb_states = get_checkbox_states(pattern)
    checked_count = sum(cb_states.values())
    return not cb_states[session_key] and checked_count >= LINKS_MAX_CHECKED
def update_trend_article_list(session_key, article_url):
    """Mirror the checkbox state of *session_key* in the selected-links list.

    A checked box adds *article_url* to
    ``st.session_state["content_trend_article_links"]`` (once); an unchecked
    box removes it. Nothing happens if the checkbox has no state yet.
    """
    if session_key not in st.session_state:
        return
    selected_links = st.session_state["content_trend_article_links"]
    is_checked = st.session_state[session_key]
    already_listed = article_url in selected_links
    if is_checked and not already_listed:
        selected_links.append(article_url)
    elif not is_checked and already_listed:
        selected_links.remove(article_url)
## Content extraction | |
def extract_text_from_element(element):
    """Collect the text of all <p>, <ul> and <ol> elements below *element*.

    Each matching element contributes its full text (including the text of
    its descendants) followed by a newline. Text nodes and elements outside
    of <p>/<ul>/<ol> contribute nothing on their own.
    """
    if element.name in ("p", "ul", "ol"):
        # get_text() already contains the text of every descendant. Stop here:
        # recursing further would emit nested lists/paragraphs (e.g. a <p> or
        # <ul> inside an <li>) a second time.
        return element.get_text() + "\n"
    if isinstance(element, NavigableString):
        # Text nodes have no children to descend into.
        return ""
    return "".join(extract_text_from_element(child) for child in element.children)
def filter_empty_lines(text):
    """Return *text* without empty or whitespace-only lines."""
    kept_rows = [row for row in text.split("\n") if row.strip()]
    return "\n".join(kept_rows)
def extract_article(url):
    """Download *url* and return the text of its <article> element.

    Returns the extracted text with empty lines removed, or ``None`` when the
    page could not be fetched, did not answer with status 200, or contains no
    <article> tag. Failures are printed.
    """
    # Browser-like user agent for the download request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as e:
        # Connection problems, invalid URLs and timeouts are reported the
        # same way as every other failure of this function: print + None.
        print(f"Fehler: {str(e)}")
        return None

    if response.status_code != 200:
        print(f"Fehler: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    article_tag = soup.find("article")
    if not article_tag:
        print("Kein <article>-Tag gefunden.")
        return None

    extracted_text = extract_text_from_element(article_tag)
    return filter_empty_lines(extracted_text)
def extract_links(**kwargs):
    """Extract every non-empty link in ``kwargs["links"]``.

    The results of ``extract_article`` are stored in session state under the
    key given in ``kwargs["key"]``.
    """
    with st.spinner("Extrahiere Informationen aus den Links..."):
        st.session_state[kwargs["key"]] = [
            extract_article(url) for url in kwargs["links"] if url != ''
        ]
def extract_article_details_realtime(articles):
    """Reduce each realtime-trend article to url, snippet, articleTitle and time."""
    wanted_fields = ('url', 'snippet', 'articleTitle', 'time')
    return [
        {field: entry[field] for field in wanted_fields}
        for entry in articles
    ]
def extract_article_details_today(articles):
    """Reduce each daily-trend article to url, snippet and articleTitle.

    The source field ``title`` is exposed as ``articleTitle``.
    """
    return [
        {
            'url': entry['url'],
            'snippet': entry['snippet'],
            'articleTitle': entry['title'],
        }
        for entry in articles
    ]
def get_final_articles():
    """Return the values of all session-state entries whose key contains "content_trend_article_final"."""
    key_matcher = re.compile("content_trend_article_final")
    collected_articles = []
    for state_key, state_value in st.session_state.items():
        if key_matcher.search(state_key):
            collected_articles.append(state_value)
    return collected_articles
# --- Session-state initialisation -------------------------------------------
# Plain defaults: only written when the key does not exist yet. The dict is
# rebuilt on every script run, so each session gets its own list/dict objects.
_session_defaults = {
    "extracted_articles": [],
    "final_articles": [],
    "process_step": 0,
    "selected_page": 0,
    "generated_article": "",
    "generated_headline": "",
    "webpage_option": "Boulevard",
    "studie_links": [],
    "article_summary": "",
    "article_generation_mode": "links",
    "selection_content_trends_ressort": "Alle",
    "trends_realtime_all": {},
    "trends_today": {},
    "trends_yesterday": [],
    "content_trend_articles_extracted": [],
    "content_trend_article_links": [],
}
for _state_key, _default_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _default_value

# Article links can be pre-filled through "article-links[]" query parameters.
if "article_links" not in st.session_state:
    print(st.query_params.get_all("article-links[]"))
    st.session_state["article_links"] = (
        st.query_params.get_all("article-links[]") or []
    )

# Create the analytics container client once per session and record the
# page load.
if "db_container" not in st.session_state:
    client = (
        CosmosClient(os.environ["DB_ENDPOINT"], os.environ["DB_KEY"])
        .get_database_client(os.environ["DB_NAME"])
        .get_container_client("tina-analytics")
    )
    db_analytics_item = {
        "id": str(uuid.uuid4()),
        "oparation": "page_load",
        "timestamp": str(datetime.now()),
    }
    client.create_item(body=db_analytics_item)
    st.session_state["db_container"] = client
# Labels of the workflow steps.
# NOTE(review): presumably indexed by the "process_step" / "selected_page"
# session values - confirm against the page rendering code.
PROCESS_STEPS = [
    "Artikel Extraktion",
    "Artikel Finalisierung",
    "Artikel Generierung",
    "Artikel Ausgabe",
]
# Maps the ressort label shown in the UI to the category code that is passed
# as `cat` to the realtime trend request and used as the suffix of the
# "trends_realtime_<code>" session-state keys.
RESSORTS = {
    "Alle": "all",
    "Gesundheit": "m",
    "Business": "b",
    "Headlines": "h",
    "Sport": "s",
    "Entertainment": "e",
    "Technik": "t",
}
# RSS feed read by fetch_trends_yesterday.
TRENDS_YESTERDAY_FEED_URL = 'https://trends.google.de/trends/trendingsearches/daily/rss?geo=AT'
# Number of checked boxes in a checkbox group at which disable_checkbox
# starts disabling the remaining unchecked boxes.
LINKS_MAX_CHECKED = 3
def check_password():
    """Returns `True` if the user had the correct password.

    Renders a password input until the entered value matches the ``PASSWORD``
    environment variable. The result of the last check is kept in
    ``st.session_state["password_correct"]``.
    """

    def password_entered():
        """Checks whether a password entered by the user is correct."""
        entered = st.session_state["password"]
        expected = os.environ.get("PASSWORD")
        # compare_digest only accepts ASCII-only str or bytes and raises
        # TypeError otherwise (e.g. a password with an umlaut, or an unset
        # PASSWORD variable). Compare UTF-8 bytes and treat a missing
        # PASSWORD as "never matches".
        if expected is not None and hmac.compare_digest(
            entered.encode("utf-8"), expected.encode("utf-8")
        ):
            st.session_state["password_correct"] = True
            del st.session_state["password"]  # Don't store the password.
        else:
            st.session_state["password_correct"] = False

    # Return True if the password is validated.
    if st.session_state.get("password_correct", False):
        return True

    # Show input for password.
    st.text_input(
        "Password", type="password", on_change=password_entered, key="password"
    )
    if "password_correct" in st.session_state:
        st.error("😕 Password incorrect")
    return False
# Gate the app: nothing below is rendered until the password was accepted.
if not check_password():
    st.stop()  # Do not continue if check_password is not True.

# Page header: title on the left, logo on the right.
header_title_col, header_logo_col = st.columns([2, 1])
header_title_col.title("TINA")
header_logo_col.image("tensora_logo.png")

# Writing style for generated articles; the selection is stored under the
# "webpage_option" session key.
WRITING_STYLES = [
    "Boulevard",
    "Health Blog",
    "Newspaper",
    "Tech/Lifestyle Blog",
    "Public Relations",
    "Sales",
    "Lifestyle Blog",
]
st.radio(
    "Wähle den Schreibstil für Artikel aus",
    WRITING_STYLES,
    key="webpage_option",
)

# Sidebar: one button per generation mode. Each click stores its mode in
# st.session_state["article_generation_mode"], which selects the section
# rendered further down.
with st.sidebar:
    st.title("Funktions Auswahl")
    st.write("Hier kannst Du zwischen der Art der Artikelgenerierung wählen.")
    for _mode_label, _mode_key, _mode_value in (
        ("Artikel Generierung mit Links", "article_gen_btn", "links"),
        ("Artikel Generierung mit Stichpunkten", "headline_gen_btn", "keywords"),
        ("Artikelgenerierung mit Trendthemenanalyse", "trends_gen_btn", "trends"),
    ):
        st.button(
            _mode_label,
            key=_mode_key,
            use_container_width=True,
            # Bind the mode as a default argument so every button keeps its
            # own value instead of the loop's last one.
            on_click=lambda chosen_mode=_mode_value: st.session_state.update(
                {"article_generation_mode": chosen_mode}
            ),
        )
# "links" mode: a four-step workflow (extract -> finalize -> generate ->
# output). st.session_state["selected_page"] is the step being shown and
# st.session_state["process_step"] gates which steps can be navigated to.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost - confirm the extent of the inner `if` bodies.
if st.session_state["article_generation_mode"] == "links":
    page_index = st.session_state["selected_page"]
    unlocked_step = st.session_state["process_step"]

    # One tab per workflow step. A tab is disabled while its step has not
    # been unlocked yet or while it is the page already shown.
    for tab_index, (tab_col, tab_label) in enumerate(
        zip(st.columns([1, 1, 1, 1]), PROCESS_STEPS)
    ):
        tab_col.button(
            tab_label,
            key=f"tab{tab_index + 1}",
            use_container_width=True,
            # Default argument pins the page number for this tab.
            on_click=lambda target_page=tab_index: st.session_state.update(
                {"selected_page": target_page}
            ),
            disabled=unlocked_step < tab_index or page_index == tab_index,
        )

    # Back / forward navigation around the name of the current step.
    nav_back_col, nav_title_col, nav_forward_col = st.columns([1, 4, 1])
    nav_back_col.button(
        "◀️",
        key="nav1",
        use_container_width=True,
        on_click=decrease_page,
        disabled=page_index == 0,
    )
    nav_title_col.markdown(
        f"<div style='text-align: center;'>{PROCESS_STEPS[page_index]}</div>",
        unsafe_allow_html=True,
    )
    nav_forward_col.button(
        "▶️",
        key="nav2",
        use_container_width=True,
        on_click=increase_page,
        disabled=page_index == unlocked_step,
    )

    if page_index == 0:
        # Step 1: collect article links one at a time.
        collected_links = st.session_state["article_links"]
        st.write(
            "Bitte gebe die Links der Artikel ein, welche Du extrahiert haben möchtest."
        )
        # The widget key contains the running number, so every stored link
        # is followed by a fresh, empty input after the rerun.
        next_link_number = len(collected_links) + 1
        link_input_key = f"link_input_{next_link_number}"
        st.text_input(f"Gebe den {next_link_number}. Link ein:", key=link_input_key)
        # Strip surrounding whitespace so pasted links are stored clean and
        # whitespace-only input is not stored as a link.
        entered_link = st.session_state[link_input_key].strip()
        if entered_link:
            collected_links.append(entered_link)
            st.rerun()

        for link_number, stored_link in enumerate(collected_links, start=1):
            st.write(f"Link nr. {link_number}:\n\n{stored_link}")

        if collected_links:
            # NOTE(review): Streamlit runs on_click callbacks at the start of
            # the next rerun, not inside this try block, so this handler
            # likely never sees errors raised by extract_article_links.
            try:
                st.button(
                    "Extrahiere Artikel",
                    on_click=extract_article_links,
                    kwargs={"links": collected_links},
                )
            except Exception as e:
                print(f"Fehler beim extrahieren der artikel: {e}")
                st.error(
                    f"Du hast einen oder mehrere Links nicht in dem korrekten Format angegeben. Bitte Lade die Seite neu und benutze korrekte Links: {e}",
                    icon="🚨",
                )

    elif page_index == 1:
        # Step 2: review and edit the extracted texts, or paste them by hand
        # when the extraction produced nothing.
        st.write(
            "Hier kannst Du die extrahierten Artikel ansehen und bei Bedarf anpassen."
        )
        for article_number, extracted_text in enumerate(
            st.session_state["extracted_articles"], start=1
        ):
            with st.expander(f"Artikel {article_number}"):
                if extracted_text:
                    editor_label = "Editiere die Artikel, falls nötig:"
                else:
                    st.info(
                        "Die Webseite des Artikels blockiert das automatische extrahieren von Artikeln. Wenn Du den Artikel dennoch benutzen möchtest, dann kannst Du diesen kopieren und einfügen.",
                        icon="ℹ️",
                    )
                    editor_label = "Füge den Artikel ein, falls nötig:"
                st.text_area(
                    editor_label,
                    value=extracted_text,
                    key=f"final_article_{article_number}",
                    height=500,
                )
        st.button("Artikel finalisieren", on_click=finalize_articles)

    elif page_index == 2:
        # Step 3: show the finalized texts and collect generation options.
        finalized_texts = st.session_state["final_articles"]
        for article_number, finalized_text in enumerate(finalized_texts, start=1):
            if finalized_text:
                with st.expander(f"Artikel {article_number}"):
                    st.write(finalized_text)

        if finalized_texts:
            st.write("Benutzte Artikel:")
            for link_number, used_link in enumerate(
                st.session_state["article_links"], start=1
            ):
                st.write(f"Link {link_number}: {used_link}")
            st.text_area(
                "Füge weitere Informationen für den Prompt hinzu, falls nötig:",
                key="add_info",
            )
            st.write("Artikellänge")
            st.radio(
                "Optionen",
                ["Kurz", "Mittel", "Lang", "SEO", "SEO Plus"],
                key="length_option",
            )
            st.button(
                "Artikel generieren",
                key="article_btn",
                # Read the widget values when the click is handled. Passing
                # them through `kwargs` would freeze the values of the last
                # render, so text typed right before the click could be lost.
                on_click=lambda: on_click_handler_generate_article(
                    length_option=st.session_state["length_option"],
                    final_articles=st.session_state["final_articles"],
                    add_info=st.session_state["add_info"],
                    webpage_option=st.session_state["webpage_option"],
                ),
            )

    elif page_index == 3:
        # Step 4: generated article, summary, optional extras and follow-ups.
        headline_text = st.session_state["generated_headline"]
        article_text = st.session_state["generated_article"]
        st.write(f"**{headline_text}**")
        st.write(article_text)
        st.write("**Zusammenfassung:**")
        st.write(st.session_state["article_summary"])
        st.write("Kopieren Sie den Artikel: ")
        st_copy_to_clipboard(headline_text + "\n" + article_text)

        if st.session_state["studie_links"]:
            st.write("Hier sind einige Studien, die relevant sein könnten:")
            for study in st.session_state["studie_links"]:
                st.write(f"- [{study['title']}]({study['link']})")
        else:
            st.write("Keine relevanten Studien gefunden.")

        # Takeaways and FAQ only exist after their buttons below were used.
        if "takeaways" in st.session_state:
            st.write("Hier sind einige Takeaways die wichtig sein könnten:")
            st.write(st.session_state["takeaways"])
        if "faq" in st.session_state:
            st.write("Hier sind FAQs zu dem Artikel:")
            st.write(st.session_state["faq"])

        st.button(
            "Relevante Studien finden",
            on_click=get_related_studies,
            args=(article_text,),
        )
        st.button(
            "Key Takeaways generieren",
            on_click=lambda: get_takeaways(st.session_state["generated_article"]),
        )
        st.button(
            "FAQ generieren",
            on_click=lambda: get_faq(st.session_state["generated_article"]),
        )
        st.button(
            "Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state
        )
# "keywords" mode: generate an article from free-text bullet points and show
# the result with the optional extras below it.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost - confirm that the follow-up buttons belong inside
# the "article exists" branch.
if st.session_state["article_generation_mode"] == "keywords":
    st.write(
        "Bitte trage die Stichpunkte ein, die Du in den Artikel einbauen möchtest. Der Textinput ist essenziell für die Generierung des Artikels."
    )
    st.text_area(label="Artikel input:", key="keyword_article_input")
    st.write("Artikellänge")
    st.radio(
        "Optionen", ["Kurz", "Mittel", "Lang", "SEO", "SEO Plus"], key="length_option"
    )
    st.button(
        "Artikel generieren",
        key="article_btn",
        # Read the widget values when the click is handled. Passing them
        # through `kwargs` would freeze the values of the last render, so
        # bullet points typed right before the click could be missing.
        on_click=lambda: on_click_handler_generate_generate_article_keywords(
            length_option=st.session_state["length_option"],
            artikel_input=st.session_state["keyword_article_input"],
            webpage_option=st.session_state["webpage_option"],
        ),
    )

    if st.session_state["generated_article"] and st.session_state["generated_headline"]:
        headline_text = st.session_state["generated_headline"]
        article_text = st.session_state["generated_article"]
        st.write(f"**{headline_text}**")
        st.write(article_text)
        st.write("**Zusammenfassung:**")
        st.write(st.session_state["article_summary"])
        st.write("Kopieren Sie den Artikel: ")
        st_copy_to_clipboard(headline_text + "\n" + article_text)

        # This mode prints nothing when no studies are stored; the
        # "no studies found" message was disabled in the original code.
        if st.session_state["studie_links"]:
            st.write("Hier sind einige Studien, die relevant sein könnten:")
            for study in st.session_state["studie_links"]:
                st.write(f"- [{study['title']}]({study['link']})")
        st.button(
            "Relevante Studien finden",
            on_click=get_related_studies,
            args=(article_text,),
        )

        # Takeaways and FAQ only exist after their buttons below were used.
        if "takeaways" in st.session_state:
            st.write("Hier sind einige Takeaways die wichtig sein könnten:")
            st.write(st.session_state["takeaways"])
        if "faq" in st.session_state:
            st.write("Hier sind FAQs zu dem Artikel:")
            st.write(st.session_state["faq"])

        st.button(
            "Key Takeaways generieren",
            on_click=lambda: get_takeaways(st.session_state["generated_article"]),
        )
        st.button(
            "FAQ generieren",
            on_click=lambda: get_faq(st.session_state["generated_article"]),
        )
        st.button(
            "Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state
        )
# "trends" mode: pick a trend time span, extract the linked articles, edit
# them and generate an article from the result.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost - confirm the extent of the two inner `if` bodies.
if st.session_state["article_generation_mode"] == "trends":
    trends_left, trends_right = st.columns([0.8, 0.2])
    trends_right.radio(
        label="Zeitraum auswählen",
        options=["Echtzeit", "Heute", "Gestern"],
        key="selection_content_trends_timespan",
    )
    trends_right.button(
        label="Suchen",
        type="primary",
        # Read the time span when the click is handled, not at render time.
        on_click=lambda: fetch_trends(
            timespan=st.session_state["selection_content_trends_timespan"]
        ),
        use_container_width=True,
    )

    # Render the trend list that belongs to the selected time span.
    trends_timespan = st.session_state["selection_content_trends_timespan"]
    trend_renderers = {
        "Echtzeit": render_trends_realtime,
        "Heute": render_trends_today,
        "Gestern": render_trends_yesterday,
    }
    render_selected_trends = trend_renderers.get(trends_timespan)
    if render_selected_trends is not None:
        render_selected_trends(trends_left)

    # NOTE(review): Streamlit runs on_click callbacks at the start of the
    # next rerun, not inside this try block, so this handler likely never
    # sees errors raised by extract_links.
    try:
        st.button(
            label="Informationen aus Links extrahieren",
            on_click=extract_links,
            use_container_width=True,
            type="secondary",
            key="btn_extract_trend_links",
            kwargs={
                "key": "content_trend_articles_extracted",
                "links": st.session_state["content_trend_article_links"],
            },
        )
    except Exception as e:
        print(f"Fehler beim Extrahieren der Informationen: {e}")
        st.error(
            body=f"Sie haben einen oder mehrere Links in einem inkorrekten Format angegeben. Bitte lade diese Seite neu und verwende valide URLs: {e}",
            icon="🚨",
        )

    if st.session_state["content_trend_article_links"]:
        st.write("Folgende Informationen konnten aus ihren Artikeln extrahiert werden:")
        for link_number, link_content in enumerate(
            st.session_state["content_trend_articles_extracted"], start=1
        ):
            with st.expander(f"Link {link_number}"):
                if link_content:
                    editor_label = "Bitte bearbeiten Sie die Informationen falls notwendig:"
                else:
                    st.info(
                        body="Die Webseite Ihres Artikels blockiert das automatische Extrahieren des Artikels. Wenn Sie den Artikel dennoch verwenden möchten, dann können Sie diesen kopieren und in das untenstehende Textfeld einfügen.",
                        icon="ℹ️",
                    )
                    editor_label = "Bitte fügen Sie den Artikel ein:"
                st.text_area(
                    editor_label,
                    value=link_content,
                    key=f"content_trend_article_final_{link_number}",
                )

        st.write("Artikellänge")
        st.radio(
            "Optionen",
            ["Kurz", "Mittel", "Lang", "SEO", "SEO Plus"],
            key="length_option",
        )
        st.text_area(
            "Füge weitere Informationen für den Prompt hinzu, falls nötig:",
            key="add_info",
        )
        st.button(
            "Artikel generieren",
            key="article_btn",
            # Collect the edited texts and options when the click is handled.
            # Passing them through `kwargs` would freeze the values of the
            # last render, so edits made right before the click could be lost.
            on_click=lambda: on_click_handler_generate_article(
                length_option=st.session_state["length_option"],
                final_articles=get_final_articles(),
                add_info=st.session_state["add_info"],
                webpage_option=st.session_state["webpage_option"],
            ),
        )

    if st.session_state["generated_headline"] and st.session_state["generated_article"]:
        headline_text = st.session_state["generated_headline"]
        article_text = st.session_state["generated_article"]
        st.write(f"**{headline_text}**")
        st.write(article_text)
        st.write("**Zusammenfassung:**")
        st.write(st.session_state["article_summary"])
        st.write("Kopieren Sie den Artikel: ")
        st_copy_to_clipboard(headline_text + "\n" + article_text)

        if st.session_state["studie_links"]:
            st.write("Hier sind einige Studien, die relevant sein könnten:")
            for study in st.session_state["studie_links"]:
                st.write(f"- [{study['title']}]({study['link']})")
        else:
            st.write("Keine relevanten Studien gefunden.")

        # Takeaways and FAQ only exist after their buttons below were used.
        if "takeaways" in st.session_state:
            st.write("Hier sind einige Takeaways die wichtig sein könnten:")
            st.write(st.session_state["takeaways"])
        if "faq" in st.session_state:
            st.write("Hier sind FAQs zu dem Artikel:")
            st.write(st.session_state["faq"])

        st.button(
            "Relevante Studien finden",
            on_click=get_related_studies,
            args=(article_text,),
        )
        st.button(
            "Key Takeaways generieren",
            on_click=lambda: get_takeaways(st.session_state["generated_article"]),
        )
        st.button(
            "FAQ generieren",
            on_click=lambda: get_faq(st.session_state["generated_article"]),
        )
        st.button(
            "Neuen Artikel generieren", key="reset_btn", on_click=reset_session_state
        )