Spaces:

Annikaijak
/

Air_quality

Sleeping

App Files Files Community

Annikaijak committed on Apr 27

Commit

cd8951a

•

1 Parent(s): 3cf3b3b

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -174

app.py CHANGED Viewed

@@ -1,197 +1,146 @@
-import json
-import time
-import pickle
 import joblib
-import hopsworks
-import streamlit as st
-from geopy import distance
-import plotly.express as px
-import folium
-from streamlit_folium import st_folium
-from functions import *
-def print_fancy_header(text, font_size=22, color="#ff5f27"):
-    res = f'<span style="color:{color}; font-size: {font_size}px;">{text}</span>'
-    st.markdown(res, unsafe_allow_html=True)
-@st.cache_data()
-def get_batch_data_from_fs(td_version, date_threshold):
-    st.write(f"Retrieving the Batch data since {date_threshold}")
-    feature_view.init_batch_scoring(training_dataset_version=td_version)
-    batch_data = feature_view.get_batch_data(start_time=date_threshold)
-    return batch_data
-@st.cache_data()
-def download_model(name="air_quality_xgboost_model", version=1):
-    mr = project.get_model_registry()
-    retrieved_model = mr.get_model(
-        name="air_quality_xgboost_model",
-        version=1
     )
-    saved_model_dir = retrieved_model.download()
-    return saved_model_dir
-def plot_pm2_5(df):
-    # create figure with plotly express
-    fig = px.line(df, x='date', y='pm2_5', color='city_name')
-    # customize line colors and styles
-    fig.update_traces(mode='lines+markers')
-    fig.update_layout({
-        'plot_bgcolor': 'rgba(0, 0, 0, 0)',
-        'paper_bgcolor': 'rgba(0, 0, 0, 0)',
-        'legend_title': 'City',
-        'legend_font': {'size': 12},
-        'legend_bgcolor': 'rgba(0, 0, 0, 0)',
-        'xaxis': {'title': 'Date'},
-        'yaxis': {'title': 'PM2.5'},
-        'shapes': [{
-            'type': 'line',
-            'x0': datetime.datetime.now().strftime('%Y-%m-%d'),
-            'y0': 0,
-            'x1': datetime.datetime.now().strftime('%Y-%m-%d'),
-            'y1': df['pm2_5'].max(),
-            'line': {'color': 'red', 'width': 2, 'dash': 'dashdot'}
-        }]
-    })
-    # show plot
-    st.plotly_chart(fig, use_container_width=True)
-with open('target_cities.json') as json_file:
-    target_cities = json.load(json_file)
-#########################
-st.title('🌫 Air Quality Prediction 🌦')
-st.write(3 * "-")
-print_fancy_header('\n📡 Connecting to Hopsworks Feature Store...')
-st.write("Logging... ")
-# (Attention! If the app has stopped at this step,
-# please enter your Hopsworks API Key in the command prompt.)
-project = hopsworks.login()
-fs = project.get_feature_store()
-st.write("✅ Logged in successfully!")
-st.write("Getting the Feature View...")
-feature_view = fs.get_feature_view(
-    name = 'air_quality_fv',
-    version = 1
-)
-st.write("✅ Success!")
-# I am going to load data for the last 60 days (for feature engineering)
-today = datetime.date.today()
-date_threshold = today - datetime.timedelta(days=60)
-st.write(3 * "-")
-print_fancy_header('\n☁️ Retriving batch data from Feature Store...')
-batch_data = get_batch_data_from_fs(td_version=1,
-                                    date_threshold=date_threshold)
-st.write("Batch data:")
-st.write(batch_data.sample(5))
-saved_model_dir = download_model(
-    name="air_quality_xgboost_model",
-    version=1
 )
-pipeline = joblib.load(saved_model_dir + "/xgboost_pipeline.pkl")
-st.write("\n")
-st.write("✅ Model was downloaded and cached.")
-st.write(3 * '-')
-st.write("\n")
-print_fancy_header(text="🖍 Select the cities using the form below. \
-                         Click the 'Submit' button at the bottom of the form to continue.",
-                   font_size=22)
-dict_for_streamlit = {}
-for continent in target_cities:
-        for city_name, coords in target_cities[continent].items():
-            dict_for_streamlit[city_name] = coords
-selected_cities_full_list = []
-with st.form(key="user_inputs"):
-    print_fancy_header(text='\n🗺 Here you can choose cities from the drop-down menu',
-                       font_size=20, color="#00FFFF")
-    cities_multiselect = st.multiselect(label='',
-                                        options=dict_for_streamlit.keys())
-    selected_cities_full_list.extend(cities_multiselect)
-    st.write("_" * 3)
-    print_fancy_header(text="\n📌 To add a city using the interactive map, click somewhere \
-                             (for the coordinates to appear)",
-                       font_size=20, color="#00FFFF")
-    my_map = folium.Map(location=[42.57, -44.092], zoom_start=2)
-    # Add markers for each city
-    for city_name, coords in dict_for_streamlit.items():
-        folium.CircleMarker(
-            location=coords
-        ).add_to(my_map)
-    my_map.add_child(folium.LatLngPopup())
-    res_map = st_folium(my_map, width=640, height=480)
-    try:
-        new_lat, new_long = res_map["last_clicked"]["lat"], res_map["last_clicked"]["lng"]
-        # Calculate the distance between the clicked location and each city
-        distances = {city: distance.distance(coord, (new_lat, new_long)).km for city, coord in dict_for_streamlit.items()}
-        # Find the city with the minimum distance and print its name
-        nearest_city = min(distances, key=distances.get)
-        print_fancy_header(text=f"You have selected {nearest_city} using map", font_size=18, color="#52fa23")
-        selected_cities_full_list.append(nearest_city)
-        st.write(label_encoder.transform([nearest_city])[0])
-    except Exception as err:
-        print(err)
-        pass
-    submit_button = st.form_submit_button(label='Submit')
-if submit_button:
-    st.write('Selected cities:', selected_cities_full_list)
-    st.write(3*'-')
-    dataset = batch_data
-    dataset = dataset.sort_values(by=["city_name", "date"])
-    st.write("\n")
-    print_fancy_header(text='\n🧠 Predicting PM2.5 for selected cities...',
-                       font_size=18, color="#FDF4F5")
-    st.write("")
-    preds = pd.DataFrame(columns=dataset.columns)
-    for city_name in selected_cities_full_list:
-        st.write(f"\t * {city_name}...")
-        features = dataset.loc[dataset['city_name'] == city_name]
-        print(features.head())
-        features['pm2_5'] = pipeline.predict(features)
-        preds = pd.concat([preds, features])
-    st.write("")
-    print_fancy_header(text="📈Results 📉",
-                       font_size=22)
-    plot_pm2_5(preds[preds['city_name'].isin(selected_cities_full_list)])
-    st.write(3 * "-")
-    st.subheader('\n🎉 📈 🤝 App Finished Successfully 🤝 📈 🎉')
-    st.button("Re-run")

+import streamlit as st
+import hopsworks
 import joblib
+from openai import OpenAI
+from functions.llm_chain import (
+    load_model,
+    get_llm_chain,
+    generate_response,
+    generate_response_openai,
+)
+import warnings
+warnings.filterwarnings('ignore')
+st.title("🌤️ AirQuality AI assistant 💬")
+@st.cache_resource()
+def connect_to_hopsworks():
+    # Log in to Hopsworks and gather the three objects the chat handlers need.
+    # Takes no arguments and returns the tuple
+    # (feature_view, model_air_quality, encoder).
+    # Decorated with st.cache_resource, so the returned objects are cached
+    # rather than rebuilt on every Streamlit script rerun.
+    # NOTE(review): hopsworks.login() is called without arguments - presumably
+    # the API key comes from the environment or a prompt; confirm for this deployment.
+    # Initialize Hopsworks feature store connection
+    project = hopsworks.login()
+    fs = project.get_feature_store()
+    # Retrieve the model registry
+    mr = project.get_model_registry()
+    # Retrieve the 'air_quality_fv' feature view
+    feature_view = fs.get_feature_view(
+        name="air_quality_fv",
+        version=1,
+    )
+    # Initialize batch scoring; the positional 1 is presumably the training
+    # dataset version - TODO confirm against the Hopsworks feature view API
+    feature_view.init_batch_scoring(1)
+    # Retrieve the 'air_quality_xgboost_model' from the model registry
+    retrieved_model = mr.get_model(
+        name="air_quality_xgboost_model",
+        version=1,
+    )
+    # Download the saved model artifacts to a local directory
+    saved_model_dir = retrieved_model.download()
+    # Load the XGBoost regressor model and label encoder from the saved model directory
+    # (both are expected as .pkl files directly inside saved_model_dir)
+    model_air_quality = joblib.load(saved_model_dir + "/xgboost_regressor.pkl")
+    encoder = joblib.load(saved_model_dir + "/label_encoder.pkl")
+    return feature_view, model_air_quality, encoder
+@st.cache_resource()
+def retrieve_llm_chain():
+    # Build the local LLM stack and return (model_llm, tokenizer, llm_chain).
+    # Decorated with st.cache_resource, so the objects are cached instead of
+    # being rebuilt on every Streamlit script rerun.
+    # load_model and get_llm_chain are imported from functions.llm_chain;
+    # what they load or configure is not visible from this file.
+    # Load the LLM and its corresponding tokenizer.
+    model_llm, tokenizer = load_model()
+    # Create and configure a language model chain.
+    llm_chain = get_llm_chain(
+        model_llm,
+        tokenizer,
     )
+    return model_llm, tokenizer, llm_chain
+# Retrieve the feature view, air quality model and encoder for the city_name column
+feature_view, model_air_quality, encoder = connect_to_hopsworks()
+# Initialize the chat history and the remembered response source when either
+# key is missing from the session state (e.g. the first run of a session)
+if "response_source" not in st.session_state or "messages" not in st.session_state:
+    st.session_state.messages = []
+    st.session_state.response_source = ""
+# User choice for model selection in the sidebar with OpenAI API as the default
+new_response_source = st.sidebar.radio(
+    "Choose the response generation method:",
+    ('Hermes LLM', 'OpenAI API'),
+    index=1  # Sets "OpenAI API" as the default selection
 )
+# If the user switches the response generation method, clear the chat.
+# The stored source starts as "", so this branch also fires once on the
+# first run, before any chat exists.
+if new_response_source != st.session_state.response_source:
+    st.session_state.messages = []  # Clear previous chat messages
+    st.session_state.response_source = new_response_source  # Update response source in session state
+    # No "chat cleared" notice is shown here; the rerun below simply redraws the empty chat.
+    # NOTE(review): st.experimental_rerun is deprecated in newer Streamlit
+    # releases in favour of st.rerun - confirm against the pinned version.
+    st.experimental_rerun()  # Rerun the app to reflect changes immediately
+if new_response_source == 'OpenAI API':
+    openai_api_key = st.sidebar.text_input("Enter your OpenAI API key:", type="password")
+    # client is bound only when a key was entered; the OpenAI branch further
+    # down re-checks openai_api_key before it uses client.
+    if openai_api_key:
+        client = OpenAI(
+            api_key=openai_api_key
+        )
+        st.sidebar.success("API key saved successfully ✅")
+elif new_response_source == 'Hermes LLM':
+    # Load the LLM, tokenizer, and llm_chain only when the 'Hermes LLM' option is selected
+    model_llm, tokenizer, llm_chain = retrieve_llm_chain()
+# Display chat messages from history on app rerun
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+# React to user input
+if user_query := st.chat_input("How can I help you?"):
+    # Display user message in chat message container
+    st.chat_message("user").markdown(user_query)
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": user_query})
+    st.write('⚙️ Generating Response...')
+    if new_response_source == 'Hermes LLM':
+        # Generate a response to the user query with the local LLM chain
+        response = generate_response(
+            user_query,
+            feature_view,
+            model_air_quality,
+            encoder,
+            model_llm,
+            tokenizer,
+            llm_chain,
+            verbose=False,
+        )
+    elif new_response_source == 'OpenAI API' and openai_api_key:
+        # Generate a response through the OpenAI client built in the sidebar step
+        response = generate_response_openai(
+            user_query,
+            feature_view,
+            model_air_quality,
+            encoder,
+            client,
+            verbose=False,
+        )
+    else:
+        # Reached when 'OpenAI API' is selected but no API key has been entered
+        response = "Please select a response generation method and provide necessary details."
+    # Display assistant response in chat message container
+    with st.chat_message("assistant"):
+        st.markdown(response)
+    # Add assistant response to chat history
+    st.session_state.messages.append({"role": "assistant", "content": response})