"""Streamlit demo comparing a base TinyLlama model with the IRAI LLM-ADE model.

The script loads two Hugging Face text-generation pipelines, sends the same
user question to both in parallel, and shows the two answers side by side.
"""

from concurrent.futures import ThreadPoolExecutor

import streamlit as st
from transformers import pipeline

# Marker that separates the prompt from the model's reply in the chat format.
ASSISTANT_TAG = "<|assistant|>"


# Function to load models only once using Streamlit's cache mechanism
@st.cache_resource(show_spinner="Loading Models...")
def load_models():
    """Build both text-generation pipelines.

    Returns:
        A ``(base_pipe, irai_pipe)`` tuple of transformers pipelines.
    """
    base_pipe = pipeline(
        "text-generation",
        model="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
        max_length=512,
    )
    irai_pipe = pipeline(
        "text-generation",
        model="InvestmentResearchAI/LLM-ADE_tiny-v0.001",
        max_length=512,
    )
    return base_pipe, irai_pipe


@st.cache_resource(show_spinner=False)
def _get_executor():
    """Return the shared two-worker thread pool.

    Streamlit re-executes this script on every interaction; caching the pool
    avoids creating (and leaking) a new set of worker threads on each rerun.
    """
    return ThreadPoolExecutor(max_workers=2)


base_pipe, irai_pipe = load_models()

prompt_template = (
    "<|system|>\n"
    "You are a friendly chatbot who always gives helpful, detailed, and polite answers.\n"
    "<|user|>\n"
    "{input_text}\n"
    "<|assistant|>\n"
)

executor = _get_executor()


def _strip_prompt(generated, prompt):
    """Return ``generated`` without a leading copy of ``prompt``.

    Only an exact prefix is removed, so a later repetition of the prompt
    inside the model's answer is left untouched.
    """
    if generated.startswith(prompt):
        return generated[len(prompt):]
    return generated


def _extract_assistant_reply(generated, prompt):
    """Return the assistant's reply from a chat-formatted generation.

    The prompt is removed by prefix so that an ``<|assistant|>`` tag typed by
    the user cannot shift the split point. If the prompt is not an exact
    prefix, the text after the first tag is used; if no tag exists at all the
    whole text is used instead of raising ``IndexError``. The reply is cut at
    the next tag, if any, and stripped of surrounding whitespace.
    """
    if generated.startswith(prompt):
        reply = generated[len(prompt):]
    else:
        _, marker, tail = generated.partition(ASSISTANT_TAG)
        reply = tail if marker else generated
    return reply.partition(ASSISTANT_TAG)[0].strip()


def generate_base_response(input_text):
    """Run the base model on the raw text and return its ``generated_text``."""
    return base_pipe(input_text)[0]["generated_text"]


def generate_irai_response(input_text):
    """Run the LLM-ADE model on the chat-formatted prompt and return its reply."""
    formatted_input = prompt_template.format(input_text=input_text)
    result = irai_pipe(formatted_input)[0]["generated_text"]
    return _extract_assistant_reply(result, formatted_input)


@st.cache_data(show_spinner="Generating responses...")
def _generate_response_cached(input_text):
    """Query both models concurrently; results are cached per ``input_text``.

    Exceptions propagate to the caller, so a failed run is never stored in the
    cache and the same question can simply be retried.
    """
    future_base = executor.submit(generate_base_response, input_text)
    future_irai = executor.submit(generate_irai_response, input_text)
    base_resp = _strip_prompt(future_base.result(), input_text)
    irai_resp = future_irai.result()
    return base_resp, irai_resp


def generate_response(input_text):
    """Return ``(base_response, irai_response)`` for ``input_text``.

    On failure an error message is shown in the app and ``(None, None)`` is
    returned. Error handling lives outside the cached function so failures
    are reported once rather than memoised.
    """
    try:
        return _generate_response_cached(input_text)
    except Exception as e:  # UI boundary: report any failure to the user.
        st.error(f"An error occurred: {e}")
        return None, None


st.title("IRAI LLM-ADE vs Base Model")
user_input = st.text_area("Enter a financial question:", "")

if st.button("Generate"):
    if user_input.strip():
        base_response, irai_response = generate_response(user_input)
        # Skip the result panes when generation failed (error already shown).
        if base_response is not None and irai_response is not None:
            col1, col2 = st.columns(2)
            with col1:
                st.header("Base Model")
                # Distinct, hidden labels keep the two widgets' identities
                # apart and avoid Streamlit's empty-label warning.
                st.text_area(
                    label="Base model response",
                    value=base_response,
                    height=300,
                    label_visibility="collapsed",
                )
            with col2:
                st.header("LLM-ADE Enhanced")
                st.text_area(
                    label="LLM-ADE response",
                    value=irai_response,
                    height=300,
                    label_visibility="collapsed",
                )
    else:
        st.warning("Please enter some text")