mle-case-study / app.py
manfredmichael's picture
Initial commit
966108f
raw
history blame
No virus
2.97 kB
import streamlit as st
from dotenv import load_dotenv
import json
import os, time
import uuid
from retrieval_pipeline import get_retriever, get_compression_retriever
import benchmark
def get_result(query, compression_retriever):
    """Retrieve documents relevant to *query* and measure wall-clock latency.

    Args:
        query: Free-text search query.
        compression_retriever: Any retriever exposing ``get_relevant_documents``.

    Returns:
        Tuple of (retrieved documents, elapsed time in seconds).
    """
    started_at = time.time()
    documents = compression_retriever.get_relevant_documents(query)
    elapsed_seconds = time.time() - started_at
    return documents, elapsed_seconds
# Page-level configuration. NOTE: Streamlit requires set_page_config to be
# the first st.* command executed, which is why it sits at module level
# before main() runs.
st.set_page_config(
    layout="wide",
    page_title="Retrieval Demo"
)
def setup():
    """Load environment configuration and build the compression retriever.

    Reads ``ELASTICSEARCH_URL`` from the environment (a local ``.env`` file
    is honored via ``load_dotenv``), constructs the base retriever over the
    ``masa.ai`` index, and wraps it with the re-ranking compression retriever.

    Returns:
        The compression retriever produced by ``get_compression_retriever``.

    Raises:
        RuntimeError: if ``ELASTICSEARCH_URL`` is not set — previously a
            missing variable passed ``None`` downstream and failed with an
            obscure connection error.
    """
    load_dotenv()
    elasticsearch_url = os.getenv('ELASTICSEARCH_URL')
    if not elasticsearch_url:
        raise RuntimeError(
            "ELASTICSEARCH_URL is not set; define it in the environment or a .env file."
        )
    retriever = get_retriever(index='masa.ai', elasticsearch_url=elasticsearch_url)
    return get_compression_retriever(retriever)
def main():
    """Render the retrieval demo: sidebar model notes, a query form, and the
    retrieved documents together with retrieval latency."""
    st.title("Part 3: Search")

    # st.sidebar.write("According to the Model Size 👇")
    # menu = ["Nano", "Small", "Medium", "Large"]
    # choice = st.sidebar.selectbox("Choose", menu)

    st.sidebar.info("""
    **Model Options:**

    - **Nano**: ~4MB, blazing fast model with competitive performance (ranking precision).
    - **Small**: ~34MB, slightly slower with the best performance (ranking precision).
    - **Medium**: ~110MB, slower model with the best zero-shot performance (ranking precision).
    - **Large**: ~150MB, slower model with competitive performance (ranking precision) for 100+ languages.
    """)

    # Build (or rebuild) the retriever pipeline; shown behind a spinner
    # because it may involve network access to Elasticsearch.
    with st.spinner('Setting up...'):
        compression_retriever = setup()

    with st.expander("Tech Stack Used"):
        st.markdown("""
        **Flash Rank**: Ultra-lite & Super-fast Python library for search & retrieval re-ranking.

        - **Ultra-lite**: No heavy dependencies. Runs on CPU with a tiny ~4MB reranking model.
        - **Super-fast**: Speed depends on the number of tokens in passages and query, plus model depth.
        - **Cost-efficient**: Ideal for serverless deployments with low memory and time requirements.
        - **Based on State-of-the-Art Cross-encoders**: Includes models like ms-marco-TinyBERT-L-2-v2 (default), ms-marco-MiniLM-L-12-v2, rank-T5-flan, and ms-marco-MultiBERT-L-12.
        - **Sleek Models for Efficiency**: Designed for minimal overhead in user-facing scenarios.

        _Flash Rank is tailored for scenarios requiring efficient and effective reranking, balancing performance with resource usage._
        """)

    with st.form(key='input_form'):
        query_input = st.text_area("Query Input")
        # context_input = st.text_area("Context Input")
        submit_button = st.form_submit_button(label='Retrieve')

    if submit_button:
        # Persist submission across Streamlit reruns.
        st.session_state.submitted = True

    if 'submitted' in st.session_state:
        with st.spinner('Processing...'):
            result, latency = get_result(query_input, compression_retriever)
        st.subheader("Please find the retrieved documents below 👇")
        # BUG FIX: get_result reports latency in SECONDS (time.time() delta),
        # but the label says "ms" — convert before display.
        st.write("latency:", latency * 1000, " ms")
        # NOTE(review): st.json assumes the retrieved documents are
        # JSON-serializable — confirm against the retriever's return type.
        st.json(result)
# Standard script entry guard: run the app only when executed directly.
if __name__ == "__main__":
    main()