# GitaShlok / app.py
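# Streamlit app for semantic search over Bhagavad Gita verses: English translations
# are embedded with a SentenceTransformer, indexed with Annoy, and the verse closest
# to the user's question is shown with a typewriter-style reveal.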
import streamlit as st
import textwrap
import pandas as pd
import time
from sentence_transformers import SentenceTransformer, util
from annoy import AnnoyIndex
footer = """
<p style='text-align: center; color: gray;'>Made with inspiration by Abhijeet Singh</p>
"""
# Column names as they appear in the spreadsheet (including the 'Enlgish Translation' spelling)
shlok_keys = ['Title', 'Chapter', 'Verse', 'Hindi Anuvad', 'Enlgish Translation']
max_line_length = 100 # Adjust as needed
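# The loaders below are cached with st.cache_resource, so the spreadsheet, the model,
# the verse embeddings, and the Annoy index are built once and reused across reruns.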
@st.cache_resource
def load_data():
    hn_filepath = 'Gita.xlsx'
    return pd.read_excel(hn_filepath)
@st.cache_resource
def load_hn_model():
    return SentenceTransformer('all-mpnet-base-v2')
hn_model = load_hn_model()
@st.cache_resource
def build_embeddings(hn_data):
    # Encode each English translation; .cpu() keeps .numpy() working even if a GPU is used
    return [hn_model.encode(hn_data['Enlgish Translation'][i], convert_to_tensor=True).cpu().numpy()
            for i in range(len(hn_data))]
@st.cache_resource
def build_annoy_index(shloka_embeddings):
    embedding_size = len(shloka_embeddings[0])
    annoy_index = AnnoyIndex(embedding_size, metric='angular')
    for i, embedding in enumerate(shloka_embeddings):
        annoy_index.add_item(i, embedding)
    annoy_index.build(18)  # 18 trees; more trees improves accuracy at the cost of build time
    return annoy_index
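# Annoy's 'angular' metric ranks neighbours by angular distance, so its ordering agrees
# closely with the exact cosine-similarity re-ranking performed after retrieval.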
def wrap_text(text):
    pass  # Unused placeholder; wrapping is done with textwrap.fill below
# st.write("shree ganeshay namah")
hn_data = load_data()
shloka_embeddings = build_embeddings(hn_data)
annoy_index = build_annoy_index(shloka_embeddings)
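# Page chrome: title, footer credit, and CSS tweaks for container width and pre-line text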
st.title("GitaShlok Bhagavad Gita Assistant")
st.markdown(footer, unsafe_allow_html=True)
st.markdown(
    """
    <style>
    .reportview-container {
        width: 90%;
    }
    </style>
    """,
    unsafe_allow_html=True
)
st.markdown(
    """
    <style>
    .streamlit-text-container {
        white-space: pre-line;
    }
    </style>
    """,
    unsafe_allow_html=True
)
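# Query flow: embed the question, fetch nearest candidates from Annoy,
# re-rank them by exact cosine similarity, and display the best match.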
query = st.text_input("Ask any question related to the Bhagavad Gita: ")
if st.button('Ask'):
    query_embedding = hn_model.encode(query, convert_to_tensor=True).cpu().numpy()
    # Use the Annoy index for an efficient approximate nearest-neighbour search
    similar_indices = annoy_index.get_nns_by_vector(query_embedding, 18)
    # Re-rank the candidate shlokas by exact cosine similarity
    similarities = []
    for curr_index in similar_indices:
        similarity = util.cos_sim(query_embedding, shloka_embeddings[curr_index]).item()
        curr_shlok_details = {key: hn_data[key][curr_index] for key in hn_data}
        similarities.append({"shlok_details": curr_shlok_details, "similarity": similarity})
    # Keep the most similar shloka
    top_result = sorted(similarities, key=lambda x: x["similarity"], reverse=True)[0]
    top_shlok_details = top_result["shlok_details"]
    adhyay_number = top_shlok_details['Chapter'].split(" ")[1]  # chapter number (currently unused)
    shlok_number = top_shlok_details['Verse'].split(" ")[1].split(".")[1]
st.write("------------------------------")
st.write(f"{top_shlok_details['Chapter']} , Shloka : {shlok_number}")
wrapped_text = textwrap.fill(top_shlok_details['Enlgish Translation'], width=max_line_length)
wrapped_hindi_text=textwrap.fill(top_shlok_details['Hindi Anuvad'], width=max_line_length)
placeholder = st.empty()
prev_text=''
for char in wrapped_text:
prev_text=prev_text+char
placeholder.text(prev_text)
time.sleep(0.01) # Adjust the sleep duration as needed
st.write("\n------------------------------")
hindi_placeholder = st.empty()
hindi_text=''
for char in wrapped_hindi_text :
hindi_text=hindi_text+char
hindi_placeholder.text(hindi_text)
time.sleep(0.005) # Adjust the sleep duration as needed
st.write("\n------------------------------")