import os import re import requests import json import pickle import numpy as np import pandas as pd from typing import List from typing import Optional from typing import Union import streamlit as st from database import execute_sql_query from bs4 import BeautifulSoup from aksharamukha import transliterate from sentence_transformers import util from langchain_nomic.embeddings import NomicEmbeddings from langchain_community.embeddings import HuggingFaceBgeEmbeddings def load_pickle(path): with open(path,'rb') as f: docs = pickle.load(f) return docs def initialize_embedding_model(model_name, device="cpu", normalize_embeddings=True): model_kwargs = {"device": device} encode_kwargs = {"normalize_embeddings": normalize_embeddings} return HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs) # embedding model for quick calculations os.environ['NOMIC_API_KEY'] = os.getenv('NOMIC_API_KEY') #nomic embed model used for similarity scores nomic_embed_model = NomicEmbeddings( dimensionality=128, model="nomic-embed-text-v1.5", ) def get_list_meaning_word(word): pada_meanings = {'pada': word, 'Monier-Williams Sanskrit-English Dictionary (1899)': [], 'Shabda-Sagara (1900)': [], 'Apte-Practical Sanskrit-English Dictionary (1890)': [], } url = f"https://ambuda.org/tools/dictionaries/mw,shabdasagara,apte/{word}" try: # Fetch HTML content response = requests.get(url) response.raise_for_status() # Parse HTML with BeautifulSoup soup = BeautifulSoup(response.text, 'html.parser') # Extracting text from different tags divs = soup.find_all('div', class_='my-4', attrs={'x-show': 'show'}) try: # Find all list items
  • within the specified