# Hugging Face Spaces status banner captured with this file: "Runtime error" (shown twice).
import json
import logging
import os

import bs4
import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import gradio as gr
import requests
# Embedding function used for both indexing and querying. The API token is read
# from the ``hf_token`` environment variable; a missing variable raises KeyError
# at import time.
embeddingfunc = embedding_functions.HuggingFaceEmbeddingFunction(
    api_key=os.environ["hf_token"],
    model_name="BAAI/bge-small-en-v1.5",
)
# client = chromadb.PersistentClient(path="booksofjainism")
client = chromadb.HttpClient("https://shethjenil-chromadb-server.hf.space/", port=443)
elibbookAI = client.get_or_create_collection("jainebooks")
# Load the book catalogue with an explicit encoding and a context manager so the
# file handle is closed and decoding does not depend on the platform default.
with open("jainbooks.json", "r", encoding="utf-8") as _catalog_file:
    allbookdata = json.load(_catalog_file)
# Values offered in the upload dropdown: the ``search`` field of every catalogue entry.
allsearch = [i['search'] for i in allbookdata]
class jainnlp:
    """Gradio callbacks that index book pages into Chroma and search them.

    All methods are classmethods: they keep no per-instance state, and this
    lets them be passed to ``gr.Interface`` directly from the class
    (``jainnlp.loaddata``) as well as called on an instance. As plain
    functions, the first UI input would be bound to the leading parameter.
    """

    # Seconds to wait on each HTTP call so a stalled remote server cannot block
    # a request forever.
    _HTTP_TIMEOUT = 30

    @classmethod
    def books(cls) -> list[str]:
        """Return the distinct ``documents`` values stored in the collection.

        ``loaddata`` stores every page with the book's ``sr_no`` as its
        document, so the result is the set of book ids already indexed.
        """
        return list(set(elibbookAI.get(include=["documents"])["documents"]))

    @classmethod
    def loaddata(cls, search: str, progress=gr.Progress(), lang: str = "gu") -> str:
        """Scrape, translate, embed and store every page of the selected book.

        Args:
            search: Value of the catalogue entry's ``search`` field.
            progress: Gradio progress tracker, called after each stored page.
            lang: Source language code handed to the translation endpoint.

        Returns:
            The string ``"done"``, whether or not anything was indexed.

        Raises:
            requests.RequestException: If the book text itself cannot be
                fetched. Failures on individual pages are logged and skipped.
        """
        log = logging.getLogger(__name__)
        for bookdata in allbookdata:
            if bookdata['search'] != search:
                continue
            bookname = bookdata['title_english']
            book_id = bookdata['sr_no']
            pages = int(bookdata["pages"])
            if book_id in cls.books():
                continue  # this book is already in the collection

            listing = requests.get(
                f'https://jainqq.org/booktext/{bookname.replace(" ","_")}/{book_id}',
                timeout=cls._HTTP_TIMEOUT,
            )
            strings = list(
                bs4.BeautifulSoup(listing.content, 'html.parser').find('div').stripped_strings
            )
            # Keep every second string and drop the first three lines of each.
            # NOTE(review): presumably the skipped parts are page headers/separators;
            # confirm against the site's markup.
            page_texts = ["\n".join(chunk.split("\n")[3:]) for chunk in strings[::2]]

            for page, content in enumerate(page_texts):
                try:
                    # NOTE(review): the API key is hard-coded in source; consider
                    # moving it to configuration.
                    contenteng = requests.post(
                        "https://translate-pa.googleapis.com/v1/translateHtml",
                        headers={
                            "Content-Type": "application/json+protobuf",
                            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
                            "X-Goog-Api-Key": "AIzaSyATBXajvzQLTDHEQbcpq0Ihe0vWDHmO520",
                        },
                        json=[[content, lang, "en"], "wt_lib"],
                        timeout=cls._HTTP_TIMEOUT,
                    ).json()[0][0]
                    page_html = requests.get(
                        f"https://jainqq.org/explore/{book_id}/{page}",
                        timeout=cls._HTTP_TIMEOUT,
                    ).content
                    contentimg = (
                        bs4.BeautifulSoup(page_html, "html.parser")
                        .find("img", class_="img-fluid")
                        .get("src")
                    )
                    elibbookAI.add(
                        embeddings=embeddingfunc(contenteng),
                        metadatas={
                            "bookname": bookdata['search'],
                            "page": page,
                            "bookid": book_id,
                            "originalcontent": content,
                            "contenteng": contenteng,
                            "contentimg": contentimg,
                        },
                        ids=f"{book_id}-{page}",
                        documents=book_id,
                    )
                except Exception:
                    # Best effort per page: record the failure and carry on, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    log.warning(
                        "Skipping page %s of book %s", page, book_id, exc_info=True
                    )
                else:
                    # Guard against a catalogue entry that lists zero pages.
                    progress(page / pages if pages else 0)
        return "done"

    @classmethod
    def qna(cls, query: str, booklist: list[str] = None, notbooklist: list[str] = None, limit: int = 1, lang: str = "gu") -> list:
        """Return the page-image URLs of the pages closest to ``query``.

        Args:
            query: Question text written in ``lang``.
            booklist: If non-empty, only pages whose ``bookid`` is listed match.
            notbooklist: If non-empty, pages whose ``bookid`` is listed are excluded.
            limit: Number of results requested from the collection.
            lang: Source language code of ``query``; it is translated to English.

        Returns:
            The ``contentimg`` metadata value of each hit; an empty list for a
            blank query.
        """
        if not query or not query.strip():
            return []

        # Combine both filters; with a single filter pass it as-is, since
        # ``$and`` is only needed to join several conditions.
        conditions = []
        if booklist:
            conditions.append({"bookid": {"$in": booklist}})
        if notbooklist:
            conditions.append({"bookid": {"$nin": notbooklist}})
        if not conditions:
            where = None
        elif len(conditions) == 1:
            where = conditions[0]
        else:
            where = {"$and": conditions}

        # ``params`` URL-encodes the query so characters such as ``&`` or ``#``
        # cannot truncate it or inject extra parameters.
        payload = requests.get(
            "https://translate.googleapis.com/translate_a/single",
            params={"client": "gtx", "sl": lang, "tl": "en", "dt": "t", "q": query},
            timeout=cls._HTTP_TIMEOUT,
        ).json()
        # The first element holds one entry per translated segment; join them all
        # so multi-sentence queries are not cut to their first sentence.
        english = "".join(segment[0] for segment in payload[0] if segment[0])

        hits = elibbookAI.query(embeddingfunc(english), n_results=limit, where=where)
        return [meta["contentimg"] for meta in hits["metadatas"][0]]
def reset():
    """Call ``client.reset()`` on the shared Chroma client and report completion.

    Returns:
        The string ``"done"`` once the reset call has returned.
    """
    client.reset()
    status = "done"
    return status
# Bind the callbacks through an instance so Gradio passes its inputs to
# ``search`` / ``query`` rather than to the methods' leading parameter.
_handlers = jainnlp()
# Upload tab: pick a catalogue entry, index it, show the returned status text.
upload = gr.Interface(_handlers.loaddata, gr.Dropdown(allsearch), gr.Textbox())
# Chat tab: free-text question in, gallery of matching page images out.
chatref = gr.Interface(_handlers.qna, gr.Textbox(), gr.Gallery())
if __name__ == "__main__":
    gr.TabbedInterface([upload, chatref], ["Upload", "Chat"]).launch()