Spaces:
Runtime error
Runtime error
Commit
•
bd2a59e
1
Parent(s):
9046162
Update app.py
Browse files
app.py
CHANGED
@@ -1,79 +1,12 @@
|
|
1 |
import chromadb
|
2 |
import requests
|
3 |
import chromadb.utils.embedding_functions as embedding_functions
|
4 |
-
import
|
5 |
-
import json
|
6 |
import gradio as gr
|
7 |
import os
|
8 |
-
|
9 |
-
import base64
|
10 |
embeddingfunc = embedding_functions.HuggingFaceEmbeddingFunction(api_key=os.environ["hf_token"],model_name="sentence-transformers/all-MiniLM-L6-v2")
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
allsearch = [i['search'] for i in allbookdata]
|
16 |
-
class jainnlp:
|
17 |
-
@classmethod
|
18 |
-
def books(cls)->list[str]:
|
19 |
-
return list(set(elibbookAI.get(include=[ "documents" ])["documents"]))
|
20 |
-
@classmethod
|
21 |
-
def loaddata(cls,search:str,progress = gr.Progress(),lang:str="gu")->None:
|
22 |
-
for bookdata in allbookdata:
|
23 |
-
if bookdata['search'] == search:
|
24 |
-
bookname = bookdata['title_english']
|
25 |
-
id = bookdata['sr_no']
|
26 |
-
pages = int(bookdata["pages"])
|
27 |
-
if id not in cls.books():
|
28 |
-
for page,content in enumerate(["\n".join(i.split("\n")[3:]) for i in [i for i in bs4.BeautifulSoup(requests.get(f'https://jainqq.org/booktext/{bookname.replace(" ","_")}/{id}').content, 'html.parser').find('div').stripped_strings][::2]]):
|
29 |
-
try:
|
30 |
-
contenteng = requests.post("https://translate-pa.googleapis.com/v1/translateHtml", headers={"Content-Type": "application/json+protobuf","User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36","X-Goog-Api-Key": "AIzaSyATBXajvzQLTDHEQbcpq0Ihe0vWDHmO520"}, json=[[content,lang,"en"],"wt_lib"]).json()[0][0]
|
31 |
-
elibbookAI.add(embeddings=embeddingfunc(contenteng),metadatas={"bookname":bookdata['search'],"page":page,"bookid":id,"originalcontent":content,"contenteng":contenteng,"contentimg":bs4.BeautifulSoup(requests.get(f"https://jainqq.org/explore/{id}/{page}").content, "html.parser").find("img",class_="img-fluid").get("src")},ids=f"{id}-{page}",documents=id)
|
32 |
-
progress(page/pages)
|
33 |
-
except:
|
34 |
-
pass
|
35 |
-
return "done"
|
36 |
-
@classmethod
|
37 |
-
def qna(thisclass,query:str,booklist:list[str] = None,notbooklist:list[str] = None,limit:int=1,lang:str="gu")->list:
|
38 |
-
if booklist:
|
39 |
-
booklist = {"bookid": {"$in": booklist}}
|
40 |
-
if notbooklist:
|
41 |
-
notbooklist = {"bookid": {"$nin": notbooklist}}
|
42 |
-
return [i["contentimg"] for i in elibbookAI.query(embeddingfunc(requests.get(f"https://translate.googleapis.com/translate_a/single?client=gtx&sl={lang}&tl=en&dt=t&q={query}").json()[0][0][0]),n_results=limit,where=booklist)["metadatas"][0]]
|
43 |
-
@classmethod
|
44 |
-
def reset(cls,demo:str):
|
45 |
-
client.reset()
|
46 |
-
return "done"
|
47 |
-
@classmethod
|
48 |
-
def linkmaker(cls,text_input:str,filename:str="book.jainebookAI")->str:
|
49 |
-
file_obj = BytesIO()
|
50 |
-
file_obj.write(text_input.encode())
|
51 |
-
file_obj.seek(0)
|
52 |
-
return f'<a href="data:application/octet-stream;base64,{base64.b64encode(file_obj.getvalue()).decode()}" download="{filename}">Download File</a>'
|
53 |
-
|
54 |
-
@classmethod
|
55 |
-
def download(cls,sr_id:str,embedding:bool=True):
|
56 |
-
id = sr_id
|
57 |
-
if embedding:
|
58 |
-
value = elibbookAI.get(include=["embeddings","metadatas"],where={"bookid":id})
|
59 |
-
return cls.linkmaker(json.dumps({"embeddingenable":True,"DOCUMENT":id,"embedding":value["embeddings"],"metadatas":value["metadatas"]}))
|
60 |
-
else:
|
61 |
-
return cls.linkmaker(json.dumps({"embeddingenable":False,"DOCUMENT":id,"metadatas":elibbookAI.get(include=["metadatas"],where={"bookid":id})["metadatas"]}))
|
62 |
-
@classmethod
|
63 |
-
def upload(cls,file:gr.File)->str:
|
64 |
-
if file:
|
65 |
-
file = json.loads(BytesIO(file).read().decode())
|
66 |
-
documentid = file["DOCUMENT"]
|
67 |
-
metadata = file["metadatas"]
|
68 |
-
if file["embeddingenable"]:
|
69 |
-
elibbookAI.add(embeddings=file["embedding"],metadatas=file["metadatas"],documents=[documentid for i in range(len(metadata))],ids=[f"{documentid}-{metadataofpage['page']}" for metadataofpage in metadata])
|
70 |
-
else:
|
71 |
-
elibbookAI.add(embeddings=[embeddingfunc(metadataofpage["contenteng"]) for metadataofpage in metadata],metadatas=file["metadatas"],documents=file["DOCUMENT"],ids=[f"{documentid}-{metadataofpage['page']}" for metadataofpage in metadata])
|
72 |
-
return "done"
|
73 |
-
upload = gr.Interface(jainnlp.loaddata, gr.Dropdown(allsearch),gr.Textbox())
|
74 |
-
chatref = gr.Interface(jainnlp.qna,gr.Textbox(),gr.Gallery())
|
75 |
-
task = gr.Interface(jainnlp.reset,gr.Textbox(),gr.Textbox(),submit_btn="Reset")
|
76 |
-
downloadfile = gr.Interface(jainnlp.download,[gr.Dropdown(jainnlp.books()),gr.Checkbox(True,label="AI ALGORITHEM")],gr.HTML())
|
77 |
-
uploadfile = gr.Interface(jainnlp.upload,gr.File(file_types=[".jainebookAI"],type="binary"),gr.Textbox())
|
78 |
-
if __name__ == "__main__":
|
79 |
-
gr.TabbedInterface([upload,chatref,task,downloadfile,uploadfile],["Upload","Chat","RESET","DOWNLOADAIFILE","UPLOADFILE"]).launch()
|
|
|
1 |
import chromadb
|
2 |
import requests
|
3 |
import chromadb.utils.embedding_functions as embedding_functions
|
4 |
+
from ai4bharat.transliteration import XlitEngine
|
|
|
5 |
import gradio as gr
|
6 |
import os
|
7 |
+
# Transliteration engine built for the "gu" language code with a beam width
# of 10. qna() passes every user query through it and reads the "gu" entry of
# the returned mapping.
# NOTE(review): presumably this turns romanized input into Gujarati script —
# confirm against the ai4bharat transliteration documentation.
e = XlitEngine("gu", beam_width=10)
|
|
|
8 |
# Embedding function used to vectorise the translated query text.
# The API token is read from the "hf_token" environment variable, so a missing
# variable raises KeyError as soon as this module is loaded.
embeddingfunc = embedding_functions.HuggingFaceEmbeddingFunction(
    api_key=os.environ["hf_token"],
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
|
9 |
+
# Handle to the "jainebooks" collection on the remote Chroma server; the
# collection is fetched if present and created otherwise.
elibbookAI = (
    chromadb.HttpClient("https://shethjenil-chromadb-server.hf.space/", port=443)
    .get_or_create_collection("jainebooks")
)
|
10 |
+
def qna(query:str,limit:int=1)->list:
    """Return the page-image URLs of the stored pages closest to ``query``.

    The query is transliterated with the module-level engine ``e``, translated
    from Gujarati ("gu") to English through the Google translate endpoint,
    embedded with ``embeddingfunc`` and matched against the ``elibbookAI``
    collection.

    Args:
        query: Text typed by the user.
        limit: Number of nearest pages to return. Coerced to ``int`` because
            the value comes from a UI slider and may arrive as a float, while
            the collection query expects a whole number.

    Returns:
        The ``"contentimg"`` metadata value of each matching page, best match
        first as ordered by the collection.

    Raises:
        requests.HTTPError: If the translation endpoint answers with an error
            status.
        requests.Timeout: If the translation endpoint does not answer in time.
    """
    # Pull the transliterated text out first: indexing with ["gu"] inside a
    # double-quoted f-string is a SyntaxError on Python versions before 3.12.
    gujarati_text = e.translit_sentence(query)["gu"]

    # Let requests build the query string so spaces, '&', '#' and non-ASCII
    # characters in the text are percent-encoded instead of breaking the URL.
    response = requests.get(
        "https://translate.googleapis.com/translate_a/single",
        params={"client": "gtx", "sl": "gu", "tl": "en", "dt": "t", "q": gujarati_text},
        timeout=30,
    )
    response.raise_for_status()

    # The first segment of the first block holds the translated text.
    english_text = response.json()[0][0][0]

    matches = elibbookAI.query(embeddingfunc(english_text), n_results=int(limit))
    return [page_meta["contentimg"] for page_meta in matches["metadatas"][0]]
|
12 |
+
# Build and start the UI: a query textbox and a result-count slider feed
# qna(), whose returned image URLs are shown in a gallery.
# step=1 pins the slider to whole numbers; without an explicit step Gradio
# derives a fractional one from the 1-4 range, and that fractional count would
# be forwarded to the collection query, which expects an integer.
gr.Interface(
    qna,
    [gr.Textbox(), gr.Slider(1, 4, value=1, step=1, label="Count")],
    gr.Gallery(),
).launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|