Spaces:
Runtime error
Runtime error
Commit
•
9775576
1
Parent(s):
e0bf8bf
Upload 2 files
Browse files- .gitattributes +1 -0
- jainbooks.json +3 -0
- main.py +48 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
jainbooks.json filter=lfs diff=lfs merge=lfs -text
|
jainbooks.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35900be848ec0bc4aee631d4705611b86c64ed6cc6da61f4fc82bc4e07d48ec9
|
3 |
+
size 24099642
|
main.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import chromadb
|
2 |
+
import requests
|
3 |
+
import chromadb.utils.embedding_functions as embedding_functions
|
4 |
+
import bs4
|
5 |
+
import json
|
6 |
+
import gradio as gr
|
7 |
+
import os
|
8 |
+
# Embedding function backed by the Hugging Face API; the token is read from
# the "hf_token" environment variable (KeyError at import time if it is unset).
embeddingfunc = embedding_functions.HuggingFaceEmbeddingFunction(api_key=os.environ["hf_token"],model_name="BAAI/bge-small-en-v1.5")
# Local on-disk alternative to the remote server below:
# client = chromadb.PersistentClient(path="booksofjainism")
client = chromadb.HttpClient("https://shethjenil-chromadb-server.hf.space/",port=443)
elibbookAI = client.get_or_create_collection("jainebooks")
# Read the book catalogue with an explicit encoding and close the handle
# promptly instead of leaking it until garbage collection.
with open("jainbooks.json", "r", encoding="utf-8") as _catalogue_file:
    allbookdata = json.load(_catalogue_file)
# Values of each catalogue entry's "search" field; used as the upload dropdown choices.
allsearch = [i['search'] for i in allbookdata]
|
14 |
+
class jainnlp:
    """Ingest books into the ``elibbookAI`` collection and query them.

    Every method is a classmethod that works on the module-level
    ``elibbookAI`` collection, ``client``, ``embeddingfunc`` and
    ``allbookdata`` catalogue; the class holds no instance state.
    """

    _TRANSLATE_HTML_URL = "https://translate-pa.googleapis.com/v1/translateHtml"
    # NOTE(review): this API key is committed in source. Consider loading it
    # from an environment variable / Space secret instead.
    _TRANSLATE_HTML_HEADERS = {
        "Content-Type": "application/json+protobuf",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "X-Goog-Api-Key": "AIzaSyATBXajvzQLTDHEQbcpq0Ihe0vWDHmO520",
    }

    @classmethod
    def books(cls)->list[str]:
        """Return the distinct ``documents`` values stored in the collection.

        ``loaddata`` stores the book id as the document of every page it
        adds, so these values are the ids of books already ingested.
        """
        return list(set(elibbookAI.get(include=[ "documents" ])["documents"]))

    @classmethod
    def loaddata(cls,search:str,progress = gr.Progress(),lang:str="gu")->str:
        """Ingest every catalogue book whose ``search`` field equals *search*.

        For each matching book not yet present in the collection, the book
        text is downloaded from jainqq.org and split into pages. Each page is
        sent to the translate endpoint (source *lang*, target ``"en"``),
        embedded, and added to the collection together with its page image
        URL.

        Args:
            search: Value to match against each catalogue entry's ``search``.
            progress: Gradio progress tracker, updated after each stored page.
            lang: Source language code passed to the translation request.

        Returns:
            The string ``"done"``.

        A page that fails (network, parsing, or storage error) is reported on
        stdout and skipped, so one bad page does not abort the whole book.
        """
        for bookdata in allbookdata:
            if bookdata['search'] != search:
                continue
            bookname = bookdata['title_english']
            book_id = bookdata['sr_no']
            pages = int(bookdata["pages"])
            if book_id in cls.books():
                # Already ingested; do not add duplicate pages.
                continue
            book_html = requests.get(f'https://jainqq.org/booktext/{bookname.replace(" ","_")}/{book_id}').content
            # Only every second stripped string of the first <div> is kept as
            # page text, and the first three lines of each one are dropped.
            raw_pages = list(bs4.BeautifulSoup(book_html, 'html.parser').find('div').stripped_strings)[::2]
            for page, raw_page in enumerate(raw_pages):
                content = "\n".join(raw_page.split("\n")[3:])
                try:
                    contenteng = requests.post(
                        cls._TRANSLATE_HTML_URL,
                        headers=cls._TRANSLATE_HTML_HEADERS,
                        json=[[content,lang,"en"],"wt_lib"],
                    ).json()[0][0]
                    page_html = requests.get(f"https://jainqq.org/explore/{book_id}/{page}").content
                    contentimg = bs4.BeautifulSoup(page_html, "html.parser").find("img",class_="img-fluid").get("src")
                    elibbookAI.add(
                        embeddings=embeddingfunc(contenteng),
                        metadatas={"bookname":bookdata['search'],"page":page,"bookid":book_id,"originalcontent":content,"contenteng":contenteng,"contentimg":contentimg},
                        ids=f"{book_id}-{page}",
                        documents=book_id,
                    )
                    # Guard against a catalogue entry that reports zero pages.
                    if pages > 0:
                        progress(page/pages)
                except Exception as err:
                    # Best-effort ingestion: report the failure and move on,
                    # but let KeyboardInterrupt / SystemExit propagate.
                    print(f"jainnlp.loaddata: skipped page {page} of book {book_id}: {err!r}")
        return "done"

    @classmethod
    def qna(cls,query:str,booklist:list[str] = None,notbooklist:list[str] = None,limit:int=1,lang:str="gu")->list:
        """Return page-image URLs of the pages closest to *query*.

        The query is translated from *lang* to English, embedded, and matched
        against the collection.

        Args:
            query: Question text in language *lang*.
            booklist: If non-empty, only pages whose ``bookid`` is in this list
                are considered.
            notbooklist: If non-empty, pages whose ``bookid`` is in this list
                are excluded.
            limit: Number of results requested from the collection.
            lang: Source language code of *query*.

        Returns:
            The ``contentimg`` metadata value of each matching page.
        """
        filters = []
        if booklist:
            filters.append({"bookid": {"$in": booklist}})
        if notbooklist:
            filters.append({"bookid": {"$nin": notbooklist}})
        if not filters:
            where = None
        elif len(filters) == 1:
            where = filters[0]
        else:
            where = {"$and": filters}
        # Pass the query through ``params`` so characters such as "&", "#" or
        # "+" are percent-encoded instead of corrupting the URL.
        translated = requests.get(
            "https://translate.googleapis.com/translate_a/single",
            params={"client": "gtx", "sl": lang, "tl": "en", "dt": "t", "q": query},
        ).json()[0][0][0]
        matches = elibbookAI.query(embeddingfunc(translated),n_results=limit,where=where)
        return [meta["contentimg"] for meta in matches["metadatas"][0]]

    @classmethod
    def reset(cls)->str:
        """Call ``client.reset()`` on the module-level client and return ``"done"``."""
        client.reset()
        return "done"
|
45 |
+
# "Upload" tab: choose a catalogue entry from the dropdown and ingest it;
# the textbox shows the value returned by jainnlp.loaddata.
upload = gr.Interface(
    fn=jainnlp.loaddata,
    inputs=gr.Dropdown(allsearch),
    outputs=gr.Textbox(),
)
# "Chat" tab: type a question; the gallery shows the images returned by jainnlp.qna.
chatref = gr.Interface(
    fn=jainnlp.qna,
    inputs=gr.Textbox(),
    outputs=gr.Gallery(),
)
if __name__ == "__main__":
    gr.TabbedInterface(
        [upload, chatref],
        ["Upload", "Chat"],
    ).launch()
|