R Jayanth Jadhav committed on
Commit
59850b8
1 Parent(s): 5a83205

app.py create

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __import__('pysqlite3')
2
+ import sys
3
+ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
4
+
5
+ import os
6
+ import gradio as gr
7
+ import chromadb
8
+ from sentence_transformers import SentenceTransformer
9
+ import pandas as pd
10
+ import numpy as np
11
+
12
+ from chromadb.utils import embedding_functions
13
+ from huggingface_hub import InferenceClient
14
+
15
+ from dotenv import load_dotenv, find_dotenv
16
+ _ = load_dotenv(find_dotenv())
17
# Hugging Face API key; a missing HF_API_KEY variable raises KeyError here.
hf_api_key = os.environ['HF_API_KEY']

# Load the patents table: the 'text' column supplies the documents and the
# stringified row index supplies the matching document ids.
dfs = pd.read_csv('Patents.csv')
docs = dfs['text'].tolist()
ids = list(map(str, dfs.index.tolist()))

# Create (or reuse) the "patents" collection and index every document in it.
client = chromadb.Client()
collection = client.get_or_create_collection("patents")
collection.add(documents=docs, ids=ids)
26
+
27
# Lazily-created embedding model, shared by every call to text_embedding().
_embedding_model = None


def text_embedding(text) -> np.ndarray:
    """Return the sentence-transformers embedding of *text*.

    The 'all-MiniLM-L6-v2' model is loaded on first use and then reused;
    constructing it on every call would repeat the model load for each query.

    Args:
        text: The text to embed (passed straight to ``model.encode``).

    Returns:
        The value produced by ``SentenceTransformer.encode`` for *text*.
    """
    global _embedding_model
    if _embedding_model is None:
        _embedding_model = SentenceTransformer(
            'sentence-transformers/all-MiniLM-L6-v2'
        )
    return _embedding_model.encode(text)
30
+
31
def gen_context(query, n_results=15):
    """Build the retrieval context for *query* from the patents collection.

    The query is embedded with ``text_embedding`` and the closest documents
    are fetched from ``collection``.

    Args:
        query: The user's question.
        n_results: How many documents to retrieve (defaults to 15, the value
            that was previously hard-coded).

    Returns:
        The retrieved documents joined into one string, one per line.
    """
    query_vector = text_embedding(query).tolist()

    results = collection.query(
        query_embeddings=query_vector,
        n_results=n_results,
        include=["documents"],
    )

    # Only one query was issued, so its matches are the first entry.
    return "\n".join(str(doc) for doc in results['documents'][0])
39
+
40
# Instructions sent to the model with every request.
_SYSTEM_PROMPT = (
    "You are a helpful AI assistant that can answer questions on the patents "
    "dataset. Answer based on the context provided.If you cannot find the "
    "correct answer, say I don't know. Be concise and just include the response"
)

# Upper bound on the number of tokens generated per answer.
_MAX_NEW_TOKENS = 1000

# Hosted inference client for the Mixtral instruct model, authenticated with
# the key read from the environment (previously loaded but never used).
# NOTE: this rebinds the module-level name `client`, which earlier held the
# Chroma client; retrieval goes through `collection`, not through this name.
client = InferenceClient(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    token=hf_api_key,
)


def _build_prompt(system_prompt, user_prompt):
    """Wrap the system and user prompts in the instruction template."""
    # NOTE(review): the <<SYS>> markers look like the Llama-2 chat format;
    # Mixtral's own template appears to be plain [INST]...[/INST] -- confirm
    # before changing, the text is kept exactly as it was.
    return f"<s>[INST]<<SYS>>\n{system_prompt}\n<</SYS>>\n\n{user_prompt}[/INST]"


def chat_completion(query):
    """Answer *query* using retrieved patent documents as context.

    This replaces two same-named definitions: the earlier one was shadowed by
    the later one and could never be called.

    Args:
        query: The user's question, as entered in the UI.

    Returns:
        The model's answer with surrounding whitespace stripped, or a short
        hint when *query* is empty or blank.
    """
    # Skip retrieval and the remote call when there is nothing to ask.
    if not query or not query.strip():
        return "Please enter a query."

    context = gen_context(query)
    user_prompt = (
        f"\nBased on the context:\n{context}\n"
        f"Answer the below query:\n{query}\n"
    )
    final_prompt = _build_prompt(_SYSTEM_PROMPT, user_prompt)

    answer = client.text_generation(
        prompt=final_prompt,
        max_new_tokens=_MAX_NEW_TOKENS,
    )
    return answer.strip()
78
+
79
# UI widgets: a two-line query box and a sixteen-line result box.
_query_box = gr.Textbox(label="Query", lines=2)
_result_box = gr.Textbox(label="Result", lines=16)

# Single-function Gradio app that routes the query through chat_completion.
demo = gr.Interface(
    fn=chat_completion,
    inputs=[_query_box],
    outputs=[_result_box],
    title="Chat on Patents Data",
)

# Enable request queuing, then start the server with a public share link.
_queued_demo = demo.queue()
_queued_demo.launch(share=True)