ishaan-mital committed on
Commit
d99b731
1 Parent(s): fbfaaa6
Files changed (2)
  1. app.py +73 -90
  2. requirements.txt +4 -4
app.py CHANGED
@@ -1,96 +1,79 @@
- from gradio_client import Client
  import gradio as gr
- import requests
- # from langchain.chains import RetrievalQA
- # import pinecone
- # from langchain.vectorstores import Pinecone
  import os
- # import openai
- # import time
- # from langchain.embeddings.huggingface import HuggingFaceEmbeddings
- # import transformers
- # from langchain.chains import RetrievalQA
-
- API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
- headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
- retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/7f5fz9pvt/")
- llm = Client("https://library-samples-zephyr-7b.hf.space/--replicas/b7p4f/")
- # embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
-
- # embed_model = HuggingFaceEmbeddings(
- #     model_name=embed_model_id,
- # )
-
-
- # pinecone.init(
- #     api_key=os.environ.get('PINECONE_API_KEY'),
- #     environment=os.environ.get('PINECONE_ENVIRONMENT')
- # )
-
- # index_name = 'llama-rag'
- # index = pinecone.Index(index_name)
- # index.describe_index_stats()
- # text_field = 'text'
-
- # vectorstore = Pinecone(
- #     index, embed_model.embed_query, text_field
- # )
-
-
- # headers = {"Authorization": "Bearer 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"}
- # generate_text = transformers.pipeline(
- #     model="HuggingFaceH4/zephyr-7b-beta",
- #     return_full_text=True,  # langchain expects the full text
- #     task='text-generation',
- #     # we pass model parameters here too
- #     temperature=0.7,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
- #     max_new_tokens=512,  # max number of tokens to generate in the output
- #     repetition_penalty=1.1,  # without this, output begins repeating
- #     do_sample=True
- # )
-
- # from langchain.llms import HuggingFacePipeline
-
- # llm = HuggingFacePipeline(pipeline=generate_text)
-
- # rag_pipeline = RetrievalQA.from_chain_type(
- #     llm=llm, chain_type='stuff',
- #     retriever=vectorstore.as_retriever()
- # )
- def query(payload):
-     response = requests.post(API_URL, headers=headers, json=payload)
-     return response.json()
-
- def main(question):
-     print(question)
-     context = retrieval.predict(question)
-     # try:
-     print(context)
-     answer = llm.predict(
-         f'Question: {question} and context: {context}',
-         "NCERT Helper!!",  # str in 'System prompt' Textbox component
-         2048,   # float (numeric value between 1 and 2048) in 'Max new tokens' Slider component
-         0.1,    # float (numeric value between 0.1 and 4.0) in 'Temperature' Slider component
-         0.05,   # float (numeric value between 0.05 and 1.0) in 'Top-p (nucleus sampling)' Slider component
-         1,      # float (numeric value between 1 and 1000) in 'Top-k' Slider component
-         1,      # float (numeric value between 1.0 and 2.0) in 'Repetition penalty' Slider component
-         api_name="/chat"
-     )
-     # except:
-     #     answer = llm.predict(
-     #         f'Question: {question}',
-     #         "NCERT Helper!!",  # str in 'System prompt' Textbox component
-     #         2048,   # float (numeric value between 1 and 2048) in 'Max new tokens' Slider component
-     #         0.1,    # float (numeric value between 0.1 and 4.0) in 'Temperature' Slider component
-     #         0.05,   # float (numeric value between 0.05 and 1.0) in 'Top-p (nucleus sampling)' Slider component
-     #         1,      # float (numeric value between 1 and 1000) in 'Top-k' Slider component
-     #         1,      # float (numeric value between 1.0 and 2.0) in 'Repetition penalty' Slider component
-     #         api_name="/chat"
-     #     )
-     # answer = query({"inputs": {"question": question, "context": context}})
-     return answer

- demo = gr.Interface(main, inputs = "text", outputs = "text")

  if __name__ == "__main__":
      demo.launch()
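For reference, the removed version chained two other running Spaces over HTTP with gradio_client: one Space returned retrieved context, the other served Zephyr-7B behind a /chat endpoint. Below is a minimal sketch of that calling pattern, reusing the Space URL and the positional /chat arguments from the removed lines; the replica paths and the endpoint signature may have changed since this commit. The rewritten app.py, shown next, drops this remote-client setup in favor of a local LangChain pipeline.

from gradio_client import Client

llm = Client("https://library-samples-zephyr-7b.hf.space/")  # hosted Zephyr-7B chat Space
answer = llm.predict(
    "Question: What is an atom? and context: ...",  # user message (question + retrieved context)
    "NCERT Helper!!",   # system prompt
    2048,               # max new tokens
    0.1,                # temperature
    0.05,               # top-p (nucleus sampling)
    1,                  # top-k
    1,                  # repetition penalty
    api_name="/chat",
)
print(answer)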
 
 
  import gradio as gr
  import os
+ import pinecone
+ import time
+ # from torch import cuda
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+ # import PyPDF2
+ # import re
+ from langchain.vectorstores import Pinecone
+ from langchain import HuggingFaceHub, LLMChain
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import RetrievalQA
+
+ embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+ # device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
+
+ embed_model = HuggingFaceEmbeddings(
+     model_name=embed_model_id,
+     # model_kwargs={'device': device},
+     # encode_kwargs={'device': device, 'batch_size': 32}
+ )
+
+ # get API key from app.pinecone.io and environment from console
+ pinecone.init(
+     api_key=os.environ.get('PINECONE_API_KEY'),
+     environment=os.environ.get('PINECONE_ENVIRONMENT')
+ )
+ docs = [
+     "this is one document",
+     "and another document"
+ ]
+
+ embeddings = embed_model.embed_documents(docs)
+
+ index_name = 'llama-rag'
+
+ # if index_name not in pinecone.list_indexes():
+ #     pinecone.create_index(
+ #         index_name,
+ #         dimension=len(embeddings[0]),
+ #         metric='cosine'
+ #     )
+ #     # wait for index to finish initialization
+ #     while not pinecone.describe_index(index_name).status['ready']:
+ #         time.sleep(1)
+
+ index = pinecone.Index(index_name)
+ index.describe_index_stats()
+
+ text_field = 'text'  # field in metadata that contains text content
+
+ vectorstore = Pinecone(
+     index, embed_model.embed_query, text_field
+ )
+
+ hub = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", huggingfacehub_api_token="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")
+ print(hub)
+ prompt = PromptTemplate(
+     input_variables=["question"],
+     template="Question: {question}\nAnswer:",
+ )
+
+ rag_pipeline = RetrievalQA.from_chain_type(
+     llm=hub, chain_type='stuff',
+     retriever=vectorstore.as_retriever()
+ )
+
+ def question(question):
+     answer = rag_pipeline(question)
+     return answer
+
+ demo = gr.Interface(fn=question, inputs="text", outputs="text")
+
  if __name__ == "__main__":
      demo.launch()
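The new version answers questions entirely through the RetrievalQA chain built above. A hypothetical local test, assuming PINECONE_API_KEY and PINECONE_ENVIRONMENT are set and the 'llama-rag' index is already populated; with LangChain of this vintage, calling the chain returns a dict with 'query' and 'result' keys:

# assumes the objects defined in the new app.py above are in scope
result = rag_pipeline("What is photosynthesis?")
print(result["result"])  # the generated answer; result["query"] echoes the question

Note that the commit hardcodes a Hugging Face token in the HuggingFaceHub constructor; the same constructor also reads the HUGGINGFACEHUB_API_TOKEN environment variable, so passing huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN") would keep the token out of the repository.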
requirements.txt CHANGED
@@ -3,9 +3,9 @@ gradio
  gradio_client
  gtts
  # openai==0.28
- # pydantic==1.10.9
- # langchain
- # pinecone-client==2.2.2
  # faiss-cpu
- # sentence_transformers
  # transformers
 
  gradio_client
  gtts
  # openai==0.28
+ pydantic==1.10.9
+ langchain
+ pinecone-client==2.2.2
  # faiss-cpu
+ sentence_transformers
  # transformers
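With langchain, pinecone-client, and sentence_transformers now uncommented, a quick sanity check that the pinned set imports together; pydantic is presumably pinned to 1.10.9 because LangChain of this era expects pydantic 1.x:

# a minimal sketch; package names follow requirements.txt above
import langchain
import pinecone             # provided by pinecone-client==2.2.2
import sentence_transformers
import pydantic

print(pydantic.VERSION)     # expect '1.10.9'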