Tejasw1 committed on
Commit a3723dd · 1 Parent(s): 1f2b621

Upload folder using huggingface_hub

.DS_Store ADDED
Binary file (6.15 kB).
 
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+faissdb/index.faiss filter=lfs diff=lfs merge=lfs -text
__pycache__/gradio_app.cpython-311.pyc CHANGED
Binary files a/__pycache__/gradio_app.cpython-311.pyc and b/__pycache__/gradio_app.cpython-311.pyc differ
 
faissdb/index.faiss ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a86c1246d6477f34bca9db9b8ab3bb1a10e70db706b34676599e2fac1474101d
+size 151471149
faissdb/index.pkl ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d8750363e543e852f0764682a184645dfe7d51d76394feb685b4ffee57dfcdd
+size 55501392
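The two LFS-tracked files above are the serialized FAISS vector store that gradio_app.py loads with FAISS.load_local('faissdb', ...). The build step itself is not part of this commit; the sketch below shows one way such an index could have been produced, assuming a local docs/ source folder and illustrative chunking settings, while reusing the same BAAI/bge-base-en embedding configuration found in gradio_app.py.

    # Hypothetical index-build script (not included in this commit).
    # "docs/" and the chunk sizes are assumptions; only the embedding
    # settings mirror what gradio_app.py expects at load time.
    from langchain.document_loaders import DirectoryLoader
    from langchain.embeddings import HuggingFaceBgeEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import FAISS

    embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-base-en",
        encode_kwargs={"normalize_embeddings": True},  # cosine similarity
    )

    docs = DirectoryLoader("docs/").load()          # assumed corpus location
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100).split_documents(docs)

    vectordb = FAISS.from_documents(chunks, embeddings)
    vectordb.save_local("faissdb")                  # writes index.faiss and index.pkl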
gradio_app.py CHANGED
@@ -1,23 +1,22 @@
-# %%
-
 from threading import Thread
 
 import gradio as gr
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.chains import RetrievalQA
+from langchain.embeddings import HuggingFaceBgeEmbeddings
+from langchain.llms import HuggingFaceTextGenInference
+from langchain.prompts import PromptTemplate
+from langchain.vectorstores import FAISS
 # import torch
 from text_generation import Client, InferenceAPIClient
 
 client = Client("http://20.83.177.108:8080")
 
 
-# text = ""
-# for response in client.generate_stream("What is Deep Learning?", max_new_tokens=20):
-#     if not response.token.special:
-#         text += response.token.text
-# print(text)
-
-
-def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
+def run_generation_stream(user_text, f, max_new_tokens, temperature):
     # Get the model and tokenizer, and tokenize the user text.
+    print('called stream')
 
     if len(user_text.strip()) == 0:
         print('blank')
@@ -29,7 +28,7 @@ def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
     ### Response: """
 
     text = ""
-    for response in client.generate_stream(user_text, max_new_tokens=max_new_tokens, repetition_penalty=1.05):
+    for response in client.generate_stream(user_text, max_new_tokens=max_new_tokens, repetition_penalty=1.05, temperature=temperature):
         if not response.token.special:
             text += response.token.text
             yield text
@@ -41,6 +40,69 @@ def reset_textbox():
     return gr.update(value='')
 
 
+model_name = "BAAI/bge-base-en"
+# set True to compute cosine similarity
+encode_kwargs = {'normalize_embeddings': True}
+
+model_norm = HuggingFaceBgeEmbeddings(
+    model_name=model_name,
+    encode_kwargs=encode_kwargs
+)
+
+
+vectordb = FAISS.load_local('faissdb', embeddings=model_norm)
+retriever = vectordb.as_retriever(
+    search_type='similarity', search_kwargs={"k": 5})
+
+
+# relating to refer to Indian Penal Code (IPC), CrPC (Code of Criminal Procedure) for most cases and therefore laws
+prompt_template = """You are an expert legal assistant with extensive knowledge about Indian law. Your task is to respond to the given query in a factually correct and concise manner unless asked for a detailed explanation. Assume the query is asked by a common man unless explicitly specified otherwise, therefore no special acts or laws like ones for railway, army, police would apply to them. Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+{context}
+
+Question: {question}
+Response:"""
+
+
+PROMPT = PromptTemplate(
+    template=prompt_template, input_variables=["context", "question"]
+)
+
+
+def run_generation(query, factual, max_tokens, temperature):
+    print('called non stream')
+
+    llm = HuggingFaceTextGenInference(
+        inference_server_url="http://20.83.177.108:8080/",
+        max_new_tokens=max_tokens,
+        top_k=10,
+        top_p=0.95,
+        typical_p=0.95,
+        temperature=temperature,
+        streaming=True if factual else False,
+        # repetition_penalty=1.1,
+    )
+
+    qa_chain = RetrievalQA.from_chain_type(llm=llm,
+                                           chain_type_kwargs={
+                                               "prompt": PROMPT},
+                                           retriever=retriever,
+                                           return_source_documents=True,
+                                           )
+
+    # text = ""
+    # if factual:
+    #     response = llm(query, callbacks=[StreamingStdOutCallbackHandler()])
+    #     print(response)
+    #     # text += response
+    #     yield response
+
+    # else:
+    llm_response = qa_chain(query)
+    print(llm_response['result'])
+    return llm_response['result']
+
+
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column(scale=4):
@@ -54,21 +116,23 @@ with gr.Blocks() as demo:
 
         with gr.Column(scale=1):
             max_new_tokens = gr.Slider(
-                minimum=1, maximum=1000, value=250, step=10, interactive=True, label="Max New Tokens",
-            )
-            top_p = gr.Slider(
-                minimum=0.05, maximum=1.0, value=0.95, step=0.05, interactive=True, label="Top-p (nucleus sampling)",
-            )
-            top_k = gr.Slider(
-                minimum=1, maximum=50, value=50, step=1, interactive=True, label="Top-k",
+                minimum=1, maximum=1000, value=50, step=10, interactive=True, label="Number of words to generate",
             )
             temperature = gr.Slider(
-                minimum=0.1, maximum=1.0, value=0.8, step=0.1, interactive=True, label="Temperature",
+                minimum=0.1, maximum=1.0, value=0.6, step=0.1, interactive=True, label="Randomness (can be between 0-1, 0 being least random)",
            )
-
-    user_text.submit(run_generation, [
-        user_text, top_p, temperature, top_k, max_new_tokens], model_output)
-    button_submit.click(run_generation, [
-        user_text, top_p, temperature, top_k, max_new_tokens], model_output)
+            factual = gr.Checkbox(
+                label='Turn on to get factually correct answers')
+
+    # user_text.submit(run_generation, [
+    #     user_text, top_p, temperature, top_k, max_new_tokens], model_output)
+    # button_submit.click(run_generation, [
+    #     user_text, top_p, temperature, top_k, max_new_tokens], model_output)
+
+    # user_text.submit(run_generation, [
+    #     user_text, factual, max_new_tokens, temperature], model_output)
+    print('fac', factual.value)
+    button_submit.click(run_generation if factual.value else run_generation_stream, [
+        user_text, factual, max_new_tokens, temperature], model_output)
 
 demo.queue(max_size=32).launch(enable_queue=True)
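One hedged note on the handler wiring in the last hunk (not part of the commit): the expression run_generation if factual.value else run_generation_stream is evaluated once when the Blocks UI is built, so toggling the checkbox in the running app will not switch between the retrieval path and the streaming path. A minimal sketch of one way to dispatch per click instead, reusing the two functions already defined in gradio_app.py (the wrapper name generate is an assumption):

    # Hypothetical per-call dispatcher (not in this commit): reads the
    # checkbox value on every click rather than once at build time.
    def generate(user_text, factual, max_new_tokens, temperature):
        if factual:
            # retrieval-augmented, non-streaming answer
            yield run_generation(user_text, factual, max_new_tokens, temperature)
        else:
            # plain streaming generation
            yield from run_generation_stream(user_text, factual, max_new_tokens, temperature)

    # button_submit.click(generate, [user_text, factual, max_new_tokens, temperature], model_output)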
requirements.txt CHANGED
@@ -1,2 +1,4 @@
 text-generation
 gradio
+langchain
+faiss-cpu
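Hedged note, not part of the commit: HuggingFaceBgeEmbeddings loads the BGE model through the sentence-transformers package, which is not listed here; presumably it is already installed in the deployment environment, otherwise it would need to be added alongside langchain and faiss-cpu.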