jordyvl committed on
Commit 62d0d52
1 Parent(s): 50a7785

back to phi
Files changed (2):
  1. README.md +2 -1
  2. app.py +78 -28
README.md CHANGED
@@ -10,6 +10,7 @@ pinned: false
 preload_from_hub:
 - "microsoft/phi-2"
 - "BAAI/bge-small-en-v1.5"
+- "HuggingFaceH4/zephyr-7b-alpha"
+- "meta-llama/Meta-Llama-3-8B"
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
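For context, the `preload_from_hub` entries ask the Space to cache these checkpoints at build time. A minimal runtime sketch of the same effect (hypothetical, not part of app.py; only the repo ids come from this diff) would be:

```python
# Rough runtime equivalent of the `preload_from_hub` list above: download each
# listed repo into the local Hugging Face cache before the app starts.
from huggingface_hub import snapshot_download

for repo_id in [
    "microsoft/phi-2",
    "BAAI/bge-small-en-v1.5",
    "HuggingFaceH4/zephyr-7b-alpha",
    "meta-llama/Meta-Llama-3-8B",  # gated repo: typically needs an accepted license + HF token
]:
    snapshot_download(repo_id)
```
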
app.py CHANGED
@@ -20,12 +20,13 @@ CHEAPMODE = torch.cuda.is_available()
 # LLM = "HuggingFaceH4/zephyr-7b-alpha" if not CHEAPMODE else "microsoft/phi-2"
 
 config = {
-    "LLM": "meta-llama/Meta-Llama-3-8B",
-    # "LLM": "microsoft/phi-2",
+    # "LLM": "meta-llama/Meta-Llama-3-8B",
+    "LLM": "microsoft/phi-2",
+    # "LLM": "HuggingFaceH4/zephyr-7b-alpha",
     "embeddings": "BAAI/bge-small-en-v1.5",
     "similarity_top_k": 2,
     "context_window": 4048,
-    "max_new_tokens": 150,
+    "max_new_tokens": 200,
     "temperature": 0.7,
     "top_k": 5,
     "top_p": 0.95,
@@ -42,17 +43,17 @@ title = "Ask my thesis: Intelligent Automation for AI-Driven Document Understand
 title = center_element(title)
 description = """Chat with the thesis manuscript by asking questions and receive answers with reference to the page.
 
-<div class="span1">
+<div class="center">
 <a href="https://jordy-vl.github.io/assets/phdthesis/VanLandeghem_Jordy_PhD-thesis.pdf">
 <img src="https://ideogram.ai/api/images/direct/cc3Um6ClQkWJpVdXx6pWVA.png"
-title="Thesis.pdf" alt="Ideogram image generated with prompt engineering"/></a>
-</div>
+title="Thesis.pdf" alt="Ideogram image generated with prompt engineering" width="500" class="center"/></a>
+</div> Click the visual above to be redirected to the PDF of the manuscript.
 
 Technology used: [Llama-index](https://www.llamaindex.ai/), OS LLMs from HuggingFace
 
-Spoiler: a RAG application with a >1B LLM and online vector store can be quite slow on a 290 page document ⏳
+Spoiler: a quickly hacked together RAG application with a >1B LLM and online vector store can be quite slow on a 290 page document ⏳ (10s+)
 """
-# width="250"
+
 description = center_element(description)
 
 def messages_to_prompt(messages):
@@ -105,6 +106,7 @@ def load_RAG_pipeline(config):
     # Llama-index
     Settings.llm = llm
     Settings.embed_model = HuggingFaceEmbedding(model_name=config["embeddings"])
+    print(Settings)
     Settings.chunk_size = config["chunk_size"]
     Settings.chunk_overlap = config["chunk_overlap"]
 
@@ -125,23 +127,16 @@ default_query_engine = load_RAG_pipeline(config)
 
 # These are placeholder functions to simulate the behavior of the RAG setup.
 # You would need to implement these with the actual logic to retrieve and generate answers based on the document.
-def get_answer(question, temperature, nucleus_sampling, max_tokens, query_engine=default_query_engine):
+def get_answer(question, config, query_engine=default_query_engine):
     # Here you should implement the logic to generate an answer based on the question and the document.
     # For example, you could use a machine learning model for RAG.
     # answer = "This is a placeholder answer."
     # https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/#setting-local-configurations
 
     # if temperature or nucleus sampling or max_tokens != as in config, recall query engine
-    if (
-        temperature != config["temperature"]
-        or nucleus_sampling != config["top_p"]
-        or max_tokens != config["max_new_tokens"]
-    ):
-        config["temperature"] = temperature
-        config["top_p"] = nucleus_sampling
-        config["max_new_tokens"] = max_tokens
-        query_engine = load_RAG_pipeline(config)
+
     response = query_engine.query(question)
+    print(f"A: {response}")
     return response
 
 
@@ -156,32 +151,87 @@ def get_answer_page(response):
 
 # Create the gr.Interface function
 def ask_my_thesis(
-    question, temperature=config["temperature"], nucleus_sampling=config["top_p"], max_tokens=config["max_new_tokens"]
+    question,
+    LLM=config["LLM"],
+    embeddings=config["embeddings"],
+    similarity_top_k=config["similarity_top_k"],
+    context_window=config["context_window"],
+    max_new_tokens=config["max_new_tokens"],
+    temperature=config["temperature"],
+    top_k=config["top_k"],
+    top_p=config["top_p"],
+    chunk_size=config["chunk_size"],
+    chunk_overlap=config["chunk_overlap"],
 ):
     print(f"Got Q: {question}")
-    answer = get_answer(question, temperature, nucleus_sampling, max_tokens)
+    query_engine = default_query_engine
+
+    # if any change in kwargs
+    # Check if any of the kwargs have changed
+    if (
+        temperature != config["temperature"]
+        or top_p != config["top_p"]
+        or max_new_tokens != config["max_new_tokens"]
+        or LLM != config["LLM"]
+        or embeddings != config["embeddings"]
+        or similarity_top_k != config["similarity_top_k"]
+        or context_window != config["context_window"]
+        or top_k != config["top_k"]
+        or chunk_size != config["chunk_size"]
+        or chunk_overlap != config["chunk_overlap"]
+    ):
+        # Update the config dictionary with the new values
+        config["temperature"] = temperature
+        config["top_p"] = top_p
+        config["max_new_tokens"] = max_new_tokens
+        # config["LLM"] = LLM
+        # config["embeddings"] = embeddings
+        config["similarity_top_k"] = similarity_top_k
+        config["context_window"] = context_window
+        config["top_k"] = top_k
+        config["chunk_size"] = chunk_size
+        config["chunk_overlap"] = chunk_overlap
+        query_engine = load_RAG_pipeline(config)
+
+    answer = get_answer(question, config, query_engine=query_engine)
     image, answer_page = get_answer_page(answer)
-    return answer, image, answer_page
+    return answer.response, image, answer_page
 
 
 # Set up the interface options based on the design in the image.
 output_image = gr.Image(label="Answer Page")
 
 # examples
-examples = [["Who is Jordy Van Landeghem"], []]
+examples = [
+    ["What model is state-of-the-art on DUDE?"],
+    ["Why is knowledge distillation interesting?"],
+    ["What is ANLS?"],
+]
+# Define additional Gradio input elements
+additional_inputs = [
+    # gr.Input("text", label="Question"),
+    # gr.Input("text", label="LLM", value=config["LLM"]),
+    # gr.Input("text", label="Embeddings", value=config["embeddings"]),
+    gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K"),
+    gr.Slider(512, 8048, value=config["context_window"], label="Context Window"),
+    gr.Slider(20, 250, value=config["max_new_tokens"], label="Max New Tokens"),
+    gr.Slider(0, 1, value=config["temperature"], label="Temperature"),
+    gr.Slider(1, 10, value=config["top_k"], label="Top K"),
+    gr.Slider(0, 1, value=config["top_p"], label="Nucleus Sampling"),
+    gr.Slider(128, 4024, value=config["chunk_size"], label="Chunk Size"),
+    gr.Slider(0, 200, value=config["chunk_overlap"], label="Chunk Overlap"),
+]
 
 iface = gr.Interface(
     fn=ask_my_thesis,
     inputs=[gr.Textbox(label="Question", placeholder="Type your question here...")],
-    additional_inputs=[
-        gr.Slider(0, 1, value=0.7, label="Temperature"),
-        gr.Slider(0, 1, value=0.95, label="Nucleus Sampling"),
-        gr.Slider(1, 500, value=150, label="Max Generated Number of Tokens"),
-    ],
+    additional_inputs=additional_inputs,
     outputs=[gr.Textbox(label="Answer"), output_image, gr.Label()],
+    examples=examples,
     title=title,
     description=description,
-    allow_flagging="never",
+    allow_flagging="auto",
+    cache_examples=True,
 )
 # https://github.com/gradio-app/gradio/issues/4309
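
The body of `load_RAG_pipeline` is only partially visible in this diff (the `Settings` fragment above). As a reading aid, here is a minimal sketch of how the config keys touched in this commit could plausibly be wired into llama-index; the function name, the `thesis.pdf` path, and the exact `HuggingFaceLLM` kwargs are assumptions, not taken from the repository:

```python
# Hypothetical sketch, not the actual load_RAG_pipeline from app.py.
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM


def load_RAG_pipeline_sketch(config):
    # LLM driven by the config keys changed in this commit (phi-2, max_new_tokens=200, ...)
    llm = HuggingFaceLLM(
        model_name=config["LLM"],
        tokenizer_name=config["LLM"],
        context_window=config["context_window"],
        max_new_tokens=config["max_new_tokens"],
        generate_kwargs={
            "temperature": config["temperature"],
            "top_k": config["top_k"],
            "top_p": config["top_p"],
            "do_sample": True,
        },
        device_map="auto",
    )

    # Global llama-index settings, mirroring the fragment shown in the diff
    Settings.llm = llm
    Settings.embed_model = HuggingFaceEmbedding(model_name=config["embeddings"])
    Settings.chunk_size = config["chunk_size"]
    Settings.chunk_overlap = config["chunk_overlap"]

    # Index the manuscript and return a query engine ("thesis.pdf" is a placeholder path)
    documents = SimpleDirectoryReader(input_files=["thesis.pdf"]).load_data()
    index = VectorStoreIndex.from_documents(documents)
    return index.as_query_engine(similarity_top_k=config["similarity_top_k"])
```

With an engine built this way, the updated `ask_my_thesis` only rebuilds the pipeline when a slider value differs from the cached config and otherwise reuses `default_query_engine`, which keeps repeated queries from paying the model-loading cost.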