harisankar99 committed on
Commit 374a0ea
1 Parent(s): 36d181b

Added articles and app

app.py CHANGED
@@ -1,63 +1,212 @@
-import gradio as gr
 from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
 
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
 
-    messages.append({"role": "user", "content": message})
 
-    response = ""
 
     for message in client.chat_completion(
         messages,
-        max_tokens=max_tokens,
         stream=True,
-        temperature=temperature,
-        top_p=top_p,
     ):
         token = message.choices[0].delta.content
 
         response += token
         yield response
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
-
-
 if __name__ == "__main__":
-    demo.launch()
+# import gradio as gr
 from huggingface_hub import InferenceClient
 
+# """
+# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+# """
+# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+
+# def respond(
+#     message,
+#     history: list[tuple[str, str]],
+#     system_message,
+#     max_tokens,
+#     temperature,
+#     top_p,
+# ):
+#     messages = [{"role": "system", "content": system_message}]
+
+#     for val in history:
+#         if val[0]:
+#             messages.append({"role": "user", "content": val[0]})
+#         if val[1]:
+#             messages.append({"role": "assistant", "content": val[1]})
+
+#     messages.append({"role": "user", "content": message})
+
+#     response = ""
+
+#     for message in client.chat_completion(
+#         messages,
+#         max_tokens=max_tokens,
+#         stream=True,
+#         temperature=temperature,
+#         top_p=top_p,
+#     ):
+#         token = message.choices[0].delta.content
+
+#         response += token
+#         yield response
+
+# """
+# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+# """
+# demo = gr.ChatInterface(
+#     respond,
+#     additional_inputs=[
+#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+#         gr.Slider(
+#             minimum=0.1,
+#             maximum=1.0,
+#             value=0.95,
+#             step=0.05,
+#             label="Top-p (nucleus sampling)",
+#         ),
+#     ],
+# )
+
+
+# if __name__ == "__main__":
+#     demo.launch()
+import gradio as gr
+import os
+import spaces
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
+import torch
+from threading import Thread
+from datasets import load_from_disk
+import time
+from sentence_transformers import SentenceTransformer
+ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
+dataset = load_from_disk('./articles_embedded')
+
+data = dataset
+data = data.add_faiss_index("embeddings")  # column name that has the embeddings of the dataset
+
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+# model_id = r"D:\Meta-Llama-3-8B-Instruct"
+
+# use quantization to lower GPU usage
+# bnb_config = BitsAndBytesConfig(
+#     load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
+# )
+# tokenizer = AutoTokenizer.from_pretrained(model_id)
+# model = AutoModelForCausalLM.from_pretrained(
+#     model_id,
+#     torch_dtype=torch.bfloat16,
+#     device_map="auto",
+#     quantization_config=bnb_config,
+# )
+# model = AutoModelForCausalLM.from_pretrained(
+#     "microsoft/Phi-3-mini-4k-instruct",
+#     device_map="cuda",
+#     torch_dtype="auto",
+#     trust_remote_code=True,
+# )
+# tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
+# terminators = [
+#     tokenizer.eos_token_id,
+#     tokenizer.convert_tokens_to_ids("<|eot_id|>")
+# ]
 
+SYS_PROMPT = """You are a battery assistant named EVolve, made by the company Lime.ai in Bangalore, for answering questions only about batteries and the EV industry.
+You are given the extracted parts of a long document and a question. Provide a conversational answer.
+If the context is more than 50 percent related to the question, give any doi or reference if prompted; otherwise do not give one even if prompted.
+If you don't know the answer, say "I don't know." Try to answer out of context only if you are more than 50 percent confident; otherwise say "I don't know." Do not mention the context you have been given. Answer as if you already know the context and are not reading from it. Don't make up an answer."""  # just say "I do not know." Don't make up an answer.
+
+
+def search(query: str, k: int = 3):
+    """Embed a new query and return the most similar documents from the index."""
+    embedded_query = ST.encode(query)  # embed the new query
+    scores, retrieved_examples = data.get_nearest_examples(  # retrieve results
+        "embeddings", embedded_query,  # compare the embedded query with the dataset embeddings
+        k=k  # get only the top k results
+    )
+    return scores, retrieved_examples
+
+
+def format_prompt(prompt, retrieved_documents, k):
+    """Use the retrieved documents to build the prompt sent to the model."""
+    PROMPT = f"Question:{prompt}\nTell me the reference and doi from where you have taken the answer if it is available.\nContext:"
+    for idx in range(k):
+        PROMPT += (
+            "Reference: " + str(retrieved_documents["title"][idx])
+            + "\n doi: " + str(retrieved_documents["doi"][idx])
+            + "\n Authors:" + str(retrieved_documents["author"][idx])
+            + "\n Page Number:" + str(retrieved_documents["pages"][idx])
+            + "\n Content: " + str(retrieved_documents["text"][idx]) + "\n"
+        )
+    return PROMPT
+
+
+@spaces.GPU(duration=150)
+def talk(prompt, history):
+    k = 1  # number of retrieved documents
+    scores, retrieved_documents = search(prompt, k)
+    formatted_prompt = format_prompt(prompt, retrieved_documents, k)
+    formatted_prompt = formatted_prompt[:2000]  # truncate to avoid GPU OOM
+    messages = [{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": formatted_prompt}]
+    response = ""  # accumulator for the streamed answer
+    # tell the model to generate
+    # input_ids = tokenizer.apply_chat_template(
+    #     messages,
+    #     add_generation_prompt=True,
+    #     return_tensors="pt"
+    # ).to(model.device)
     for message in client.chat_completion(
         messages,
+        max_tokens=1024,
         stream=True,
+        temperature=0.6,
+        top_p=0.9,
     ):
         token = message.choices[0].delta.content
 
         response += token
         yield response
+    # outputs = model.generate(
+    #     input_ids,
+    #     max_new_tokens=1024,
+    #     eos_token_id=terminators,
+    #     do_sample=True,
+    #     temperature=0.6,
+    #     top_p=0.9,
+    # )
+    # streamer = TextIteratorStreamer(
+    #     tokenizer, timeout=10000.0, skip_prompt=True, skip_special_tokens=True
+    # )
+    # generate_kwargs = dict(
+    #     input_ids=input_ids,
+    #     streamer=streamer,
+    #     max_new_tokens=1024,
+    #     do_sample=True,
+    #     top_p=0.95,
+    #     temperature=0.75,
+    #     eos_token_id=terminators,
+    # )
+    # t = Thread(target=model.generate, kwargs=generate_kwargs)
+    # t.start()
 
+    # outputs = []
+    # for text in streamer:
+    #     outputs.append(text)
+    #     print(outputs)
+    #     yield "".join(outputs)
+
+
+TITLE = "EVolve AI"
+
+DESCRIPTION = """
+This is a project by Lime.ai
+
+Resources used to build this project:
+
+* embedding model : https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
+* faiss docs : https://huggingface.co/docs/datasets/v2.18.0/en/package_reference/main_classes#datasets.Dataset.add_faiss_index
+* chatbot : https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
 """
 demo = gr.ChatInterface(
+    fn=talk,
+    chatbot=gr.Chatbot(
+        show_label=True,
+        show_share_button=True,
+        show_copy_button=True,
+        likeable=True,
+        layout="bubble",
+        bubble_full_width=False,
+    ),
+    theme="Soft",
+    examples=[
+        ["What are the reasons of capacity fade due to LAM and LLI?"],
+        ["How many cycles do Li-air batteries last before degradation?"],
+        ["What are different types of battery chemistries?"],
+    ],
+    title=TITLE,
+    description=DESCRIPTION,
 )
 
 if __name__ == "__main__":
+    demo.launch(debug=True, share=True)
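As a quick sanity check (not part of the commit), the retrieval step of the new app.py can be exercised outside the Gradio UI. This is a minimal sketch under the assumption that the repository dependencies are installed and the ./articles_embedded folder is on disk; importing app runs its module-level setup (embedding model, dataset, FAISS index) but does not launch the interface, since demo.launch() is guarded by __main__. The example question is made up for illustration.

# Illustrative only: run from the repo root with the dependencies installed
# and the ./articles_embedded dataset present.
from app import search, format_prompt  # import triggers dataset loading and FAISS indexing

question = "What causes capacity fade in Li-ion cells?"  # hypothetical example query
k = 1
scores, retrieved_documents = search(question, k)
print(scores)                               # FAISS distances for the top-k matches
print(retrieved_documents["title"][:k])      # titles of the retrieved article chunks
print(format_prompt(question, retrieved_documents, k)[:500])  # what the model actually sees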
articles_embedded/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5213cd83fb8bab5ddcb64c85b9c7e7b66049a15d69a96d6ef92a64ed70b82267
+size 5332744
articles_embedded/dataset_info.json ADDED
@@ -0,0 +1,35 @@
+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "doi": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "title": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "author": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "pages": {
+      "dtype": "int64",
+      "_type": "Value"
+    },
+    "text": {
+      "dtype": "string",
+      "_type": "Value"
+    },
+    "embeddings": {
+      "feature": {
+        "dtype": "float32",
+        "_type": "Value"
+      },
+      "_type": "Sequence"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}
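dataset_info.json above describes the schema of the articles_embedded dump (doi, title, author, pages and text strings plus an embeddings sequence of float32), which app.py reads back with load_from_disk. The actual preprocessing pipeline is not part of this commit; the following is a hypothetical sketch of how such a dump could be produced with the same mixedbread-ai/mxbai-embed-large-v1 encoder used in app.py.

# Hypothetical preprocessing sketch; the real pipeline behind this commit is not shown in the diff.
from datasets import Dataset
from sentence_transformers import SentenceTransformer

ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

# Assumed inputs: one entry per article chunk, matching the schema in dataset_info.json.
records = {
    "doi": ["10.0000/example-doi"],
    "title": ["Example article title"],
    "author": ["Example Author"],
    "pages": [1],
    "text": ["Example chunk of article text about battery degradation."],
}

dataset = Dataset.from_dict(records)
# Add an "embeddings" column computed from the text column.
dataset = dataset.map(lambda row: {"embeddings": ST.encode(row["text"])})
dataset.save_to_disk("./articles_embedded")  # what app.py later loads and indexes with FAISS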
articles_embedded/state.json ADDED
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "c280083923fa8c13",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}