Spaces:

NCTCMumbai
/

Customs_Manual_Chatbot

Running

NCTCMumbai committed on Apr 4

Commit

9ef7776

•

1 Parent(s): 1705054

Update backend/query_llm.py

Files changed (1) hide show

backend/query_llm.py CHANGED Viewed

@@ -9,21 +9,25 @@ from typing import Any, Dict, Generator, List
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
-tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
-temperature = 0.9
-top_p = 0.6
 repetition_penalty = 1.2
 OPENAI_KEY = getenv("OPENAI_API_KEY")
 HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
 hf_client = InferenceClient(
-        "mistralai/Mistral-7B-Instruct-v0.1",
         token=HF_TOKEN
         )
 def format_prompt(message: str, api_kind: str):
     """
     Formats the given message using a chat template.
@@ -46,7 +50,7 @@ def format_prompt(message: str, api_kind: str):
         raise ValueError("API is not supported")
-def generate_hf(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 256,
              top_p: float = 0.95, repetition_penalty: float = 1.0) -> Generator[str, None, str]:
     """
     Generate a sequence of tokens based on a given prompt and history using Mistral client.

 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
+#tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
+tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
+temperature = 0.4
+#top_p = 0.6
 repetition_penalty = 1.2
 OPENAI_KEY = getenv("OPENAI_API_KEY")
 HF_TOKEN = getenv("HUGGING_FACE_HUB_TOKEN")
+# hf_client = InferenceClient(
+#         "mistralai/Mistral-7B-Instruct-v0.1",
+#         token=HF_TOKEN
+#         )
 hf_client = InferenceClient(
+        "mistralai/Mixtral-8x7B-Instruct-v0.1",
         token=HF_TOKEN
         )
 def format_prompt(message: str, api_kind: str):
     """
     Formats the given message using a chat template.
         raise ValueError("API is not supported")
+def generate_hf(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 4000,
              top_p: float = 0.95, repetition_penalty: float = 1.0) -> Generator[str, None, str]:
     """
     Generate a sequence of tokens based on a given prompt and history using Mistral client.