SeaLLM-Chat

Running on Zero

App Files Files Community

NGUYEN, Xuan Phi committed on Apr 18

Commit

7194bc8

•

1 Parent(s): 3a2a429

update

Browse files

Files changed (2) hide show

multipurpose_chatbot/demos/langchain_web_search.py +2 -251
multipurpose_chatbot/demos/websearch_chat_interface.py +5 -1

multipurpose_chatbot/demos/langchain_web_search.py CHANGED Viewed

@@ -144,6 +144,7 @@ class AnyEnginePipeline(BaseLLM):
         # List to hold all results
         text_generations: List[str] = []
         stop_strings = stop
         for i in range(0, len(prompts), self.batch_size):
             batch_prompts = prompts[i : i + self.batch_size]
             responses = []
@@ -156,7 +157,6 @@ class AnyEnginePipeline(BaseLLM):
                     text = text[len(prompt):]
                 if stop is not None and any(x in text for x in stop):
                     text = text[:text.index(stop[0])]
-                # print(f">>{text}")
                 text_generations.append(text)
         return LLMResult(
             generations=[[Generation(text=text)] for text in text_generations]
@@ -456,8 +456,7 @@ Let's begin! Below is the question from the user.
 def create_web_search_engine(model_engine=None):
     # from langchain_community.tools.tavily_search import TavilySearchResults
     if model_engine is None:
-        from ..globals import MODEL_ENGINE
-        model_engine = MODEL_ENGINE
     from langchain_core.utils.function_calling import (
         convert_to_openai_function,
@@ -472,8 +471,6 @@ def create_web_search_engine(model_engine=None):
     tools = [NewTavilySearchResults(max_results=1)]
     formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
-    # tools = load_tools(["llm-math"], llm=web_search_llm)
-    # formatted_tools = render_text_description_and_args(tools)
     prompt_template = ChatPromptTemplate.from_messages(
         [
             # (
@@ -510,249 +507,3 @@ def create_web_search_engine(model_engine=None):
-# if LANGCHAIN_AVAILABLE:
-#     class LooseReActJsonSingleInputOutputParser(ReActJsonSingleInputOutputParser):
-#         def parse(self, text: str) -> AgentAction | AgentFinish:
-#             try:
-#                 return super().parse(text)
-#             except OutputParserException as e:
-#                 return AgentFinish({"output": text}, text)
-    # class ChatHuggingfaceFromLocalPipeline(ChatHuggingFace):
-    #     @root_validator()
-    #     def validate_llm(cls, values: dict) -> dict:
-    #         return values
-    #     def _resolve_model_id(self) -> None:
-    #         """Resolve the model_id from the LLM's inference_server_url"""
-    #         self.model_id = self.llm.model_id
-    # class NewHuggingfacePipeline(HuggingFacePipeline):
-    #     bos_token = "<bos>"
-    #     add_bos_token = True
-    #     @classmethod
-    #     def from_model_id(
-    #         cls,
-    #         model_id: str,
-    #         task: str,
-    #         backend: str = "default",
-    #         device: Optional[int] = -1,
-    #         device_map: Optional[str] = None,
-    #         model_kwargs: Optional[dict] = None,
-    #         pipeline_kwargs: Optional[dict] = None,
-    #         batch_size: int = 2,
-    #         model = None,
-    #         **kwargs: Any,
-    #     ) -> HuggingFacePipeline:
-    #         """Construct the pipeline object from model_id and task."""
-    #         try:
-    #             from transformers import (
-    #                 AutoModelForCausalLM,
-    #                 AutoModelForSeq2SeqLM,
-    #                 AutoTokenizer,
-    #             )
-    #             from transformers import pipeline as hf_pipeline
-    #         except ImportError:
-    #             raise ValueError(
-    #                 "Could not import transformers python package. "
-    #                 "Please install it with `pip install transformers`."
-    #             )
-    #         _model_kwargs = model_kwargs or {}
-    #         tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)
-    #         if model is None:
-    #             try:
-    #                 if task == "text-generation":
-    #                     if backend == "openvino":
-    #                         try:
-    #                             from optimum.intel.openvino import OVModelForCausalLM
-    #                         except ImportError:
-    #                             raise ValueError(
-    #                                 "Could not import optimum-intel python package. "
-    #                                 "Please install it with: "
-    #                                 "pip install 'optimum[openvino,nncf]' "
-    #                             )
-    #                         try:
-    #                             # use local model
-    #                             model = OVModelForCausalLM.from_pretrained(
-    #                                 model_id, **_model_kwargs
-    #                             )
-    #                         except Exception:
-    #                             # use remote model
-    #                             model = OVModelForCausalLM.from_pretrained(
-    #                                 model_id, export=True, **_model_kwargs
-    #                             )
-    #                     else:
-    #                         model = AutoModelForCausalLM.from_pretrained(
-    #                             model_id, **_model_kwargs
-    #                         )
-    #                 elif task in ("text2text-generation", "summarization", "translation"):
-    #                     if backend == "openvino":
-    #                         try:
-    #                             from optimum.intel.openvino import OVModelForSeq2SeqLM
-    #                         except ImportError:
-    #                             raise ValueError(
-    #                                 "Could not import optimum-intel python package. "
-    #                                 "Please install it with: "
-    #                                 "pip install 'optimum[openvino,nncf]' "
-    #                             )
-    #                         try:
-    #                             # use local model
-    #                             model = OVModelForSeq2SeqLM.from_pretrained(
-    #                                 model_id, **_model_kwargs
-    #                             )
-    #                         except Exception:
-    #                             # use remote model
-    #                             model = OVModelForSeq2SeqLM.from_pretrained(
-    #                                 model_id, export=True, **_model_kwargs
-    #                             )
-    #                     else:
-    #                         model = AutoModelForSeq2SeqLM.from_pretrained(
-    #                             model_id, **_model_kwargs
-    #                         )
-    #                 else:
-    #                     raise ValueError(
-    #                         f"Got invalid task {task}, "
-    #                         f"currently only {VALID_TASKS} are supported"
-    #                     )
-    #             except ImportError as e:
-    #                 raise ValueError(
-    #                     f"Could not load the {task} model due to missing dependencies."
-    #                 ) from e
-    #         else:
-    #             print(f'PIpeline skipping creation of model because model is given')
-    #         if tokenizer.pad_token is None:
-    #             tokenizer.pad_token_id = model.config.eos_token_id
-    #         if (
-    #             (
-    #                 getattr(model, "is_loaded_in_4bit", False)
-    #                 or getattr(model, "is_loaded_in_8bit", False)
-    #             )
-    #             and device is not None
-    #             and backend == "default"
-    #         ):
-    #             logger.warning(
-    #                 f"Setting the `device` argument to None from {device} to avoid "
-    #                 "the error caused by attempting to move the model that was already "
-    #                 "loaded on the GPU using the Accelerate module to the same or "
-    #                 "another device."
-    #             )
-    #             device = None
-    #         if (
-    #             device is not None
-    #             and importlib.util.find_spec("torch") is not None
-    #             and backend == "default"
-    #         ):
-    #             import torch
-    #             cuda_device_count = torch.cuda.device_count()
-    #             if device < -1 or (device >= cuda_device_count):
-    #                 raise ValueError(
-    #                     f"Got device=={device}, "
-    #                     f"device is required to be within [-1, {cuda_device_count})"
-    #                 )
-    #             if device_map is not None and device < 0:
-    #                 device = None
-    #             if device is not None and device < 0 and cuda_device_count > 0:
-    #                 logger.warning(
-    #                     "Device has %d GPUs available. "
-    #                     "Provide device={deviceId} to `from_model_id` to use available"
-    #                     "GPUs for execution. deviceId is -1 (default) for CPU and "
-    #                     "can be a positive integer associated with CUDA device id.",
-    #                     cuda_device_count,
-    #                 )
-    #         if device is not None and device_map is not None and backend == "openvino":
-    #             logger.warning("Please set device for OpenVINO through: " "'model_kwargs'")
-    #         if "trust_remote_code" in _model_kwargs:
-    #             _model_kwargs = {
-    #                 k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
-    #             }
-    #         _pipeline_kwargs = pipeline_kwargs or {}
-    #         pipeline = hf_pipeline(
-    #             task=task,
-    #             model=model,
-    #             tokenizer=tokenizer,
-    #             device=device,
-    #             device_map=device_map,
-    #             batch_size=batch_size,
-    #             model_kwargs=_model_kwargs,
-    #             **_pipeline_kwargs,
-    #         )
-    #         if pipeline.task not in VALID_TASKS:
-    #             raise ValueError(
-    #                 f"Got invalid task {pipeline.task}, "
-    #                 f"currently only {VALID_TASKS} are supported"
-    #             )
-    #         return cls(
-    #             pipeline=pipeline,
-    #             model_id=model_id,
-    #             model_kwargs=_model_kwargs,
-    #             pipeline_kwargs=_pipeline_kwargs,
-    #             batch_size=batch_size,
-    #             **kwargs,
-    #         )
-    #     def _generate(
-    #         self,
-    #         prompts: List[str],
-    #         stop: Optional[List[str]] = None,
-    #         run_manager: Optional[CallbackManagerForLLMRun] = None,
-    #         **kwargs: Any,
-    #     ) -> LLMResult:
-    #         # List to hold all results
-    #         text_generations: List[str] = []
-    #         pipeline_kwargs = kwargs.get("pipeline_kwargs", self.pipeline_kwargs)
-    #         pipeline_kwargs = pipeline_kwargs if len(pipeline_kwargs) > 0 else self.pipeline_kwargs
-    #         for i in range(0, len(prompts), self.batch_size):
-    #             batch_prompts = prompts[i : i + self.batch_size]
-    #             bos_token = self.pipeline.tokenizer.convert_ids_to_tokens(self.pipeline.tokenizer.bos_token_id)
-    #             for i in range(len(batch_prompts)):
-    #                 if not batch_prompts[i].startswith(bos_token) and self.add_bos_token:
-    #                     batch_prompts[i] = bos_token + batch_prompts[i]
-    #             # print(f'PROMPT: {stop=} {pipeline_kwargs=} ==================\n{batch_prompts[0]}\n==========================')
-    #             # Process batch of prompts
-    #             responses = self.pipeline(
-    #                 batch_prompts,
-    #                 **pipeline_kwargs,
-    #             )
-    #             # Process each response in the batch
-    #             for j, (prompt, response) in enumerate(zip(batch_prompts, responses)):
-    #                 if isinstance(response, list):
-    #                     # if model returns multiple generations, pick the top one
-    #                     response = response[0]
-    #                 if self.pipeline.task == "text-generation":
-    #                     text = response["generated_text"]
-    #                 elif self.pipeline.task == "text2text-generation":
-    #                     text = response["generated_text"]
-    #                 elif self.pipeline.task == "summarization":
-    #                     text = response["summary_text"]
-    #                 elif self.pipeline.task in "translation":
-    #                     text = response["translation_text"]
-    #                 else:
-    #                     raise ValueError(
-    #                         f"Got invalid task {self.pipeline.task}, "
-    #                         f"currently only {VALID_TASKS} are supported"
-    #                     )
-    #                 # Append the processed text to results
-    #                 if text.startswith(prompt):
-    #                     text = text[len(prompt):]
-    #                 if stop is not None and any(x in text for x in stop):
-    #                     text = text[:text.index(stop[0])]
-    #                 # print(f">>{text}")
-    #                 text_generations.append(text)
-    #         return LLMResult(
-    #             generations=[[Generation(text=text)] for text in text_generations]
-    #         )

         # List to hold all results
         text_generations: List[str] = []
         stop_strings = stop
+        print(f'Pipeline run: {len(prompts)}')
         for i in range(0, len(prompts), self.batch_size):
             batch_prompts = prompts[i : i + self.batch_size]
             responses = []
                     text = text[len(prompt):]
                 if stop is not None and any(x in text for x in stop):
                     text = text[:text.index(stop[0])]
                 text_generations.append(text)
         return LLMResult(
             generations=[[Generation(text=text)] for text in text_generations]
 def create_web_search_engine(model_engine=None):
     # from langchain_community.tools.tavily_search import TavilySearchResults
     if model_engine is None:
+        raise ValueError(f'model_engine empty')
     from langchain_core.utils.function_calling import (
         convert_to_openai_function,
     tools = [NewTavilySearchResults(max_results=1)]
     formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
     prompt_template = ChatPromptTemplate.from_messages(
         [
             # (

multipurpose_chatbot/demos/websearch_chat_interface.py CHANGED Viewed

@@ -115,8 +115,8 @@ def chat_web_search_response_stream_multiturn_engine(
     if len(message) == 0:
         raise gr.Error("The message cannot be empty!")
     response_output = agent_executor.invoke({"input": message})
-    print(response_output)
     response = response_output['output']
     full_prompt = gradio_history_to_conversation_prompt(message.strip(), history=history, system_prompt=system_prompt)
@@ -217,6 +217,10 @@ class WebSearchChatInterfaceDemo(BaseDemo):
         return demo_chat
 """
 run

     if len(message) == 0:
         raise gr.Error("The message cannot be empty!")
+    print(f'Begin agent_invoke.')
     response_output = agent_executor.invoke({"input": message})
     response = response_output['output']
     full_prompt = gradio_history_to_conversation_prompt(message.strip(), history=history, system_prompt=system_prompt)
         return demo_chat
 """
 run