Spaces:

sabazo
/

innoSageAgentOne

Sleeping

App Files Files Community

sabazo committed on Feb 28, 2024

Commit

feae37f

unverified ·

2 Parent(s): 647a5e9 c8fe20b

Merge pull request #3 from almutareb/reference_parser

Browse files

Files changed (2) hide show

mixtral_agent.py +116 -51
requirements.txt +183 -0

mixtral_agent.py CHANGED Viewed

@@ -12,9 +12,15 @@ from langchain.agents.format_scratchpad import format_log_to_str
 from langchain.agents.output_parsers import (
     ReActJsonSingleInputOutputParser,
 )
 from langchain.tools.render import render_text_description
 import os
 import dotenv
 dotenv.load_dotenv()
@@ -26,39 +32,92 @@ OLLMA_BASE_URL = os.getenv("OLLMA_BASE_URL")
 # supports many more optional parameters. Hover on your `ChatOllama(...)`
 # class to view the latest available supported parameters
 llm = ChatOllama(
-    model="mistral",
     base_url= OLLMA_BASE_URL
     )
 prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
-# using LangChain Expressive Language chain syntax
-# learn more about the LCEL on
-# https://python.langchain.com/docs/expression_language/why
-chain = prompt | llm | StrOutputParser()
-# for brevity, response is printed in terminal
-# You can use LangServe to deploy your application for
-# production
-print(chain.invoke({"topic": "Space travel"}))
-retriever = ArxivRetriever(load_max_docs=2)
-tools = [
-    create_retriever_tool(
-    retriever,
-    "search arxiv's database for",
-    "Use this to recomend the user a paper to read Unless stated please choose the most recent models",
-    # "Searches and returns excerpts from the 2022 State of the Union.",
-    ),
-    Tool(
-        name="SerpAPI",
-        description="A low-cost Google Search API. Useful for when you need to answer questions about current events. Input should be a search query.",
-        func=SerpAPIWrapper().run,
-    )
-]
 prompt = hub.pull("hwchase17/react-json")
@@ -85,33 +144,39 @@ agent_executor = AgentExecutor(
     agent=agent,
     tools=tools,
     verbose=True,
-    handle_parsing_errors=True #prevents error
     )
-# agent_executor.invoke(
-#     {
-#         "input": "Who is the current holder of the speed skating world record on 500 meters? What is her current age raised to the 0.43 power?"
-#     }
-# )
-# agent_executor.invoke(
-#     {
-#         "input": "what are large language models and why are they so expensive to run?"
-#     }
-# )
-# agent_executor.invoke(
-#     {
-#         "input": "How to generate videos from images using state of the art macchine learning models"
-#     }
-# )
-agent_executor.invoke(
-    {
-        "input": "How to generate videos from images using state of the art macchine learning models; Using the axriv retriever  " +
-        "add the urls of the papers used in the final answer using the metadata from the retriever"
-        # f"Please prioritize the newest papers this is the current data {get_current_date()}"
-    }
-)

 from langchain.agents.output_parsers import (
     ReActJsonSingleInputOutputParser,
 )
+# Import things that are needed generically
+from langchain.pydantic_v1 import BaseModel, Field
+from langchain.tools import BaseTool, StructuredTool, tool
+from typing import List, Dict
+from datetime import datetime
 from langchain.tools.render import render_text_description
 import os
 import dotenv
 dotenv.load_dotenv()
 # supports many more optional parameters. Hover on your `ChatOllama(...)`
 # class to view the latest available supported parameters
 llm = ChatOllama(
+    model="mistral:instruct",
     base_url= OLLMA_BASE_URL
     )
 prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
+arxiv_retriever = ArxivRetriever(load_max_docs=2)
+def format_info_list(info_list: List[Dict[str, str]]) -> str:
+    """
+    Format a list of dictionaries into a single string, one ``|...|`` row per dictionary.
+
+    Each row lists the dictionary's items as ``'key': 'value'`` pairs separated
+    by ``", "``; rows are joined with newlines. ``date`` / ``datetime`` values
+    are rendered as ``YYYY-MM-DD``.
+
+    Args:
+        info_list (List[Dict[str, str]]): A list of dictionaries containing information.
+
+    Returns:
+        str: The formatted rows; an empty string when ``info_list`` is empty.
+    """
+    # Imported locally under an alias: this file binds the name ``datetime`` to
+    # the *class* (``from datetime import datetime``), so ``datetime.date`` is a
+    # method rather than a type and ``isinstance(value, datetime.date)`` raises
+    # TypeError on the first value it sees.
+    import datetime as _dt
+
+    def _render(value):
+        # ``_dt.date`` also matches ``_dt.datetime``, which subclasses it.
+        if isinstance(value, _dt.date):
+            return value.strftime('%Y-%m-%d')
+        return value
+
+    return '\n'.join(
+        "|" + ", ".join(f"'{key}': '{_render(value)}'" for key, value in info_dict.items()) + "|"
+        for info_dict in info_list
+    )
+@tool
+def arxiv_search(query: str) -> str:
+    """Using the arxiv search and collects metadata."""
+    # The docstring above is left verbatim: LangChain's @tool decorator uses it
+    # as the tool description shown to the model, so it is runtime text.
+    #
+    # Queries the module-level ``arxiv_retriever``, records each returned
+    # document's metadata in the shared ``all_sources`` list, and returns the
+    # metadata list rendered with ``str()``.
+    #
+    # ``all_sources`` is created on demand: it is otherwise only assigned inside
+    # the ``__main__`` guard, so calling this tool from an importing module
+    # would raise NameError.
+    collected = globals().setdefault("all_sources", [])
+    documents = arxiv_retriever.invoke(query)
+    meta_data = [document.metadata for document in documents]
+    collected.extend(meta_data)
+    return str(meta_data)
+@tool
+def google_search(query: str) -> str:
+    """Using the google search and collects metadata."""
+    # The docstring above is left verbatim: LangChain's @tool decorator uses it
+    # as the tool description shown to the model, so it is runtime text.
+    #
+    # Runs the query through SerpAPIWrapper, reduces each organic result to a
+    # "Title / link / snippet" line, records those lines in the shared
+    # ``all_sources`` list, and returns the list rendered with ``str()``.
+    #
+    # ``all_sources`` is created on demand: it is otherwise only assigned inside
+    # the ``__main__`` guard, so calling this tool from an importing module
+    # would raise NameError.
+    collected = globals().setdefault("all_sources", [])
+    search_results: dict = SerpAPIWrapper().results(query)
+    # Tolerate a response without an "organic_results" key and hits that lack
+    # one of the three fields, instead of failing the agent run with KeyError.
+    organic_source = search_results.get('organic_results', [])
+    cleaner_sources = [
+        "Title: {title}, link: {link}, snippet: {snippet}".format(
+            title=hit.get("title", ""),
+            link=hit.get("link", ""),
+            snippet=hit.get("snippet", ""),
+        )
+        for hit in organic_source
+    ]
+    collected.extend(cleaner_sources)
+    return str(cleaner_sources)
+    # return organic_source
+# Tools handed to the agent executor: the two @tool-decorated search functions
+# defined above (arXiv metadata search and SerpAPI-backed Google search).
+tools = [arxiv_search,google_search]
+# Previous tool setup (retriever tool + SerpAPI ``Tool`` wrapper), kept
+# commented out for reference:
+# tools = [
+#     create_retriever_tool(
+#     retriever,
+#     "search arxiv's database for",
+#     "Use this to recomend the user a paper to read Unless stated please choose the most recent models",
+#     # "Searches and returns excerpts from the 2022 State of the Union.",
+#     ),
+#     Tool(
+#         name="SerpAPI",
+#         description="A low-cost Google Search API. Useful for when you need to answer questions about current events. Input should be a search query.",
+#         func=SerpAPIWrapper().run,
+#     )
+# ]
 prompt = hub.pull("hwchase17/react-json")
     agent=agent,
     tools=tools,
     verbose=True,
+    # handle_parsing_errors=True #prevents error
     )
+if __name__ == "__main__":
+    # Script entry point: run one sample query through the agent executor.
+    # global variable for collecting sources; the search tools append to it
+    all_sources =  []
+    # Bound to ``response`` rather than ``input`` so the builtin ``input()``
+    # is not shadowed at module level.
+    response = agent_executor.invoke(
+        {
+            "input": "How to generate videos from images using state of the art macchine learning models; Using the axriv retriever  " +
+            "add the urls of the papers used in the final answer using the metadata from the retriever please do not use '`' "
+            # f"Please prioritize the newest papers this is the current data {get_current_date()}"
+        }
+    )
+    # Alternative sample prompts, kept commented out for manual experiments:
+    # input_1 = agent_executor.invoke(
+    #     {
+    #         "input": "I am looking for a text to 3d model; Using the axriv retriever  " +
+    #         "add the urls of the papers used in the final answer using the metadata from the retriever"
+    #         # f"Please prioritize the newest papers this is the current data {get_current_date()}"
+    #     }
+    # )
+    # input_1 = agent_executor.invoke(
+    #     {
+    #         "input": "I am looking for a text to 3d model; Using the google search tool " +
+    #         "add the urls in the final answer using the metadata from the retriever, also provid a summary of the searches"
+    #         # f"Please prioritize the newest papers this is the current data {get_current_date()}"
+    #     }
+    # )

requirements.txt CHANGED Viewed

@@ -189,3 +189,186 @@ websockets==11.0.3
 wrapt==1.16.0
 yarl==1.9.4
 zipp==3.17.0

 wrapt==1.16.0
 yarl==1.9.4
 zipp==3.17.0
+aiofiles==23.2.1
+aiohttp==3.9.3
+aiosignal==1.3.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.2.0
+arxiv==2.1.0
+asgiref==3.7.2
+async-timeout==4.0.3
+attrs==23.2.0
+backoff==2.2.1
+bcrypt==4.1.2
+beautifulsoup4==4.12.3
+boto3==1.34.42
+botocore==1.34.42
+build==1.0.3
+cachetools==5.3.2
+certifi==2024.2.2
+chardet==5.2.0
+charset-normalizer==3.3.2
+chroma-hnswlib==0.7.3
+chromadb==0.4.22
+click==8.1.7
+coloredlogs==15.0.1
+contourpy==1.2.0
+cycler==0.12.1
+dataclasses-json==0.6.4
+dataclasses-json-speakeasy==0.5.11
+Deprecated==1.2.14
+emoji==2.10.1
+exceptiongroup==1.2.0
+faiss-cpu==1.7.4
+fastapi==0.109.2
+feedparser==6.0.10
+ffmpy==0.3.2
+filelock==3.13.1
+filetype==1.2.0
+flatbuffers==23.5.26
+fonttools==4.48.1
+frozenlist==1.4.1
+fsspec==2024.2.0
+gitdb==4.0.11
+GitPython==3.1.41
+google-auth==2.27.0
+google_search_results==2.4.2
+googleapis-common-protos==1.62.0
+gradio==3.48.0
+gradio_client==0.6.1
+greenlet==3.0.3
+grpcio==1.60.1
+h11==0.14.0
+httpcore==1.0.3
+httptools==0.6.1
+httpx==0.26.0
+huggingface-hub==0.20.3
+humanfriendly==10.0
+idna==3.6
+importlib-metadata==6.11.0
+importlib-resources==6.1.1
+Jinja2==3.1.3
+jmespath==1.0.1
+joblib==1.3.2
+jsonpatch==1.33
+jsonpath-python==1.0.6
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+kubernetes==29.0.0
+langchain==0.1.7
+langchain-community==0.0.20
+langchain-core==0.1.23
+langchainhub==0.1.14
+langdetect==1.0.9
+langsmith==0.0.87
+lxml==5.1.0
+MarkupSafe==2.1.5
+marshmallow==3.20.2
+matplotlib==3.8.3
+mmh3==4.1.0
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.5
+mypy-extensions==1.0.0
+networkx==3.2.1
+nltk==3.8.1
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.19.3
+nvidia-nvjitlink-cu12==12.3.101
+nvidia-nvtx-cu12==12.1.105
+oauthlib==3.2.2
+onnxruntime==1.17.0
+opentelemetry-api==1.22.0
+opentelemetry-exporter-otlp-proto-common==1.22.0
+opentelemetry-exporter-otlp-proto-grpc==1.22.0
+opentelemetry-instrumentation==0.43b0
+opentelemetry-instrumentation-asgi==0.43b0
+opentelemetry-instrumentation-fastapi==0.43b0
+opentelemetry-proto==1.22.0
+opentelemetry-sdk==1.22.0
+opentelemetry-semantic-conventions==0.43b0
+opentelemetry-util-http==0.43b0
+orjson==3.9.14
+overrides==7.7.0
+packaging==23.2
+pandas==2.2.0
+pillow==10.2.0
+posthog==3.4.1
+protobuf==4.25.2
+pulsar-client==3.4.0
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pydantic==2.6.1
+pydantic_core==2.16.2
+pydub==0.25.1
+pyparsing==3.1.1
+PyPika==0.48.9
+pyproject_hooks==1.0.0
+python-dateutil==2.8.2
+python-dotenv==1.0.1
+python-iso639==2024.2.7
+python-magic==0.4.27
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+rapidfuzz==3.6.1
+referencing==0.33.0
+regex==2023.12.25
+requests==2.31.0
+requests-oauthlib==1.3.1
+rpds-py==0.18.0
+rsa==4.9
+s3transfer==0.10.0
+safetensors==0.4.2
+scikit-learn==1.4.0
+scipy==1.12.0
+semantic-version==2.10.0
+sentence-transformers==2.3.1
+sentencepiece==0.1.99
+sgmllib3k==1.0.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.0
+soupsieve==2.5
+SQLAlchemy==2.0.27
+starlette==0.36.3
+sympy==1.12
+tabulate==0.9.0
+tenacity==8.2.3
+threadpoolctl==3.3.0
+tokenizers==0.15.2
+tomli==2.0.1
+toolz==0.12.1
+torch==2.2.0
+tqdm==4.66.2
+transformers==4.37.2
+triton==2.2.0
+typer==0.9.0
+types-requests==2.31.0.20240125
+typing-inspect==0.9.0
+typing_extensions==4.8.0
+tzdata==2024.1
+unstructured==0.12.4
+unstructured-client==0.18.0
+urllib3==2.0.7
+uvicorn==0.27.1
+uvloop==0.19.0
+validators==0.22.0
+watchfiles==0.21.0
+websocket-client==1.7.0
+websockets==11.0.3
+wrapt==1.16.0
+yarl==1.9.4
+zipp==3.17.0