hayuh committed on
Commit a1162e9
1 Parent(s): 180cacd

Upload 15 files

.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_2.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_3.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_4.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/2024_EDS_5.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/Unknown_EDS_1.pdf filter=lfs diff=lfs merge=lfs -text
+Ehlers-Danlos-1/Unknown_EDS_5.pdf filter=lfs diff=lfs merge=lfs -text
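These attribute lines route matching paths through the Git LFS clean/smudge filters, so only a small pointer stub is stored in Git. A rough sketch of checking a path against these patterns in Python (fnmatch only approximates real gitattributes matching rules, and the helper below is illustrative, not part of this repository):

```python
from fnmatch import fnmatch
from posixpath import basename

# Patterns taken from the .gitattributes additions above (subset).
LFS_PATTERNS = [
    "*.zip",
    "*.zst",
    "*tfevents*",
    "Ehlers-Danlos-1/2024_EDS_2.pdf",
    "Ehlers-Danlos-1/Unknown_EDS_1.pdf",
]

def is_lfs_tracked(path: str) -> bool:
    """Approximate check: does this path match any of the LFS patterns?"""
    return any(fnmatch(path, pat) or fnmatch(basename(path), pat) for pat in LFS_PATTERNS)

print(is_lfs_tracked("Ehlers-Danlos-1/2024_EDS_2.pdf"))  # True
print(is_lfs_tracked("Ehlers-Danlos-1/2024_EDS_6.pdf"))  # False: committed without an LFS rule
```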
Ehlers-Danlos-1/2024_EDS_1.pdf ADDED
The diff for this file is too large to render.
 
Ehlers-Danlos-1/2024_EDS_2.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46fc736ff4174473e0a846b7ca8430c140d89cd2c9f663e105bc48b33f8d9c99
+size 2616000
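Each LFS-tracked PDF in this commit is stored as a three-line pointer stub like the one above (spec version, SHA-256 of the content, size in bytes) rather than as the PDF bytes themselves. A small illustrative parser for such a pointer, assuming you are reading the raw pointer text as stored in the repository (a normal LFS checkout would already have replaced it with the real PDF):

```python
def parse_lfs_pointer(path: str) -> dict:
    """Split a Git LFS pointer file into its version, oid, and size fields."""
    fields = {}
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    fields["size"] = int(fields["size"])  # size is recorded in bytes
    return fields

# Example (hypothetical local path holding the raw pointer text):
# parse_lfs_pointer("Ehlers-Danlos-1/2024_EDS_2.pdf")
# -> {'version': 'https://git-lfs.github.com/spec/v1',
#     'oid': 'sha256:46fc736f...', 'size': 2616000}
```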
Ehlers-Danlos-1/2024_EDS_3.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fef5c8c375297158ad7ad63166405ca7ce4ac511371a8454fe9df972755b0fe
+size 10344738
Ehlers-Danlos-1/2024_EDS_4.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25db35c77fd6aeba6b15278671a462b30ffbb6f97eb5f221e0459f6d11c0f8ed
+size 1071576
Ehlers-Danlos-1/2024_EDS_5.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57ef98bcb445da6abda66de35204634bd81d8c6dcdf53bfc3be54447ec9ad0ad
+size 2772421
Ehlers-Danlos-1/2024_EDS_6.pdf ADDED
Binary file (146 kB).

Ehlers-Danlos-1/2024_EDS_7.pdf ADDED
The diff for this file is too large to render.
 
Ehlers-Danlos-1/Unknown_EDS_1.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbeaf13d3298a00bc1c7acfba3177a0c639f677e0f0941452709fe60542052d4
+size 21553835
Ehlers-Danlos-1/Unknown_EDS_2.pdf ADDED
Binary file (428 kB).

Ehlers-Danlos-1/Unknown_EDS_3.pdf ADDED
Binary file (817 kB).

Ehlers-Danlos-1/Unknown_EDS_4.pdf ADDED
Binary file (392 kB).
 
Ehlers-Danlos-1/Unknown_EDS_5.pdf ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c5a77524b6bb4dca40798af5ff3e3c622216a13ac21a60d9befce255977b47a
+size 1847313
helper.py ADDED
@@ -0,0 +1,13 @@
+# Add your utilities or helper functions to this file.
+
+import os
+from dotenv import load_dotenv, find_dotenv
+
+# These helpers expect a .env file in the directory above the lesson, formatted as: API_KEYNAME=AStringThatIsTheLongAPIKeyFromSomeService
+def load_env():
+    _ = load_dotenv(find_dotenv())
+
+def get_openai_api_key():
+    load_env()
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    return openai_api_key
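A minimal usage sketch for these helpers, assuming a .env file containing OPENAI_API_KEY is reachable by find_dotenv; the model name and the llama-index OpenAI wrapper below are illustrative choices, not mandated by this commit:

```python
from helper import get_openai_api_key
from llama_index.llms.openai import OpenAI  # pinned in requirements.txt below

api_key = get_openai_api_key()
if api_key is None:
    raise RuntimeError("OPENAI_API_KEY not found; check the .env file location and contents.")

# Assumed model name; swap in whichever OpenAI model you actually use.
llm = OpenAI(model="gpt-3.5-turbo", api_key=api_key)
print(llm.complete("Reply with a single word: ready."))
```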
requirements.txt CHANGED
@@ -1 +1,14 @@
-huggingface_hub==0.22.2
+# Requirements file
+# Note which revision of Python this targets, for example 3.9.6.
+# List every pip-installable dependency here, including its pinned revision.
+
+
+python-dotenv==1.0.0
+
+llama-index==0.10.27
+llama-index-llms-openai==0.1.15
+llama-index-embeddings-openai==0.1.7
+
+gradio
+transformers
+torch>=1.8.0
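A quick, illustrative check that the pinned packages are installed at the expected versions (package names and pins are copied from the requirements above; the script itself is not part of the repo):

```python
from importlib.metadata import PackageNotFoundError, version

PINNED = {
    "python-dotenv": "1.0.0",
    "llama-index": "0.10.27",
    "llama-index-llms-openai": "0.1.15",
    "llama-index-embeddings-openai": "0.1.7",
}

for package, expected in PINNED.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        print(f"{package}: NOT INSTALLED (expected {expected})")
        continue
    status = "ok" if installed == expected else f"mismatch (expected {expected})"
    print(f"{package}: {installed} -> {status}")
```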
utils.py ADDED
@@ -0,0 +1,82 @@
+from llama_index.core import SimpleDirectoryReader
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core import Settings
+from llama_index.llms.openai import OpenAI
+from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.core import SummaryIndex, VectorStoreIndex
+from llama_index.core.tools import QueryEngineTool
+from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
+from llama_index.core.selectors import LLMSingleSelector
+
+from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, SummaryIndex
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.tools import FunctionTool, QueryEngineTool
+from llama_index.core.vector_stores import MetadataFilters, FilterCondition
+from typing import List, Optional
+
+
+
+def get_doc_tools(
+    file_path: str,
+    name: str,
+) -> tuple:
+    """Get vector query and summary query tools from a document."""
+
+    # load documents
+    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
+    splitter = SentenceSplitter(chunk_size=1024)
+    nodes = splitter.get_nodes_from_documents(documents)
+    vector_index = VectorStoreIndex(nodes)
+
+    def vector_query(
+        query: str,
+        page_numbers: Optional[List[str]] = None
+    ) -> str:
+        """Use to answer questions over a given paper.
+
+        Useful if you have specific questions over the paper.
+        Always leave page_numbers as None UNLESS there is a specific page you want to search for.
+
+        Args:
+            query (str): the string query to be embedded.
+            page_numbers (Optional[List[str]]): Filter by set of pages. Leave as None
+                if we want to perform a vector search
+                over all pages. Otherwise, filter by the set of specified pages.
+
+        """
+
+        page_numbers = page_numbers or []
+        metadata_dicts = [
+            {"key": "page_label", "value": p} for p in page_numbers
+        ]
+
+        query_engine = vector_index.as_query_engine(
+            similarity_top_k=2,
+            filters=MetadataFilters.from_dicts(
+                metadata_dicts,
+                condition=FilterCondition.OR
+            )
+        )
+        response = query_engine.query(query)
+        return response
+
+
+    vector_query_tool = FunctionTool.from_defaults(
+        name=f"vector_tool_{name}",
+        fn=vector_query
+    )
+
+    summary_index = SummaryIndex(nodes)
+    summary_query_engine = summary_index.as_query_engine(
+        response_mode="tree_summarize",
+        use_async=True,
+    )
+    summary_tool = QueryEngineTool.from_defaults(
+        name=f"summary_tool_{name}",
+        query_engine=summary_query_engine,
+        description=(
+            f"Useful for summarization questions related to {name}"
+        ),
+    )
+
+    return vector_query_tool, summary_tool
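A usage sketch for get_doc_tools, assuming the PDFs above live under Ehlers-Danlos-1/ and that an OpenAI key is available via helper.get_openai_api_key(). The FunctionCallingAgentWorker/AgentRunner wiring is one plausible way to consume the returned tools with llama-index 0.10.x; it is not defined anywhere in this commit:

```python
from pathlib import Path

from llama_index.core.agent import AgentRunner, FunctionCallingAgentWorker
from llama_index.llms.openai import OpenAI

from helper import get_openai_api_key
from utils import get_doc_tools

llm = OpenAI(model="gpt-3.5-turbo", api_key=get_openai_api_key())

# Build one vector-query tool and one summary tool per PDF in the dataset folder.
all_tools = []
for pdf_path in sorted(Path("Ehlers-Danlos-1").glob("*.pdf")):
    vector_tool, summary_tool = get_doc_tools(str(pdf_path), pdf_path.stem)
    all_tools.extend([vector_tool, summary_tool])

# Hand every tool to a function-calling agent and ask a cross-document question.
agent_worker = FunctionCallingAgentWorker.from_tools(all_tools, llm=llm, verbose=True)
agent = AgentRunner(agent_worker)
print(agent.query("Which documents discuss diagnostic criteria, and what do they say?"))
```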