yazan-istatiyeh committed on
Commit
9b7711f
1 Parent(s): d65fc88

Added files and database

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ distilbert/88cf4ce2-b959-412a-9bef-3f16720e41fa filter=lfs diff=lfs merge=lfs -text
37
+ distilbert/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
demo.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from dotenv import load_dotenv
4
+ load_dotenv()
5
+
6
+ # VectorIndexRetriever
7
+ from llama_index.schema import NodeWithScore
8
+ from llama_index import GPTVectorStoreIndex, ServiceContext
9
+ from llama_index.storage import StorageContext
10
+ from llama_index.vector_stores import ChromaVectorStore
11
+ from llama_index.memory import ChatMemoryBuffer
12
+ from llama_index.embeddings import HuggingFaceEmbedding
13
+
14
+ import chromadb
15
+
16
# --- Vector store -----------------------------------------------------------
# Open the persistent Chroma database stored under `distilbert/` and get the
# "distilbert" collection, creating it if it does not exist yet.
chroma_client = chromadb.PersistentClient(path="distilbert/")
chroma_collection = chroma_client.get_or_create_collection("distilbert")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# --- Embeddings and index ---------------------------------------------------
# The index is built on top of the existing vector store, using the
# `distilbert-base-uncased` Hugging Face model for embeddings.
embed_model = HuggingFaceEmbedding(model_name="distilbert-base-uncased")
service_context = ServiceContext.from_defaults(embed_model=embed_model)
index = GPTVectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,
    service_context=service_context,
)

# --- Chat engine ------------------------------------------------------------
# A single module-level memory buffer (1000-token limit) backs the chat
# engine; `inference` resets it on request.
memory = ChatMemoryBuffer.from_defaults(token_limit=1000)

chat_engine = index.as_chat_engine(
    similarity_top_k=5,
    chat_mode="context",
    memory=memory,
    system_prompt=(
        "You are an Artificial Intelligence (AI)-powered app called STAR "
        "(Standards Technical Assistance Resource) that could "
        "streamline the process and offer requirement "
        "recommendations, you can be used as copilot, to help "
        "mission designers blast off with even greater "
        "confidence, knowing that they have the right "
        "requirements in place. You should analyze and suggest "
        "improvements to a NASA standards."
    ),
)
query_engine = index.as_query_engine()

# Separate retriever used to list the source pages next to each answer.
retriever = index.as_retriever(similarity_top_k=5)

# Retrieved nodes whose score is not strictly above this value are not listed
# as references.
THRESHOLD = 1e-9
51
+
52
def _format_references(scored_files):
    """Render ``(file_name, score)`` pairs as a numbered reference list.

    Each file name is split on ``'_'``: element 1 is parsed as the page
    number and elements 3 onward are re-joined into the source document
    name, with the trailing file extension removed.
    NOTE(review): this presumably matches a ``<prefix>_<page>_<x>_<document>``
    naming scheme used when the pages were indexed -- confirm against the
    ingestion script.

    Args:
        scored_files: Iterable of ``(file_name, score)`` tuples.

    Returns:
        One line per reference, joined with newlines; ``""`` when there are
        no references.

    Raises:
        IndexError, ValueError: If a file name does not contain an integer
            as its second ``'_'``-separated element.
    """
    lines = []
    for position, (file_name, score) in enumerate(scored_files, start=1):
        parts = file_name.split('_')
        page_number = int(parts[1])
        # Re-join with '_' so underscores inside the document name survive,
        # and strip the extension by its dot rather than a fixed 4 characters.
        document = '_'.join(parts[3:]).rsplit('.', 1)[0]
        lines.append(
            f'{position}- Document: {document}, Page: {page_number} '
            f'(Score: {score}).'
        )
    return '\n'.join(lines)


def inference(text, reset):
    """Answer ``text`` with the chat engine and list the retrieved sources.

    Args:
        text: The user's message. When empty, no chat or retrieval call is
            made.
        reset: When true, the shared chat memory is cleared.

    Returns:
        A ``(response, references)`` pair. ``response`` is whatever
        ``chat_engine.chat`` returns and ``references`` is a newline-separated
        string describing retrieved nodes scoring above ``THRESHOLD``. Both
        are ``""`` when ``text`` is empty, so every path yields two values.
    """
    if not text:
        if reset:
            memory.reset()
        return "", ""

    response = chat_engine.chat(text)

    # NOTE(review): metadata is read from the wrapped node (`hit.node`), where
    # it is presumably a plain dict; the previous `hit.metadata()` call form
    # looks incompatible with a dict-valued attribute -- confirm against the
    # pinned llama-index version.
    scored_files = [
        (hit.node.metadata['file_name'], hit.get_score())
        for hit in retriever.retrieve(text)
        if hit.get_score() > THRESHOLD
    ]
    references = _format_references(scored_files)

    # NOTE(review): the memory is cleared *after* answering, so this message
    # is still answered with the previous history in context -- confirm that
    # this is the intended meaning of the "Reset chat history" checkbox.
    if reset:
        memory.reset()
    return response, references
80
+
81
# Example rows shown under the interface: one (input text, reset flag) pair
# per row, in the same order as the `inference` parameters.
examples = [
    [
        (
            "How should the length-to-depth ratio of the initial flaw be "
            "assumed when using the NASGRO® computer program for glass "
            "structure analysis? Please provide a detailed procedure."
        ),
        True,
    ],
]
89
+
90
# Gradio UI: a text box plus a reset checkbox feed `inference`, whose two
# return values fill the "Response" and "References" text boxes.
playground = gr.Interface(
    fn=inference,
    inputs=[
        gr.Textbox(
            value="Hello, who are you?",
            label="Input",
            info="Chat with STAR.",
        ),
        gr.Checkbox(
            label="Reset chat history",
            info="Start a new conversation from scratch with STAR.",
        ),
    ],
    outputs=[
        gr.Textbox(label="Response"),
        gr.Textbox(label="References"),
    ],
    examples=examples,
    cache_examples=True,
    # Gradio 3.x takes a string here; the boolean form is deprecated and
    # only mapped to "never" with a warning.
    allow_flagging="never",
)

playground.launch(share=True)
distilbert/88cf4ce2-b959-412a-9bef-3f16720e41fa/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23b875d0c9daae9e61adcd2c8db74630fd44f38b1409042455908fa07f718743
3
+ size 51392000
distilbert/88cf4ce2-b959-412a-9bef-3f16720e41fa/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90716667250485db3ae5cbd6d6794aafe244b85861ca79a632d92813b6fb5407
3
+ size 100
distilbert/88cf4ce2-b959-412a-9bef-3f16720e41fa/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:811fdefc63959ca8c1c224917c80aad24cff9f5fa0d0c7f3c9b6a407d3885aa0
3
+ size 926190
distilbert/88cf4ce2-b959-412a-9bef-3f16720e41fa/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b405bbc082fd52d8bb9351d7bec3d3f98acdfe03d96b891786e59aa0b8cfd273
3
+ size 64000
distilbert/88cf4ce2-b959-412a-9bef-3f16720e41fa/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c7ab8525b2c907e225dedc7feb3d9e0dfdb2cedb417aa2f277ab219d6927e46
3
+ size 136624
distilbert/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7db200a30e658c17439694c9fcb339ac383b585b3eee4f939f510d2ac964247
3
+ size 323895296
requirements.txt ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.0.0
2
+ aiofiles==23.2.1
3
+ aiohttp==3.8.5
4
+ aiosignal==1.3.1
5
+ altair==5.1.2
6
+ annotated-types==0.6.0
7
+ anyio==3.7.1
8
+ astunparse==1.6.3
9
+ async-timeout==4.0.3
10
+ attrs==23.1.0
11
+ backoff==2.2.1
12
+ bcrypt==4.0.1
13
+ beautifulsoup4==4.12.2
14
+ bs4==0.0.1
15
+ cachetools==5.3.1
16
+ certifi==2023.7.22
17
+ charset-normalizer==3.3.0
18
+ chroma-hnswlib==0.7.3
19
+ chromadb==0.4.13
20
+ click==8.1.7
21
+ coloredlogs==15.0.1
22
+ contourpy==1.1.1
23
+ cycler==0.12.1
24
+ dataclasses-json==0.6.1
25
+ diskcache==5.6.3
26
+ exceptiongroup==1.1.3
27
+ fastapi==0.103.2
28
+ ffmpy==0.3.1
29
+ filelock==3.12.4
30
+ flatbuffers==23.5.26
31
+ fonttools==4.43.1
32
+ frozenlist==1.4.0
33
+ fsspec==2023.9.2
34
+ gast==0.5.4
35
+ google-auth==2.23.2
36
+ google-auth-oauthlib==1.0.0
37
+ google-pasta==0.2.0
38
+ gradio==3.47.1
39
+ gradio_client==0.6.0
40
+ greenlet==3.0.0
41
+ grpcio==1.59.0
42
+ h11==0.14.0
43
+ h5py==3.9.0
44
+ httpcore==0.18.0
45
+ httptools==0.6.0
46
+ httpx==0.25.0
47
+ huggingface-hub==0.17.3
48
+ humanfriendly==10.0
49
+ idna==3.4
50
+ importlib-resources==6.1.0
51
+ Jinja2==3.1.2
52
+ joblib==1.3.2
53
+ jsonpatch==1.33
54
+ jsonpointer==2.4
55
+ jsonschema==4.19.1
56
+ jsonschema-specifications==2023.7.1
57
+ keras==2.14.0
58
+ kiwisolver==1.4.5
59
+ langchain==0.0.310
60
+ langsmith==0.0.43
61
+ libclang==16.0.6
62
+ llama-index==0.8.40
63
+ llama_cpp_python==0.2.11
64
+ Markdown==3.5
65
+ MarkupSafe==2.1.3
66
+ marshmallow==3.20.1
67
+ matplotlib==3.8.0
68
+ ml-dtypes==0.2.0
69
+ monotonic==1.6
70
+ mpmath==1.3.0
71
+ multidict==6.0.4
72
+ mypy-extensions==1.0.0
73
+ nest-asyncio==1.5.8
74
+ networkx==3.1
75
+ nltk==3.8.1
76
+ numpy==1.26.0
77
+ nvidia-cublas-cu11==11.11.3.6
78
+ nvidia-cublas-cu12==12.1.3.1
79
+ nvidia-cuda-cupti-cu11==11.8.87
80
+ nvidia-cuda-cupti-cu12==12.1.105
81
+ nvidia-cuda-nvcc-cu11==11.8.89
82
+ nvidia-cuda-nvrtc-cu12==12.1.105
83
+ nvidia-cuda-runtime-cu11==11.8.89
84
+ nvidia-cuda-runtime-cu12==12.1.105
85
+ nvidia-cudnn-cu11==8.7.0.84
86
+ nvidia-cudnn-cu12==8.9.2.26
87
+ nvidia-cufft-cu11==10.9.0.58
88
+ nvidia-cufft-cu12==11.0.2.54
89
+ nvidia-curand-cu11==10.3.0.86
90
+ nvidia-curand-cu12==10.3.2.106
91
+ nvidia-cusolver-cu11==11.4.1.48
92
+ nvidia-cusolver-cu12==11.4.5.107
93
+ nvidia-cusparse-cu11==11.7.5.86
94
+ nvidia-cusparse-cu12==12.1.0.106
95
+ nvidia-nccl-cu11==2.16.5
96
+ nvidia-nccl-cu12==2.18.1
97
+ nvidia-nvjitlink-cu12==12.2.140
98
+ nvidia-nvtx-cu12==12.1.105
99
+ oauthlib==3.2.2
100
+ onnxruntime==1.16.0
101
+ openai==0.28.1
102
+ opt-einsum==3.3.0
103
+ orjson==3.9.7
104
+ overrides==7.4.0
105
+ packaging==23.2
106
+ pandas==2.1.1
107
+ Pillow==10.0.1
108
+ posthog==3.0.2
109
+ protobuf==4.24.4
110
+ pulsar-client==3.3.0
111
+ pyasn1==0.5.0
112
+ pyasn1-modules==0.3.0
113
+ pydantic==2.4.2
114
+ pydantic_core==2.10.1
115
+ pydub==0.25.1
116
+ pyparsing==3.1.1
117
+ pypdf==3.16.2
118
+ PyPDF2==3.0.1
119
+ PyPika==0.48.9
120
+ python-dateutil==2.8.2
121
+ python-dotenv==1.0.0
122
+ python-multipart==0.0.6
123
+ pytz==2023.3.post1
124
+ PyYAML==6.0.1
125
+ referencing==0.30.2
126
+ regex==2023.10.3
127
+ requests==2.31.0
128
+ requests-oauthlib==1.3.1
129
+ rpds-py==0.10.4
130
+ rsa==4.9
131
+ safetensors==0.4.0
132
+ scikit-learn==1.3.1
133
+ scipy==1.11.3
134
+ semantic-version==2.10.0
135
+ sentence-transformers==2.2.2
136
+ sentencepiece==0.1.99
137
+ six==1.16.0
138
+ sniffio==1.3.0
139
+ soupsieve==2.5
140
+ SQLAlchemy==2.0.21
141
+ starlette==0.27.0
142
+ sympy==1.12
143
+ tenacity==8.2.3
144
+ tensorboard==2.14.1
145
+ tensorboard-data-server==0.7.1
146
+ tensorflow==2.14.0
147
+ tensorflow-estimator==2.14.0
148
+ tensorflow-io-gcs-filesystem==0.34.0
149
+ tensorrt==8.5.3.1
150
+ termcolor==2.3.0
151
+ threadpoolctl==3.2.0
152
+ tiktoken==0.5.1
153
+ tokenizers==0.14.1
154
+ toolz==0.12.0
155
+ torch==2.1.0
156
+ torchvision==0.16.0
157
+ tqdm==4.66.1
158
+ transformers==4.34.0
159
+ triton==2.1.0
160
+ typer==0.9.0
161
+ typing-inspect==0.9.0
162
+ typing_extensions==4.8.0
163
+ tzdata==2023.3
164
+ urllib3==1.26.17
165
+ uvicorn==0.23.2
166
+ uvloop==0.17.0
167
+ watchfiles==0.20.0
168
+ websockets==11.0.3
169
+ Werkzeug==3.0.0
170
+ wrapt==1.14.1
171
+ yarl==1.9.2