Fangrui Liu committed
Commit
a796108
•
1 Parent(s): 980721a
Files changed (6)
  1. .gitignore +167 -0
  2. README.md +108 -13
  3. app.py +163 -0
  4. callbacks/arxiv_callbacks.py +50 -0
  5. prompts/arxiv_prompt.py +12 -0
  6. requirements.txt +10 -0
.gitignore ADDED
@@ -0,0 +1,167 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+
+ # dataset files
+ data/
+ .streamlit/
+ *.ipynb
+ .DS_Store
README.md CHANGED
@@ -1,13 +1,108 @@
- ---
- title: ChatData
- emoji: 📈
- colorFrom: pink
- colorTo: purple
- sdk: streamlit
- sdk_version: 1.21.0
- app_file: app.py
- pinned: false
- license: mit
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # ChatData 🔍 📖
+ ***We are constantly improving LangChain's self-query retriever. Some of these features have not been merged upstream yet.***
+
+ [![](https://dcbadge.vercel.app/api/server/D2qpkqc4Jq?compact=true&style=flat)](https://discord.gg/D2qpkqc4Jq)
+ [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/myscaledb.svg?style=social&label=Follow%20%40MyScaleDB)](https://twitter.com/myscaledb)
+
+ ![ChatData](assets/logo.png)
+
+ Yet another chat-with-documents app, but one that supports queries over millions of files, built with [MyScale](https://myscale.com) and [LangChain](https://github.com/hwchase17/langchain/).
+
+ ## News 🔥
+
+ - 🔧 Our contribution to LangChain helps self-query retrievers [**filter with more types and functions**](https://python.langchain.com/docs/modules/data_connection/retrievers/how_to/self_query/myscale_self_query)
+ - 🌟 **We just opened a FREE pod hosting data for arXiv papers.** Anyone can try their own SQL with vector search! Feel the power when SQL meets vector search! See how to access the pod [here](#data-service).
+ - 📚 We have collected **1.67 million papers on arXiv**! We are collecting more, and we need your advice!
+ - More coming...
+
+ ## Quickstart
+
+ 1. Create a virtual environment
+
+     ```bash
+     python3 -m venv .venv
+     source .venv/bin/activate
+     ```
+
+ 2. Install dependencies
+
+     > This app currently uses [MyScale's fork of LangChain](https://github.com/myscale/langchain/tree/master). It contains [improved prompts](https://github.com/hwchase17/langchain/pull/6737#discussion_r1243527112) for the comparators `LIKE` and `CONTAIN` in the [MyScale self-query retriever](https://github.com/hwchase17/langchain/pull/6143) (see the sketch right after this list).
+
+     ```bash
+     python3 -m pip install -r requirements.txt
+     ```
+
+ 3. Run the app!
+
+     ```bash
+     # fill in your OpenAI key in .streamlit/secrets.toml
+     cp .streamlit/secrets.example.toml .streamlit/secrets.toml
+     # start the app
+     python3 -m streamlit run app.py
+     ```
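+
+ As a sketch of what those comparators buy you (illustrative only; `retriever` here stands for the self-query retriever that `app.py` builds in `build_retriever()`):
+
+ ```python
+ # "title is like `computer`" should become a LIKE filter on metadata.title,
+ # and "`cs.CV` in its categories" a CONTAIN filter on metadata.categories,
+ # both applied before the vector search runs.
+ docs = retriever.get_relevant_documents(
+     "What is a Bayesian network? Please use articles whose title is like "
+     "`computer` and that have `cs.CV` in their categories.")
+ ```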
+
+ ## Quick Navigator 🧭
+
+ - [How can I run this app?](README.md#how-to-run)
+
+ - [How is this app built?](docs/self-query.md)
+
+ - [What does the overall pipeline look like?](docs/self-query.md#query-pipeline-design)
+
+ - [How do LangChain and MyScale convert natural language to structured filters?](docs/self-query.md#selfqueryretriever-defines-interaction-between-vectorstore-and-your-app)
+
+ - [How can I make chain execution more responsive in LangChain?](docs/self-query.md#not-responsive-add-callbacks)
+
+ - Where can I get the arXiv data?
+   - [From parquet files on S3](docs/self-query.md#insert-data)
+   - <a name="data-service"></a>Or directly use the MyScale database as a service... for **FREE** ✨
+
+     ```python
+     import clickhouse_connect
+
+     client = clickhouse_connect.get_client(
+         host='msc-1decbcc9.us-east-1.aws.staging.myscale.cloud',
+         port=443,
+         username='chatdata',
+         password='myscale_rocks'
+     )
+     ```
+
+     Or put these settings in `.streamlit/secrets.toml`:
+
+     ```toml
+     MYSCALE_HOST = "msc-1decbcc9.us-east-1.aws.staging.myscale.cloud"
+     MYSCALE_PORT = 443
+     MYSCALE_USER = "chatdata"
+     MYSCALE_PASSWORD = "myscale_rocks"
+     ```
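+
+ With that client you can run SQL with vector search directly against the pod. A minimal sketch, assuming the table is `default.ChatArXiv` (the table name used in `app.py`), the vector column was built with InstructorXL, and MyScale's `distance()` function for vector search; adjust the names to the actual schema:
+
+ ```python
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
+
+ # Embed the question with the same model that built the vector column
+ # (instruction tuning omitted here for brevity).
+ model = HuggingFaceInstructEmbeddings(model_name='hkunlp/instructor-xl')
+ q = model.embed_query("diffusion models for image synthesis")
+
+ # The Python list renders as a ClickHouse array literal; a smaller
+ # distance means a closer match.
+ rows = client.query(
+     f"SELECT abstract, distance(vector, {q}) AS dist "
+     f"FROM default.ChatArXiv ORDER BY dist LIMIT 5")
+ for abstract, dist in rows.result_rows:
+     print(f"{dist:.3f}  {abstract[:80]}")
+ ```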
+
+ ## Introduction
+
+ ChatData brings millions of papers into your knowledge base. We imported 1.67 million papers with metadata (continuously updating), which includes:
+
+ 1. `metadata.authors`: the paper's authors, as a *list of strings*
+ 2. `metadata.abstract`: the paper's abstract, used as the ranking criterion (embedded with InstructorXL)
+ 3. `metadata.titles`: the paper's title
+ 4. `metadata.categories`: the paper's categories, as a *list of strings* like `["cs.CV"]`
+ 5. `metadata.pubdate`: the paper's publication date, as an *ISO 8601 formatted string*
+ 6. `metadata.primary_category`: the paper's primary category, as a *string*, as defined by arXiv
+ 7. `metadata.comment`: additional comments on the paper
+
+ For the overall table schema, please refer to the [table creation section in docs/self-query.md](docs/self-query.md#table-creation).
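+
+ To see how these columns come into play, here is a sketch that reuses the self-query retriever built in `app.py` (via `build_retriever()`); the trailing constraints are turned into structured filters over `metadata.authors` and `metadata.pubdate` before the vector search runs:
+
+ ```python
+ # `retriever` is the SelfQueryRetriever from app.py's build_retriever().
+ docs = retriever.get_relevant_documents(
+     "What is a neural network? "
+     "Please use articles published by Geoffrey Hinton after 2018.")
+ for d in docs:
+     print(d.metadata['title'], d.metadata['pubdate'])
+ ```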
+
+ ## How to run 🏃
+
+ ```bash
+ python3 -m pip install -r requirements.txt
+ python3 -m streamlit run app.py
+ ```
+
+ ## How to build? 🧱
+
+ See [docs/self-query.md](docs/self-query.md)
+
+ ## Special Thanks 👏 (Ordered Alphabetically)
+
+ - [ArXiv API](https://info.arxiv.org/help/api/index.html) for its open access interoperability with pre-print papers.
+ - [InstructorXL](https://huggingface.co/hkunlp/instructor-xl) for its promptable embeddings that improve retrieval performance.
+ - [LangChain🦜️🔗](https://github.com/hwchase17/langchain/) for its easy-to-use and composable API designs and prompts.
+ - [The Alexandria Index](https://alex.macrocosm.so/download) for providing an arXiv data index to the public.
app.py ADDED
@@ -0,0 +1,163 @@
+ import re
+ import pandas as pd
+ from os import environ
+ import streamlit as st
+
+ from langchain.vectorstores import MyScale, MyScaleSettings
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
+ from langchain.retrievers.self_query.base import SelfQueryRetriever
+ from langchain.chains.query_constructor.base import AttributeInfo
+ from langchain.chains import RetrievalQAWithSourcesChain
+ from langchain import OpenAI
+ from langchain.chat_models import ChatOpenAI
+
+ from prompts.arxiv_prompt import combine_prompt_template
+ from callbacks.arxiv_callbacks import ChatDataSearchCallBackHandler, ChatDataAskCallBackHandler
+ from langchain.prompts.prompt import PromptTemplate
+
+
+ environ['TOKENIZERS_PARALLELISM'] = 'true'
+
+ st.set_page_config(page_title="ChatData")
+
+ st.header("ChatData")
+
+ columns = ['title', 'id', 'categories', 'abstract', 'authors', 'pubdate']
+
+
+ def display(dataframe, columns):
+     # Guard on the dataframe argument (not the global `docs`) so the helper
+     # works for both the Query and the Ask flows.
+     if len(dataframe) > 0:
+         st.dataframe(dataframe[columns])
+     else:
+         st.write("Sorry 😵 we didn't find any articles related to your query.\nPlease try rephrasing your query to match the data types of the metadata columns.", unsafe_allow_html=True)
+
+
+ @st.experimental_singleton(show_spinner=False)
+ def build_retriever():
+     with st.spinner("Loading Model..."):
+         embeddings = HuggingFaceInstructEmbeddings(
+             model_name='hkunlp/instructor-xl',
+             embed_instruction="Represent the question for retrieving supporting scientific papers: ")
+
+     with st.spinner("Connecting DB..."):
+         myscale_connection = {
+             "host": st.secrets['MYSCALE_HOST'],
+             "port": st.secrets['MYSCALE_PORT'],
+             "username": st.secrets['MYSCALE_USER'],
+             "password": st.secrets['MYSCALE_PASSWORD'],
+         }
+
+         config = MyScaleSettings(**myscale_connection, table='ChatArXiv',
+                                  column_map={
+                                      "id": "id",
+                                      "text": "abstract",
+                                      "vector": "vector",
+                                      "metadata": "metadata"
+                                  })
+         doc_search = MyScale(embeddings, config)
+
+     with st.spinner("Building Self Query Retriever..."):
+         metadata_field_info = [
+             AttributeInfo(
+                 name="pubdate",
+                 description="The date the paper was published",
+                 type="timestamp",
+             ),
+             AttributeInfo(
+                 name="authors",
+                 description="List of author names",
+                 type="list[string]",
+             ),
+             AttributeInfo(
+                 name="title",
+                 description="Title of the paper",
+                 type="string",
+             ),
+             AttributeInfo(
+                 name="categories",
+                 description="arXiv categories of this paper",
+                 type="list[string]"
+             ),
+             AttributeInfo(
+                 name="length(categories)",
+                 description="number of arXiv categories of this paper",
+                 type="int"
+             ),
+         ]
+         retriever = SelfQueryRetriever.from_llm(
+             OpenAI(openai_api_key=st.secrets['OPENAI_API_KEY'], temperature=0),
+             doc_search, "Scientific papers indexed with abstracts. All in English.", metadata_field_info,
+             use_original_query=False)
+
+     with st.spinner('Building RetrievalQAWithSourcesChain...'):
+         document_with_metadata_prompt = PromptTemplate(
+             input_variables=["page_content", "id", "title", "authors"],
+             template="Content:\n\tTitle: {title}\n\tAbstract: {page_content}\n\tAuthors: {authors}\nSOURCE: {id}")
+         COMBINE_PROMPT = PromptTemplate(
+             template=combine_prompt_template, input_variables=["summaries", "question"])
+         chain = RetrievalQAWithSourcesChain.from_llm(
+             llm=ChatOpenAI(
+                 openai_api_key=st.secrets['OPENAI_API_KEY'], temperature=0.6),
+             document_prompt=document_with_metadata_prompt,
+             combine_prompt=COMBINE_PROMPT,
+             retriever=retriever,
+             return_source_documents=True,)
+     return [{'name': m.name, 'desc': m.description, 'type': m.type} for m in metadata_field_info], retriever, chain
+
+
+ if 'retriever' not in st.session_state:
+     st.session_state['metadata_columns'], \
+         st.session_state['retriever'], \
+         st.session_state['chain'] = \
+         build_retriever()
+
+ st.info("We provide the metadata columns below for querying. Please describe filters on those columns in natural language.\n\n" +
+         "For example: \n\n- What is a Bayesian network? Please use articles published later than Feb 2018, with more than 2 categories, whose title is like `computer`, and that must have `cs.CV` in their categories.\n" +
+         "- What is a neural network? Please use articles published by Geoffrey Hinton after 2018.\n" +
+         "- Introduce some applications of GANs published around 2019.")
+ st.info("You can retrieve papers with the `Query` button or ask questions based on the retrieved papers with the `Ask` button.", icon='💡')
+ st.dataframe(st.session_state.metadata_columns)
+ st.text_input("Ask a question:", key='query')
+ cols = st.columns([1, 1, 7])
+ cols[0].button("Query", key='search')
+ cols[1].button("Ask", key='ask')
+ plc_hldr = st.empty()
+
+ if st.session_state.search:
+     plc_hldr = st.empty()
+     with plc_hldr.expander('Query Log', expanded=True):
+         callback = ChatDataSearchCallBackHandler()
+         try:
+             docs = st.session_state.retriever.get_relevant_documents(
+                 st.session_state.query, callbacks=[callback])
+             callback.progress_bar.progress(value=1.0, text="Done!")
+             docs = pd.DataFrame(
+                 [{**d.metadata, 'abstract': d.page_content} for d in docs])
+
+             display(docs, columns)
+         except Exception as e:
+             st.write('Oops 😵 Something bad happened...')
+             # raise e
+
+ if st.session_state.ask:
+     plc_hldr = st.empty()
+     with plc_hldr.expander('Chat Log', expanded=True):
+         callback = ChatDataAskCallBackHandler()
+         try:
+             ret = st.session_state.chain(
+                 st.session_state.query, callbacks=[callback])
+             callback.progress_bar.progress(value=1.0, text="Done!")
+             st.markdown(
+                 f"### Answer from LLM\n{ret['answer']}\n### References")
+             docs = ret['source_documents']
+             # Raw string with escaped dots so `.` is matched literally.
+             ref = re.findall(
+                 r'(http://arxiv\.org/abs/\d{4}\.\d+v\d)', ret['sources'])
+             docs = pd.DataFrame([{**d.metadata, 'abstract': d.page_content}
+                                  for d in docs if d.metadata['id'] in ref])
+             display(docs, columns)
+         except Exception as e:
+             st.write('Oops 😵 Something bad happened...')
+             # raise e
callbacks/arxiv_callbacks.py ADDED
@@ -0,0 +1,50 @@
+ import streamlit as st
+ from langchain.callbacks.streamlit.streamlit_callback_handler import StreamlitCallbackHandler
+
+
+ class ChatDataSearchCallBackHandler(StreamlitCallbackHandler):
+     def __init__(self) -> None:
+         self.progress_bar = st.progress(value=0.0, text="Working...")
+         self.tokens_stream = ""
+
+     def on_llm_start(self, serialized, prompts, **kwargs) -> None:
+         pass
+
+     def on_text(self, text: str, **kwargs) -> None:
+         self.progress_bar.progress(value=0.2, text="Asking LLM...")
+
+     def on_chain_end(self, outputs, **kwargs) -> None:
+         self.progress_bar.progress(value=0.6, text='Searching in DB...')
+         st.markdown('### Generated Filter')
+         st.write(outputs['text'], unsafe_allow_html=True)
+
+     def on_chain_start(self, serialized, inputs, **kwargs) -> None:
+         pass
+
+
+ class ChatDataAskCallBackHandler(StreamlitCallbackHandler):
+     def __init__(self) -> None:
+         self.progress_bar = st.progress(value=0.0, text='Searching DB...')
+         self.status_bar = st.empty()
+         self.prog_value = 0.0
+         # Rough progress checkpoints keyed by fully qualified chain class path.
+         self.prog_map = {
+             'langchain.chains.qa_with_sources.retrieval.RetrievalQAWithSourcesChain': 0.2,
+             'langchain.chains.combine_documents.map_reduce.MapReduceDocumentsChain': 0.4,
+             'langchain.chains.combine_documents.stuff.StuffDocumentsChain': 0.8
+         }
+
+     def on_llm_start(self, serialized, prompts, **kwargs) -> None:
+         pass
+
+     def on_text(self, text: str, **kwargs) -> None:
+         pass
+
+     def on_chain_start(self, serialized, inputs, **kwargs) -> None:
+         cid = '.'.join(serialized['id'])
+         if cid != 'langchain.chains.llm.LLMChain':
+             self.progress_bar.progress(
+                 value=self.prog_map[cid], text=f'Running Chain `{cid}`...')
+             self.prog_value = self.prog_map[cid]
+         else:
+             # Each LLMChain call inside the map step bumps progress a little.
+             self.prog_value += 0.1
+             self.progress_bar.progress(
+                 value=self.prog_value, text=f'Running Chain `{cid}`...')
+
+     def on_chain_end(self, outputs, **kwargs) -> None:
+         pass
prompts/arxiv_prompt.py ADDED
@@ -0,0 +1,12 @@
+ from langchain.chains.qa_with_sources.map_reduce_prompt import combine_prompt_template
+
+ # Prepend ChatData-specific instructions to LangChain's stock combine prompt.
+ combine_prompt_template_ = (
+     "You are a helpful paper assistant. Your task is to provide information and answer any questions "
+     + "related to the papers given below. You should only use the abstracts of the selected papers as your source of information "
+     + "and try to provide concise and accurate answers to any questions asked by the user. If you are unable to find "
+     + "relevant information in the given sections, you will need to let the user know that the source does not contain "
+     + "relevant information but still try to provide an answer based on your general knowledge. The following is the related information "
+     + "about the papers that will help you answer users' questions; you MUST answer in the question's language:\n\n"
+ )
+
+ combine_prompt_template = combine_prompt_template_ + combine_prompt_template
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ langchain @ git+https://github.com/myscale/langchain.git@master
+ InstructorEmbedding
+ pandas
+ sentence_transformers
+ streamlit==1.20
+ altair==4.2.2
+ clickhouse-connect
+ openai
+ lark
+ tiktoken