Spaces:
Sleeping
Sleeping
datasets:
- namespace: local
  name: OpenHermes-2.5-10k
  source:
    dataset_name: teknium/OpenHermes-2.5
    sample_size: 9999
    source_name: huggingface
  embeddings:
  - path:
    - conversations
    - '*'
    - value
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - - test__clusters
        - text
      - - conversations
        - '*'
        - value
      - - test__cluster
        - text
      markdown_paths: []
- namespace: local
  name: OpenOrca-100k
  source:
    dataset_name: Open-Orca/OpenOrca
    sample_size: 100000
    source_name: huggingface
  embeddings:
  - path: question
    embedding: openai
  signals:
  - path: question
    signal:
      embedding: openai
      namespace: local
      concept_name: physics
      version: 21
      signal_name: concept_score
  - path: question
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - question
      - response
      markdown_paths: []
- namespace: local
  name: glue_ax
  source:
    dataset_name: glue
    config_name: ax
    source_name: huggingface
  embeddings:
  - path: premise
    embedding: gte-small
  - path: hypothesis
    embedding: gte-small
  signals:
  - path: premise
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - premise
      markdown_paths: []
- namespace: local
  name: ableton
  source:
    source_name: llama_index_docs
  embeddings:
  - path: text
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - text
      markdown_paths: []
- namespace: local
  name: Capybara
  source:
    dataset_name: LDJnr/Capybara
    source_name: huggingface
  embeddings:
  - path:
    - conversation
    - '*'
    - input
    embedding: gte-small
  signals:
  - path:
    - conversation
    - '*'
    - input
    signal:
      signal_name: text_statistics
  - path:
    - conversation
    - '*'
    - input
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  settings:
    ui:
      media_paths:
      - input
      - - conversation
        - '*'
        - input
      - - conversation
        - '*'
        - output
      markdown_paths: []
- namespace: local
  name: OpenOrca-10k
  source:
    dataset_name: Open-Orca/OpenOrca
    sample_size: 10000
    source_name: huggingface
  embeddings:
  - path: response
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - question
      - response
      markdown_paths: []
- namespace: local
  name: cpb
  source:
    dataset_name: LDJnr/Capybara
    source_name: huggingface
  signals:
  - path:
    - conversation
    - '*'
    - input
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - input
      - - conversation
        - '*'
        - input
      - - conversation
        - '*'
        - output
      markdown_paths: []
- namespace: local
  name: mikeion_dissertation_data_with_split
  source:
    dataset_name: mikeion/dissertation_data_with_split
    source_name: huggingface
  settings:
    ui:
      media_paths:
      - content
      markdown_paths: []
- namespace: local
  name: mikeion_dissertation_data
  source:
    dataset_name: mikeion/dissertation_data
    source_name: huggingface
  settings:
    ui:
      media_paths:
      - - messages
        - '*'
        - attachments
        - '*'
        - url
      markdown_paths: []
- namespace: local
  name: test
  source:
    filepaths:
    - ~/Code/lilac_datasets/test.json
    source_name: json
  settings:
    ui:
      media_paths:
      - json
      markdown_paths: []
- namespace: local
  name: OrcaMyles
  source:
    dataset_name: Open-Orca/OpenOrca
    source_name: huggingface
  settings:
    ui:
      media_paths:
      - question
      - response
      markdown_paths: []
- namespace: local
  name: OpenOrca
  source:
    dataset_name: Open-Orca/OpenOrca
    source_name: huggingface
  embeddings:
  - path: question
    embedding: gte-small
  - path: response
    embedding: gte-small
  signals:
  - path: question
    signal:
      signal_name: pii
  - path: question
    signal:
      signal_name: text_statistics
  - path: response
    signal:
      signal_name: pii
  - path: response
    signal:
      signal_name: markdown_code_block
  - path: response
    signal:
      signal_name: text_statistics
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  settings:
    ui:
      media_paths:
      - question
      - response
      markdown_paths: []
- namespace: local
  name: imdb
  source:
    dataset_name: imdb
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      signal_name: pii
  - path: label
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
      markdown_paths: []
- namespace: local
  name: capybara
  source:
    dataset_name: capybara
    source_name: huggingface
  settings:
    ui:
      media_paths:
      - - conversation
        - '*'
        - input
      - - conversation
        - '*'
        - output
      markdown_paths: []
- namespace: local
  name: db-openorca-10k
  source:
    dataset_name: Open-Orca/OpenOrca
    sample_size: 10000
    source_name: huggingface
  embeddings:
  - path: question
    embedding: gte-small
  signals:
  - path: question
    signal:
      embedding: gte-small
      namespace: local
      concept_name: physics
      signal_name: concept_score
  settings:
    ui:
      media_paths:
      - question
      - response
      - - question__cluster
        - text
      markdown_paths: []