AdityaKhalkar committed on
Commit
fc4e82c
1 Parent(s): d1d080a

added files

Browse files
Files changed (4) hide show
  1. README.md +5 -12
  2. datasets.csv +0 -0
  3. requirements.txt +5 -0
  4. streamlit_app.py +61 -0
README.md CHANGED
@@ -1,13 +1,6 @@
1
- ---
2
- title: Dataset Finder
3
- emoji: 💻
4
- colorFrom: pink
5
- colorTo: indigo
6
- sdk: streamlit
7
- sdk_version: 1.33.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
+ # Welcome to Streamlit!
 
 
 
 
 
 
 
 
 
 
2
 
3
+ Edit `/streamlit_app.py` to customize this app to your heart's desire. :heart:
4
+
5
+ If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
6
+ forums](https://discuss.streamlit.io).
datasets.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ altair
2
+ pandas
3
+ streamlit
4
+ tensorflow
5
+ transformers
streamlit_app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
from pathlib import Path
from urllib.parse import quote_plus

import pandas as pd
import streamlit as st
from transformers import pipeline
4
+
5
# Streamlit re-executes this script on every widget interaction, so the
# expensive loaders are wrapped in Streamlit caches and run only once.
@st.cache_resource
def _load_classifier():
    """Build the zero-shot classification pipeline (facebook/bart-large-mnli)."""
    return pipeline("zero-shot-classification",
                    model="facebook/bart-large-mnli")


@st.cache_data
def _load_datasets():
    """Read datasets.csv from the directory that contains this script."""
    # Resolve relative to this file rather than a machine-specific absolute
    # path, so the app works wherever the repository is checked out.
    csv_path = Path(__file__).resolve().parent / "datasets.csv"
    return pd.read_csv(csv_path)


# Zero-shot classifier used by tag_finder to score keywords against user text.
classifier = _load_classifier()

# Lookup table; the code below reads its 'Keyword' and 'Datasets' columns.
df = _load_datasets()
11
+
12
def tag_finder(user_input):
    """Return the keywords that best match *user_input* and their datasets.

    The unique values of ``df['Keyword']`` are scored against the text with
    the module-level zero-shot ``classifier``; a cutoff is derived from the
    returned scores and every keyword scoring at or above it is kept.

    Args:
        user_input: Free text entered by the user.

    Returns:
        A ``(useful_tags, relevant_datasets)`` tuple of lists.
        ``relevant_datasets`` holds the ``'Datasets'`` values of every row
        whose ``'Keyword'`` is one of ``useful_tags``, grouped in tag order.
        Both lists are empty when the input is blank or there are no
        keywords to classify against.
    """
    # Nothing to classify: bail out before the expensive model call.
    if not user_input or not user_input.strip():
        return [], []

    # Hand the pipeline a plain list of labels and skip missing CSV cells.
    keywords = df['Keyword'].dropna().unique().tolist()
    if not keywords:
        return [], []

    result = classifier(user_input, keywords)
    labels = result['labels']
    scores = result['scores']

    # Derive the cutoff by walking the scores in the order returned
    # (presumably best first -- the pipeline documents likelihood ordering).
    # The cutoff starts at the first score and is lowered to the current
    # score whenever that score is at least 10% below the cutoff; the walk
    # stops at the first score that is closer than that.
    threshold = scores[0]
    for score in scores:
        if score == threshold:
            continue
        if (threshold - score) >= threshold / 10:
            threshold = score
        else:
            break

    useful_tags = [label for label, score in zip(labels, scores)
                   if score >= threshold]

    relevant_datasets = []
    for tag in useful_tags:
        relevant_datasets.extend(df[df['Keyword'] == tag]['Datasets'].tolist())
    return useful_tags, relevant_datasets
28
+
29
+ # Define the Streamlit app
30
def main():
    """Render the page: read the user's text and list matching datasets.

    When "Submit" is pressed the text is passed to ``tag_finder``. Each
    returned dataset is shown as a card with its name, a Google Dataset
    Search link and a tag; a warning is shown when no tag matched.
    """
    # Set title and description
    st.title("Dataset Tagging System")
    st.write("Enter your text below and get relevant tags for your dataset.")
    # Get user input
    user_input = st.text_input("Enter your text:")

    if st.button("Submit"):
        # Find relevant tags and datasets
        relevant_tags, relevant_datasets = tag_finder(user_input)

        if relevant_tags:
            st.subheader("Datasets:")
            for dataset in relevant_datasets:
                # Show a keyword that actually matched the query, not just
                # the first keyword this dataset is listed under in the CSV.
                matched_rows = df[(df['Datasets'] == dataset)
                                  & df['Keyword'].isin(relevant_tags)]
                tag = matched_rows['Keyword'].iloc[0]

                # The card is rendered with unsafe_allow_html, so CSV values
                # are escaped before being placed in the markup, and the
                # dataset name is URL-encoded for the search query string.
                safe_name = html.escape(str(dataset))
                safe_tag = html.escape(str(tag))
                query = quote_plus(str(dataset))

                st.markdown(f'''
                <div style="border: 2px solid #555; border-radius: 10px; padding: 10px; margin-bottom: 10px; background-color: #333; color: white; display: flex; justify-content: space-between; align-items: center;">
                  <div>{safe_name}</div>
                  <div style="padding: 5px 10px; border: #fff 2px solid; border-radius: 5px;transition: background-color 0.3s;"><a href="https://datasetsearch.research.google.com/search?search&src=0&query={query}" style = "text-decoration: none; color: white;">link</a></div>
                  <div style="border: 1px solid #666; padding: 5px; background-color: #444; border-radius: 12px;">
                    <img width="20" height="20" style="margin: 5px;" src="https://img.icons8.com/ios/50/ffffff/price-tag--v2.png" alt="price-tag--v2"/>{safe_tag}
                  </div>
                </div>
                ''', unsafe_allow_html=True)
        else:
            st.warning("No relevant tags found.")


# Run the app
if __name__ == "__main__":
    main()