import random

import nltk
import numpy as np
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from streamlit_chat import message

# Download the tokenizer data used by nltk.tokenize.sent_tokenize below.
nltk.download("punkt")


context = "To extract information from documents, use sentence similarity task. To classify sentiments, use text classification task. To do sentiment analysis, use text classification task. To detect masks from images, use object detection task. To extract name or address from documents use token classification task. To extract name or address from invoices, use token classification task. To build voice enabled applications, you can use automatic speech recognition task. You can retrieve information from documents using sentence similarity task. You can summarize papers using summarization task. You can convert text to speech using text-to-speech task. To detect language spoken in an audio, you can use audio classification task. To detect emotion in an audio, you can use audio classification task. To detect commands in an audio, you can use audio classification task. To decompose sounds in a recording, use audio-to-audio task. To answer questions from a document, you can use question answering task. To answer FAQs from your customers, you can use question answering task. To see if a text is grammatically correct, you can use text classification task. To augment your training data, you can use text classification task. To detect pedestrians, you can use object detection task."


# Map each task name to its documentation page on the Hugging Face Hub.
link_dict = {
    "audio-to-audio": "https://huggingface.co/tasks/audio-to-audio",
    "audio classification": "https://huggingface.co/tasks/audio-classification",
    "automatic speech recognition": "https://huggingface.co/tasks/automatic-speech-recognition",
    "fill-mask": "https://huggingface.co/tasks/fill-mask",
    "image classification": "https://huggingface.co/tasks/image-classification",
    "image segmentation": "https://huggingface.co/tasks/image-segmentation",
    "question answering": "https://huggingface.co/tasks/question-answering",
    "text-to-speech": "https://huggingface.co/tasks/text-to-speech",
    "sentence similarity": "https://huggingface.co/tasks/sentence-similarity",
    "summarization": "https://huggingface.co/tasks/summarization",
    "text generation": "https://huggingface.co/tasks/text-generation",
    "translation": "https://huggingface.co/tasks/translation",
    "token classification": "https://huggingface.co/tasks/token-classification",
    "object detection": "https://huggingface.co/tasks/object-detection",
}


model_name = 'sentence-transformers/msmarco-distilbert-base-v4'
max_sequence_length = 512

model = SentenceTransformer(model_name)
model.max_seq_length = max_sequence_length
# Split the knowledge base into individual sentences to build the retrieval corpus.
corpus = []
sentence_count = []

for sent in context.split("."):
    sentences = nltk.tokenize.sent_tokenize(str(sent), language='english')
    sentence_count.append(len(sentences))
    for s in sentences:
        corpus.append(s)
        
        
        
# Load pre-computed embeddings of the corpus (one row per corpus sentence).
corpus_embeddings = np.load('task_embeddings_msmarco-distilbert-base-v4.npy')
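# Note: the .npy file above is assumed to have been produced offline with the same
# model, roughly like this sketch (not part of the running app):
#     corpus_embeddings = model.encode(corpus)
#     np.save("task_embeddings_msmarco-distilbert-base-v4.npy", corpus_embeddings)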




def find_sentences(query):
    # Embed the query and retrieve the five most similar corpus sentences.
    query_embedding = model.encode(query)
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=5)
    hits = hits[0]  # results for the single query, best match first

    for hit in hits:
        corpus_id = hit['corpus_id']

        # Find the source document based on the sentence index and
        # return it for the top-ranked hit.
        count = 0
        for idx, c in enumerate(sentence_count):
            count += c
            if corpus_id > count - 1:
                continue
            return corpus[idx]

    return ""
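# Illustrative example (behavior inferred, not guaranteed): a query like
#     find_sentences("How can I extract information from invoices?")
# is expected to return the corpus sentence
# "To extract name or address from invoices, use token classification task",
# which the chat handler below maps to the token classification docs link.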







message_history = [{"text":"Let's find out the best task for your use case! Tell me about your use case :)", "is_user":False}]

st.subheader("If you don't know how to build your machine learning product for your use case, Taskmaster is here to help you! πŸͺ„βœ¨")
st.write("Hint: Try asking about your use case in question form.")
for msg in message_history:
    message(msg["text"], is_user=msg["is_user"])  # display all previous messages

placeholder = st.empty()  # placeholder for latest message


user_input = st.text_input("Ask me πŸ€—")
if user_input:
    message_history.append({"text": user_input, "is_user": True})

    # Retrieve the knowledge-base sentence closest to the user's question.
    model_answer = find_sentences(user_input)

    # If the retrieved sentence mentions a known task, answer with a link to its docs.
    key_exists = False
    for key in link_dict:
        if key in model_answer:
            key_exists = True
            url = link_dict[key]
            response_templates = [
                f"I think that {model_answer} is the best task for this 🀩 Check out the page πŸ‘‰πŸΌ {url}",
                f"I think you should use {model_answer} πŸͺ„ Check it out here πŸ‘‰πŸΌ {url}",
                f"I think {model_answer} should work for you πŸ€“ Check out the page πŸ‘‰πŸΌ {url}",
            ]
            bot_answer = random.choice(response_templates)
            message_history.append({"text": bot_answer, "is_user": False})
            break

    if not key_exists:
        fallback_templates = [
            "I didn't get the question 🧐 Could you please ask again? Try 'What should I use for detecting masks in an image?'",
            "Hmm, not sure I know the answer, maybe you could ask differently? πŸ€“",
            "Sorry, I didn't understand you, maybe you could ask differently? πŸ€“ Try asking 'What should I use to extract name in a document' πŸ€—",
        ]
        bot_answer = random.choice(fallback_templates)
        message_history.append({"text": bot_answer, "is_user": False})

    with placeholder.container():
        last_message = message_history[-1]
        if last_message:
            message(last_message["text"], is_user=last_message["is_user"])
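# Design note: message_history is rebuilt on every Streamlit rerun, so only the
# greeting plus the latest exchange is kept. Persisting the full conversation would
# require something like st.session_state (not used in the original script).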