merve HF staff committed on
Commit
b3d2c60
•
1 Parent(s): efa2efd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -17
app.py CHANGED
@@ -3,15 +3,9 @@ import requests
3
  import os
4
  from streamlit_chat import message
5
  import random
 
 
6
 
7
- @st.cache
8
- def query(payload):
9
- api_token = os.getenv("api_token")
10
- model_id = "deepset/roberta-base-squad2"
11
- headers = {"Authorization": f"Bearer {api_token}"}
12
- API_URL = f"https://api-inference.huggingface.co/models/{model_id}"
13
- response = requests.post(API_URL, headers=headers, json=payload)
14
- return response.json(), response
15
 
16
 
17
  context = "To extract information from documents, use sentence similarity task. To classify sentiments, use text classification task. To do sentiment analysis, use text classification task. To detect masks from images, use object detection task. To extract name or address from documents use token classification task. To extract name or address from invoices, use token classification task. To build voice enabled applications, you can use automatic speech recognition task. You can retrieve information from documents using sentence similarity task. You can summarize papers using summarization task. You can convert text to speech using text-to-speech task. To detect language spoken in an audio, you can use audio classification task. To detect emotion in an audio, you can use audio classification task. To detect commands in an audio, you can use audio classification task. To decompose sounds in a recording, use audio-to-audio task. To answer questions from a document, you can use question answering task. To answer FAQs from your customers, you can use question answering task. To see if a text is grammatically correct, you can use text classification task. To augment your training data, you can use text classification task. To detect pedestrians, you can use object detection task."
@@ -34,6 +28,51 @@ link_dict = {
34
  "object detection": "https://huggingface.co/tasks/object-detection"}
35
 
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  message_history = [{"text":"Let's find out the best task for your use case! Tell me about your use case :)", "is_user":False}]
38
 
39
  st.subheader("If you don't know how to build your machine learning product for your use case, Taskmaster is here to help you! πŸͺ„βœ¨")
@@ -46,21 +85,15 @@ placeholder = st.empty() # placeholder for latest message
46
 
47
  input = st.text_input("Ask me πŸ€—")
48
  if input:
 
49
  message_history.append({"text":input, "is_user" : True})
50
 
51
- data, resp = query(
52
- {
53
- "inputs": {
54
- "question": input,
55
- "context": context,
56
- }
57
- }
58
- )
59
 
60
 
61
  if resp.status_code == 200:
62
 
63
- model_answer = data["answer"]
64
  key_exists = False
65
  for key in link_dict:
66
  if key in model_answer:
 
3
  import os
4
  from streamlit_chat import message
5
  import random
6
+ import sentence-transformers
7
+ import nltk
8
 
 
 
 
 
 
 
 
 
9
 
10
 
11
  context = "To extract information from documents, use sentence similarity task. To classify sentiments, use text classification task. To do sentiment analysis, use text classification task. To detect masks from images, use object detection task. To extract name or address from documents use token classification task. To extract name or address from invoices, use token classification task. To build voice enabled applications, you can use automatic speech recognition task. You can retrieve information from documents using sentence similarity task. You can summarize papers using summarization task. You can convert text to speech using text-to-speech task. To detect language spoken in an audio, you can use audio classification task. To detect emotion in an audio, you can use audio classification task. To detect commands in an audio, you can use audio classification task. To decompose sounds in a recording, use audio-to-audio task. To answer questions from a document, you can use question answering task. To answer FAQs from your customers, you can use question answering task. To see if a text is grammatically correct, you can use text classification task. To augment your training data, you can use text classification task. To detect pedestrians, you can use object detection task."
 
28
  "object detection": "https://huggingface.co/tasks/object-detection"}
29
 
30
 
31
+
32
+
33
+ corpus = []
34
+ sentence_count = []
35
+
36
+ for sent in context.split("."):
37
+
38
+ sentences = nltk.tokenize.sent_tokenize(str(sent), language='english')
39
+ sentence_count.append(len(sentences))
40
+ for _,s in enumerate(sentences):
41
+ corpus.append(s)
42
+
43
+
44
+
45
+ corpus_embeddings = np.load('task_embeddings_msmarco-distilbert-base-v4.npy')
46
+ corpus_embeddings.shape
47
+
48
+
49
+
50
+
51
+ def find_sentences(query):
52
+ query_embedding = model.encode(query)
53
+ hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=5)
54
+ hits = hits[0]
55
+
56
+ for hit in hits:
57
+ corpus_id = hit['corpus_id']
58
+ print(corpus[corpus_id], "(Score: {:.4f})".format(hit['score']))
59
+ # Find source document based on sentence index
60
+ count = 0
61
+ for idx, c in enumerate(sentence_count):
62
+ count+=c
63
+ if (corpus_id > count-1):
64
+ continue
65
+ else:
66
+ doc = corpus[idx]
67
+ print(f"Document: {doc}, {count}")
68
+ break
69
+
70
+
71
+
72
+
73
+
74
+
75
+
76
  message_history = [{"text":"Let's find out the best task for your use case! Tell me about your use case :)", "is_user":False}]
77
 
78
  st.subheader("If you don't know how to build your machine learning product for your use case, Taskmaster is here to help you! πŸͺ„βœ¨")
 
85
 
86
  input = st.text_input("Ask me πŸ€—")
87
  if input:
88
+
89
  message_history.append({"text":input, "is_user" : True})
90
 
91
+ model_answer = find_sentences("How can I extract information from invoices?")
 
 
 
 
 
 
 
92
 
93
 
94
  if resp.status_code == 200:
95
 
96
+
97
  key_exists = False
98
  for key in link_dict:
99
  if key in model_answer: