awacke1 committed on
Commit
2d570c8
·
1 Parent(s): 8895a7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import pandas_profiling as pp
2
  import pandas as pd
3
  from datasets import load_dataset
 
4
  #LOINC
5
  datasetLOINC = load_dataset("awacke1/LOINC-CodeSet-Value-Description.csv")
6
  #SNOMED:
@@ -8,6 +9,11 @@ datasetSNOMED = load_dataset("awacke1/SNOMED-CT-Code-Value-Semantic-Set.csv")
8
  #eCQM:
9
  dataseteCQM = load_dataset("awacke1/eCQM-Code-Value-Semantic-Set.csv")
10
 
 
 
 
 
 
11
  print(datasetLOINC)
12
  print(datasetSNOMED)
13
  print(dataseteCQM)
@@ -78,15 +84,19 @@ def fn( text1, text2, single_checkbox, checkboxes, radio, im4,
78
  #print(returnMsg)
79
 
80
  try:
81
- top1matchLOINC = json.loads(start_with_searchTermLOINC['train'])
82
- top1matchSNOMED = json.loads(start_with_searchTermSNOMED['train'])
83
- top1matchCQM = json.loads(start_with_searchTermCQM['train'])
 
 
 
84
  except:
85
- print(start_with_searchTermLOINC[0])
86
- print(start_with_searchTermSNOMED[0] )
87
- print(start_with_searchTermCQM[0] )
 
88
 
89
- print(returnMsg)
90
  print("Datasets Processed")
91
 
92
  return (
 
1
  import pandas_profiling as pp
2
  import pandas as pd
3
  from datasets import load_dataset
4
+
5
  #LOINC
6
  datasetLOINC = load_dataset("awacke1/LOINC-CodeSet-Value-Description.csv")
7
  #SNOMED:
 
9
  #eCQM:
10
  dataseteCQM = load_dataset("awacke1/eCQM-Code-Value-Semantic-Set.csv")
11
 
12
+ # map using autotokenizer
13
+ from transformers import AutoTokenizer
14
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
15
+ dataset = dataset.map(lambda datasetLOINC : tokenizer(datasetLOINC["Description"]), batched=True)
16
+
17
  print(datasetLOINC)
18
  print(datasetSNOMED)
19
  print(dataseteCQM)
 
84
  #print(returnMsg)
85
 
86
  try:
87
+ #top1matchLOINC = json.loads(start_with_searchTermLOINC['train'])
88
+ #top1matchSNOMED = json.loads(start_with_searchTermSNOMED['train'])
89
+ #top1matchCQM = json.loads(start_with_searchTermCQM['train'])
90
+ top1matchLOINC = json.loads(start_with_searchTermLOINC)
91
+ top1matchSNOMED = json.loads(start_with_searchTermSNOMED)
92
+ top1matchCQM = json.loads(start_with_searchTermCQM)
93
  except:
94
+ print('Hello')
95
+ #print(start_with_searchTermLOINC[0])
96
+ #print(start_with_searchTermSNOMED[0] )
97
+ #print(start_with_searchTermCQM[0] )
98
 
99
+ #print(returnMsg)
100
  print("Datasets Processed")
101
 
102
  return (