kuoyuan committed on
Commit
ea801b7
1 Parent(s): 7cb9314

pull模型文件

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/deployment.xml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
4
+ <serverData>
5
+ <paths name="qiao@192.168.223.130:22">
6
+ <serverdata>
7
+ <mappings>
8
+ <mapping local="$PROJECT_DIR$" web="/" />
9
+ </mappings>
10
+ </serverdata>
11
+ </paths>
12
+ </serverData>
13
+ </component>
14
+ </project>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
5
+ <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
6
+ <option name="ignoredErrors">
7
+ <list>
8
+ <option value="N806" />
9
+ </list>
10
+ </option>
11
+ </inspection_tool>
12
+ </profile>
13
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (symptom-sick-2c)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/symptom-sick-2c.iml" filepath="$PROJECT_DIR$/.idea/symptom-sick-2c.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/symptom-sick-2c.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="jdk" jdkName="Python 3.9 (symptom-sick-2c)" jdkType="Python SDK" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ import os
3
+
4
+ import huggingface_hub
5
+ from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizerFast, \
6
+ AutoModelForSequenceClassification, AutoTokenizer
7
+ import random
8
+ import paddlenlp.datasets
9
+ from paddlenlp.data import Stack, Tuple, Pad
10
+ from paddlenlp.transformers import ElectraForSequenceClassification, ElectraTokenizer, ElectraModel
11
+ import paddle
12
+ import pandas as pd
13
+ import re
14
+ import tqdm
15
+ import cachetools
16
+ import typing
17
+ import torch
18
+ import torch.utils.data as Data
19
+ import datetime
20
+ from transformers import pipeline
21
+
22
+ import gradio
23
+
24
+
25
def collate(batch, tokenizer, useFirstDim=True):
    """Pad a batch of tokenized samples into dense ``LongTensor`` inputs.

    Args:
        batch: Sequence of samples. With ``useFirstDim=True`` each sample is
            indexable and its element ``0`` is the encoding object; otherwise
            the sample itself is the encoding object. In both cases the
            encoding exposes ``.data['input_ids']`` whose element ``0`` is a
            1-D tensor of token ids, and ``sample[-1]`` is read as the label.
        tokenizer: Object providing ``pad_token_id``, used as the fill value.
        useFirstDim: Whether the encoding is stored at ``sample[0]``.

    Returns:
        Tuple ``(input_ids, labels)`` of int64 tensors; ``input_ids`` is
        padded to the longest sequence in the batch (batch-first layout).
    """
    # Resolve where the encoding lives once, instead of duplicating the
    # whole padding call in two branches.
    encodings = [sample[0] if useFirstDim else sample for sample in batch]
    sequences = [encoding.data.get('input_ids')[0] for encoding in encodings]
    input_ids = torch.nn.utils.rnn.pad_sequence(
        sequences,
        batch_first=True,
        padding_value=tokenizer.pad_token_id,
    )
    labels = [sample[-1] for sample in batch]
    # ``.long()`` is a no-op for int64 input and, unlike the legacy
    # ``torch.LongTensor(tensor)`` constructor, also accepts other int dtypes.
    return input_ids.long(), torch.LongTensor(labels)
34
+
35
+
36
def launchGradioNLI():
    """Download the symptom/sick classifier and serve it via a Gradio UI.

    Fetches the ``qiaokuoyuan/symptom-sick-2c`` snapshot from the Hugging Face
    Hub, loads it as an XLM-RoBERTa sequence classifier with its fast
    tokenizer, and launches a Gradio interface with two text inputs
    (symptom, sick) and one text output.
    """
    # Download the model files.
    folder = huggingface_hub.snapshot_download('qiaokuoyuan/symptom-sick-2c')

    # Load the model and tokenizer; eval() switches off dropout for inference.
    model = XLMRobertaForSequenceClassification.from_pretrained(folder)
    tokenizer = XLMRobertaTokenizerFast.from_pretrained(folder)
    model.eval()

    def getSickDistributionTensorByOneSymptom(symptom, sick):
        """Score one (symptom, sick) pair.

        Returns the logit at class index 1 for the pair, as a string.
        """
        # Encode the symptom and the sick name together as one sentence pair.
        pairs = [[symptom, sick], ]
        encoded = tokenizer(pairs, add_special_tokens=True,
                            return_tensors='pt',
                            padding=True,
                            truncation='only_first')
        input_ids = encoded.data.get('input_ids')

        batchOutputs = []
        batchSize = 64
        # The model is a PyTorch module, so gradient tracking must be turned
        # off with torch.no_grad(); paddle.no_grad() does not affect torch.
        with torch.no_grad():
            for start in range(0, input_ids.shape[0], batchSize):
                batch = input_ids[start: start + batchSize]
                predict = model(batch)
                batchOutputs.append(predict.logits)

        batchOutputs = torch.cat(batchOutputs, dim=0)
        return str(batchOutputs[0][1].item())

    # Expose the scoring function as a two-text-in / one-text-out web app.
    app = gradio.Interface(fn=getSickDistributionTensorByOneSymptom, inputs=['text', 'text'], outputs='text')
    app.launch()
71
+
72
+
73
# Script entry point: start the Gradio app only when run directly.
if __name__ == "__main__":
    launchGradioNLI()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers
2
+ huggingface_hub
3
+ gradio
4
+ torch
5
+ paddlepaddle
6
+ paddlenlp