pull模型文件

Browse files

Files changed (10) hide show

.idea/.gitignore +8 -0
.idea/deployment.xml +14 -0
.idea/inspectionProfiles/Project_Default.xml +13 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +4 -0
.idea/modules.xml +8 -0
.idea/symptom-sick-2c.iml +10 -0
.idea/vcs.xml +6 -0
app.py +74 -0
requirements.txt +6 -0

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

.idea/deployment.xml ADDED Viewed

	@@ -0,0 +1,14 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
+    <serverData>
+      <paths name="qiao@192.168.223.130:22">
+        <serverdata>
+          <mappings>
+            <mapping local="$PROJECT_DIR$" web="/" />
+          </mappings>
+        </serverdata>
+      </paths>
+    </serverData>
+  </component>
+</project>

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,13 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
+    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="N806" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (symptom-sick-2c)" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/symptom-sick-2c.iml" filepath="$PROJECT_DIR$/.idea/symptom-sick-2c.iml" />
+    </modules>
+  </component>
+</project>

.idea/symptom-sick-2c.iml ADDED Viewed

	@@ -0,0 +1,10 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.9 (symptom-sick-2c)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import functools
+import os
+import huggingface_hub
+from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizerFast, \
+    AutoModelForSequenceClassification, AutoTokenizer
+import random
+import paddlenlp.datasets
+from paddlenlp.data import Stack, Tuple, Pad
+from paddlenlp.transformers import ElectraForSequenceClassification, ElectraTokenizer, ElectraModel
+import paddle
+import pandas as pd
+import re
+import tqdm
+import cachetools
+import typing
+import torch
+import torch.utils.data as Data
+import datetime
+from transformers import pipeline
+import gradio
+def collate(batch, tokenizer, useFirstDim=True):
+    if useFirstDim:
+        input_ids = torch.nn.utils.rnn.pad_sequence([_[0].data.get('input_ids')[0] for _ in batch], batch_first=True,
+                                                    padding_value=tokenizer.pad_token_id)
+    else:
+        input_ids = torch.nn.utils.rnn.pad_sequence([_.data.get('input_ids')[0] for _ in batch], batch_first=True,
+                                                    padding_value=tokenizer.pad_token_id)
+    labels = [_[-1] for _ in batch]
+    return torch.LongTensor(input_ids), torch.LongTensor(labels)
+def launchGradioNLI():
+    # 下载模型
+    folder = huggingface_hub.snapshot_download('qiaokuoyuan/symptom-sick-2c')
+    # 读取模型和tokenizer
+    model = XLMRobertaForSequenceClassification.from_pretrained(folder)
+    tokenizer = XLMRobertaTokenizerFast.from_pretrained(folder)
+    model.eval()
+    # 定义补齐函数
+    _collate = functools.partial(collate, tokenizer=tokenizer)
+    # 单个症状解析
+    def getSickDistributionTensorByOneSymptom(symptom, sick):
+        # 需要将当前症状和每个疾病组成数组并 tokenzier
+        tokens = [[symptom, sick], ]
+        tokens = tokenizer(tokens, add_special_tokens=True,
+                           return_tensors='pt',
+                           padding=True,
+                           truncation='only_first')
+        tokens = tokens.data.get('input_ids')
+        batchOutputs = []
+        batchSize = 64
+        with paddle.no_grad():
+            for i in range(0, tokens.shape[0], batchSize):
+                batch = tokens[i: i + batchSize]
+                predict = model(batch)
+                batchOutputs.append(predict.logits)
+            batchOutputs = torch.cat(batchOutputs, dim=0)
+            return str(batchOutputs[0][1].item())
+    # 单个症状解析
+    app = gradio.Interface(fn=getSickDistributionTensorByOneSymptom, inputs=['text', 'text'], outputs='text')
+    app.launch()
+# Script entry point: start the Gradio demo only when this file is executed
+# directly, not when it is imported as a module.
+if __name__ == '__main__':
+    launchGradioNLI()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+transformers
+huggingface_hub
+gradio
+torch
+paddlepaddle
+paddlenlp