noequal committed on
Commit
5c0eadd
1 Parent(s): 51c1886

Create app.py


Entry point for the app. This file handles input processing, including tokenization.

Files changed (1)
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
+ import streamlit as st  # app framework for the entry point; UI wiring comes later
+ import torch
+ from torch.utils.data import Dataset
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
+
+ # Load a pre-trained clinical model (Bio_ClinicalBERT). A sequence-classification
+ # head fits the labeled dataset below; a causal-LM head does not suit a BERT
+ # encoder fine-tuned on labeled examples.
+ model = AutoModelForSequenceClassification.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
+
+ # Tokenize the clinical text data with the matching tokenizer.
+ tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
+
+ # Convert the tokenized data into PyTorch tensors via a PyTorch Dataset.
+ class ClinicalDataset(Dataset):
+     def __init__(self, texts, labels, tokenizer):
+         self.texts = texts
+         self.labels = labels
+         self.tokenizer = tokenizer
+
+     def __len__(self):
+         return len(self.texts)
+
+     def __getitem__(self, idx):
+         text = self.texts[idx]
+         label = self.labels[idx]
+         # Pad to a fixed length so the collator below can stack tensors across examples.
+         encoding = self.tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=512)
+         return {"input_ids": encoding["input_ids"].squeeze(0),
+                 "attention_mask": encoding["attention_mask"].squeeze(0),
+                 "labels": torch.tensor(label)}
+
+ # train_texts, train_labels, and val_dataset must be defined before this point.
+ dataset = ClinicalDataset(texts=train_texts, labels=train_labels, tokenizer=tokenizer)
+
+ # Fine-tune the pre-trained model on the clinical dataset.
+ training_args = TrainingArguments(
+     output_dir='./results',          # output directory
+     num_train_epochs=3,              # total number of training epochs
+     per_device_train_batch_size=16,  # batch size per device during training
+     per_device_eval_batch_size=64,   # batch size for evaluation
+     warmup_steps=500,                # number of warmup steps for the learning rate scheduler
+     weight_decay=0.01,               # strength of weight decay
+     logging_dir='./logs',            # directory for storing logs
+     logging_steps=10,
+ )
+
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=dataset,
+     eval_dataset=val_dataset,
+     data_collator=lambda data: {'input_ids': torch.stack([f['input_ids'] for f in data]),
+                                 'attention_mask': torch.stack([f['attention_mask'] for f in data]),
+                                 'labels': torch.stack([f['labels'] for f in data])},
+ )
+ trainer.train()
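
The commit imports streamlit but does not yet wire up any UI. A minimal sketch of how the entry point might serve the fine-tuned classifier, assuming the model and tokenizer objects defined in app.py above; the widget labels and the class-index output are illustrative, not part of this commit:

import streamlit as st
import torch

st.title("Clinical text classifier")  # hypothetical UI, not in this commit

text = st.text_area("Enter a clinical note")
if st.button("Classify") and text:
    # Tokenize exactly as during training so the input matches the model.
    inputs = tokenizer(text, return_tensors="pt", padding="max_length",
                       truncation=True, max_length=512)
    # Inference only: no_grad avoids building autograd graphs.
    with torch.no_grad():
        logits = model(**inputs).logits
    st.write(f"Predicted class: {logits.argmax(dim=-1).item()}")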