| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import gradio as gr | |
| # Load model directly | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import transformers | |
# Checkpoint identifier handed to from_pretrained for both objects, written
# once so the tokenizer and the classifier cannot drift apart.
MODEL_ID = "nebiyu29/fintunned-v2-roberta_GA"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
| # Load the model and tokenizer | |
| # model = transformers.AutoModelForSequenceClassification.from_pretrained("facebook/bart-large-mnli") | |
| # tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/bart-large-mnli") | |
| # Define a function to split a text into segments of 512 tokens | |
def split_text(text, max_tokens=512):
    """Split ``text`` into consecutive segments of at most ``max_tokens`` tokens.

    The text is tokenized with the module-level ``tokenizer``, the resulting
    token list is cut into fixed-size windows, and every window is converted
    back to a string with ``tokenizer.convert_tokens_to_string``.

    Args:
        text: The raw input string.
        max_tokens: Maximum number of tokens per segment (default 512).
            Only the tokens returned by ``tokenizer.tokenize`` are counted;
            NOTE(review): special tokens added when a segment is encoded
            later are presumably not included, so callers feeding segments
            to a model should truncate or pass a smaller value -- confirm.

    Returns:
        A list of segment strings in their original order. The list is empty
        when ``text`` yields no tokens.

    Raises:
        ValueError: If ``max_tokens`` is not positive.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")

    tokens = tokenizer.tokenize(text)

    # Cut by position. Detecting the end of the text by comparing each
    # token's value with the last token would close a segment early whenever
    # that token string (for example a trailing ".") also occurs earlier.
    return [
        tokenizer.convert_tokens_to_string(tokens[start:start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]
| # a function that classifies text | |
def classify_text(text):
    """Classify ``text`` segment by segment and return a readable summary.

    The text is split with ``split_text``; each segment is encoded with the
    module-level ``tokenizer``, run through the module-level ``model`` with
    gradients disabled, and reduced to a single predicted class through
    ``extract_predictions``.

    Args:
        text: The raw input string to classify.

    Returns:
        A string with one line per segment of the form
        ``"Segment <n>: <label> (<probability>)"``, or a short notice when
        the text produces no segments.
    """
    # Human-readable class names.
    # NOTE(review): assumes this order matches the class indices the model
    # was trained with -- confirm against the checkpoint's config.
    labels = ["depression", "anxiety", "bipolar disorder", "schizophrenia", "PTSD", "OCD", "ADHD", "autism", "eating disorder", "personality disorder", "phobia"]

    segments = split_text(text)
    if not segments:
        return "No text to classify."

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Module.to() moves the parameters in place. Rebinding the name here
    # (``model = model.to(device)``) would turn ``model`` into a local
    # variable and raise UnboundLocalError before the call is evaluated.
    model.to(device)

    # Only trust the hard-coded names when their count agrees with the model;
    # otherwise fall back to the names stored in the model config.
    use_local_labels = model.config.num_labels == len(labels)

    predictions = []
    for segment in segments:
        # truncation=True keeps the encoded input (segment tokens plus any
        # special tokens) within the tokenizer's configured maximum length.
        inputs = tokenizer([segment], padding=True, truncation=True, return_tensors="pt")
        input_ids = inputs["input_ids"].to(device)
        attention_mask = inputs["attention_mask"].to(device)

        with torch.no_grad():
            outputs = model(input_ids, attention_mask=attention_mask)

        probs, preds = extract_predictions(outputs)

        # The batch holds exactly one segment, so row 0 is the only row.
        class_id = int(preds[0])
        if use_local_labels:
            label = labels[class_id]
        else:
            label = model.config.id2label.get(class_id, str(class_id))

        predictions.append({
            "segment_text": segment,
            "label": label,
            # Index (row, class): indexing the batch dimension with the class
            # id would fail for every class other than 0.
            "probability": float(probs[0, class_id]),
        })

    # The Gradio interface declares a text output, so return a single string.
    return "\n".join(
        f"Segment {number}: {item['label']} ({item['probability']:.1%})"
        for number, item in enumerate(predictions, start=1)
    )
| # Define a function to extract predictions from model output (adjust as needed) | |
def extract_predictions(outputs):
    """Convert raw model output into class probabilities and predicted ids.

    Args:
        outputs: Object exposing a ``logits`` tensor. Softmax and argmax are
            both taken over dimension 1, so the tensor needs at least two
            dimensions (presumably ``(batch, num_classes)`` -- confirm
            against the model in use).

    Returns:
        A tuple ``(probabilities, predicted)``: the softmax of the logits
        over dimension 1, and the index of the largest probability along
        that same dimension.
    """
    class_probs = torch.softmax(outputs.logits, dim=1)
    predicted = class_probs.argmax(dim=1)
    return class_probs, predicted
| # def classify_text(text): | |
| # """ | |
| # This function preprocesses, feeds text to the model, and outputs the predicted class. | |
| # """ | |
| # inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt") | |
| # outputs = model(**inputs) | |
| # logits = outputs.logits # Access logits instead of pipeline output | |
| # predictions = torch.argmax(logits, dim=-1) # Apply argmax for prediction | |
| # return model.config.id2label[predictions.item()] # Map index to class label | |
# Gradio front end: one free-text input, with the value returned by
# classify_text rendered as text.
interface = gr.Interface(
    classify_text,
    "text",
    "text",
    description="Enter some text, and the model will classify it.",
    title="Text Classification Demo",
    # choices=["depression", "anxiety", "bipolar disorder", "schizophrenia", "PTSD", "OCD", "ADHD", "autism", "eating disorder", "personality disorder", "phobia"]  # Adjust class names
)
# The launch call stays disabled, exactly as in the original file.
# interface.launch()