# -*- coding: utf-8 -*-
"""gradio_sindi.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/12KZGcYbsXlMWYC8U4aeR_Ex0u8fJLgly
# libraries
"""
import gradio as gr
import torch
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
"""# data - text"""
splitted_df = pd.read_csv('splitted_df_jo.csv')
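# The corpus is expected to provide a "section_text" column, which is the
# only column this app reads.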
"""# getting context"""
def remove_symbols(text):
    # Strip characters that add noise to the displayed context
    remove_chars = "/()\n."
    cleaned_text = "".join([char for char in text if char not in remove_chars])
    return cleaned_text
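# Illustrative behaviour (assumed example, not from the original data):
#   remove_symbols("HPV/DNA test (optional).") -> "HPVDNA test optional"
# Removed characters are dropped rather than replaced with spaces, so words
# separated only by a newline end up concatenated.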
def context_func(message):
    # Create a TF-IDF vectorizer
    vectorizer = TfidfVectorizer()

    # Convert the corpus sections and the question to TF-IDF vectors
    text_tfidf = vectorizer.fit_transform(splitted_df["section_text"])
    question_tfidf = vectorizer.transform([message])

    # Calculate cosine similarity between the question and each section
    similarities = cosine_similarity(question_tfidf, text_tfidf)[0]

    # Get the most similar section; argmax is positional, so use .iloc
    most_similar_index = similarities.argmax()
    most_similar_context = splitted_df["section_text"].iloc[most_similar_index]
    most_similar_context = remove_symbols(most_similar_context)
    return most_similar_context
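# Note: the vectorizer is refit on every call. A minimal sketch of fitting it
# once at import time instead (illustrative names, not part of the app):
#
#   _vectorizer = TfidfVectorizer()
#   _text_tfidf = _vectorizer.fit_transform(splitted_df["section_text"])
#
# context_func would then only need _vectorizer.transform([message]) per call.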
# Load the tokenizer and model once at import time so every request reuses them
tokenizer = AutoTokenizer.from_pretrained("nlp-group/sindi-bert-final")
model = AutoModelForQuestionAnswering.from_pretrained("nlp-group/sindi-bert-final")

def answer_question(question):
    context = context_func(question)

    # Tokenize the question together with the retrieved context
    inputs = tokenizer(question, context, return_tensors="pt", max_length=512, truncation=True)

    # Get the most likely answer span from the model
    with torch.no_grad():
        outputs = model(**inputs)
    answer_start = torch.argmax(outputs.start_logits)
    answer_end = torch.argmax(outputs.end_logits) + 1
    answer = tokenizer.decode(inputs["input_ids"][0][answer_start:answer_end])
    return answer, context
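# Illustrative call (assumes splitted_df_jo.csv and the model are available):
#   answer, context = answer_question("What is breast cancer?")
#   print(answer)  # span extracted from the most similar corpus section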
def main():
    """
    Initializes a Women Cancer ChatBot interface using a Hugging Face model
    for question answering.

    Creates a Gradio interface around answer_question: users input questions
    related to women's cancer topics, and the ChatBot answers them based on
    the most relevant context retrieved from the corpus.

    Returns:
        None

    Example:
        >>> main()
    """
    iface = gr.Interface(fn=answer_question,
                         inputs=[gr.Textbox(label="Question")],
                         outputs=[gr.Textbox(label="Answer"),
                                  gr.Textbox(label="Context")],
                         title="Women Cancer ChatBot",
                         description="How can I help you?",
                         examples=[
                             ["What is breast cancer?"],
                             ["What are treatments for cervical cancer?"]
                         ])
    iface.launch(debug=True, share=True)
if __name__ == "__main__":
main()