"""Streamlit app that suggests topics for a paper from its title and abstract."""

import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
from transformers import pipeline

# Topic names are the whitespace-separated tokens on the first line of
# labels.txt; pretty_output() indexes this list by the model's class index.
# NOTE(review): presumably token i names class i of the model -- confirm
# against how labels.txt was generated.
with open('labels.txt') as f:
    LABEL2STR = f.readline().split()


# NOTE(review): st.cache is deprecated in recent Streamlit releases in favour
# of st.cache_resource; kept as-is so the app's Streamlit requirement does not
# change -- confirm the pinned version before migrating.
@st.cache(allow_output_mutation=True)
def load_model():
    """Build the text-classification pipeline for the topic model.

    Loads the tokenizer and sequence-classification model from
    "kirillbogatiy/model_topics" and wraps them in a
    TextClassificationPipeline configured to return a score for every label.

    Returns:
        The constructed TextClassificationPipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained("kirillbogatiy/model_topics")
    model = AutoModelForSequenceClassification.from_pretrained("kirillbogatiy/model_topics")
    pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
    return pipe


def pretty_output(predictions, thr=0.95):
    """Write the most probable topics to the page until ``thr`` mass is shown.

    Topics are written in descending score order; output stops as soon as the
    cumulative score of the topics written so far reaches ``thr``.

    Args:
        predictions: Pipeline output; ``predictions[0]`` must be a sequence of
            dicts each holding a ``'score'`` entry. The entries are assumed to
            be in class-index order (what the pipeline yields with
            ``return_all_scores=True``) -- verify if the pipeline options change.
        thr: Cumulative score at which to stop listing topics.
    """
    cumulative_score = 0
    st.write('Possible topics:')

    # Pair every score with its class index *before* sorting. Enumerating the
    # already-sorted list would yield the rank (0, 1, 2, ...) instead, which
    # would look up the wrong name in LABEL2STR.
    ranked = sorted(
        enumerate(predictions[0]),
        key=lambda pair: pair[1]['score'],
        reverse=True,
    )

    for label, data in ranked:
        score = data['score']
        cumulative_score += score
        st.write('{}: {} %'.format(LABEL2STR[label], round(100 * score, 2)))
        if cumulative_score >= thr:
            return


if __name__ == '__main__':
    title = st.text_input('Input a title here:')
    abstract = st.text_input('Input an abstract here:')

    pipe = load_model()

    # Only classify once the user has entered a title; the abstract may be empty.
    if title:
        predictions = pipe('Title: {}\n\nAbstract: {}'.format(title, abstract))
        pretty_output(predictions)