Spaces:

dmasloff
/

ysda_arxiv_classifier

Sleeping

App Files Files Community

ysda_arxiv_classifier / app.py

dmasloff

Move from json to text

cd5626d verified 4 months ago

raw

history blame contribute delete

1.81 kB

	import streamlit as st
	from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

	@st.cache_resource
	def load_pipeline():
	    """Build the text-classification pipeline for the arXiv DistilBERT model.

	    Both the sequence-classification model and its tokenizer are loaded from
	    the same Hugging Face checkpoint and handed to ``pipeline``.  The function
	    is decorated with ``st.cache_resource`` so the constructed pipeline can be
	    reused instead of being rebuilt on every call.

	    Returns:
	        The ``"text-classification"`` pipeline object created by
	        ``transformers.pipeline``.
	    """
	    checkpoint = "dmasloff/arxiv_distilbert"
	    # Keyword arguments are evaluated left to right: model first, then tokenizer.
	    return pipeline(
	        "text-classification",
	        model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
	        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
	    )

	# Module-level classifier used by the submit handler further down; obtained
	# through load_pipeline(), which is wrapped in st.cache_resource.
	classifier = load_pipeline()

	# Page heading.
	st.title("ArXiV article classification via DistilBERT")

	# Display names for the predicted classes, keyed by the integer that the
	# submit handler parses out of the classifier's label string.
	# NOTE(review): the key order presumably mirrors the label ids of the
	# fine-tuned checkpoint -- verify against the model config before editing.
	topic_names = {
	    0: "High Energy Physics - Phenomenology (hep-ph)",
	    1: "Nuclear Experiment (nucl-ex)",
	    2: "High Energy Physics - Experiment (hep-ex)",
	    3: "Astrophysics (astro-ph)",
	    4: "Quantum Physics (quant-ph)",
	    5: "Mathematical Physics (math-ph)",
	    6: "High Energy Physics - Theory (hep-th)",
	    7: "Quantitative Biology (q-bio)",
	    8: "Nonlinear Sciences (nlin)",
	    9: "Computer Science (cs)",
	    10: "Quantitative Finance (q-fin)",
	    11: "Mathematics (math)",
	    12: "Condensed Matter (cond-mat)",
	    13: "High Energy Physics - Lattice (hep-lat)",
	    14: "Electrical Engineering and Systems Science (eess)",
	    15: "Physics (physics)",
	    16: "Nuclear Theory (nucl-th)",
	    17: "Statistics (stat)",
	    18: "Economics (econ)",
	    19: "General Relativity and Quantum Cosmology (gr-qc)",
	}

	# Two free-text inputs; either one may be left empty.
	title = st.text_area("Enter article's title")
	abstract = st.text_area("Enter article's abstract")

	if st.button("Submit"):
	    if not title.strip() and not abstract.strip():
	        st.warning("Please fill in at least one field.")
	    else:
	        # Title and abstract are joined with a newline; strip() removes the
	        # dangling newline when one of the two fields is empty.
	        full_text = f"{title}\n{abstract}".strip()
	        with st.spinner("Classifying..."):
	            # truncation=True asks the tokenizer to cut over-long input down
	            # to its configured maximum length, so a very long pasted text
	            # does not fail inside the model.
	            result = classifier(full_text, truncation=True)

	        # The handler expects labels shaped like "<prefix>_<int>", with the
	        # integer indexing topic_names.  If the label has another shape or an
	        # unknown index, show the raw label rather than crashing the app.
	        raw_label = result[0]["label"]
	        try:
	            topic = topic_names[int(raw_label.rsplit("_", 1)[-1])]
	        except (KeyError, ValueError):
	            topic = raw_label

	        st.success("Classification Result:")
	        st.text(topic)