Reem333 committed on
Commit
2acba7e
1 Parent(s): b948266

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +65 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ import fitz
5
+ import os
6
+
7
@st.cache_resource
def _load_model_and_tokenizer():
    """Load the classifier and its tokenizer once per server process.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded objects as a shared resource avoids
    re-reading the model weights on each rerun.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    classifier = AutoModelForSequenceClassification.from_pretrained("Reem333/Citaion-Classifier")
    text_tokenizer = AutoTokenizer.from_pretrained("allenai/longformer-base-4096")
    return classifier, text_tokenizer


# Module-level names used by predict_class().
model, tokenizer = _load_model_and_tokenizer()
9
+
10
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at *file_path*, concatenated.

    Pages are read in order and their text is joined with no separator.
    """
    with fitz.open(file_path) as document:
        page_texts = [
            document.load_page(index).get_text()
            for index in range(document.page_count)
        ]
    return "".join(page_texts)
17
+
18
def predict_class(text):
    """Classify *text* and return the index of the highest-scoring class.

    Args:
        text: The raw input string to classify.

    Returns:
        The predicted class index as an ``int``, or ``None`` if tokenization
        or inference raised; in that case the error is shown via ``st.error``.
    """
    try:
        max_length = 4096

        # max_length is a token budget, not a character count, so the raw
        # string is not sliced here: the tokenizer's own truncation enforces
        # the limit without throwing away text the model could still use.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=max_length,
        )
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits
        predicted_class = torch.argmax(logits, dim=1).item()
        return predicted_class
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the Streamlit script run.
        st.error(f"Error during prediction: {e}")
        return None
32
+
33
# Directory where uploaded PDFs are written before text extraction.
uploaded_files_dir = "uploaded_files"
os.makedirs(uploaded_files_dir, exist_ok=True)

# Display names indexed by the class index returned from predict_class().
class_labels = ["Level 1", "Level 2", "Level 3", "Level 4"]

st.title("Paper Citation Classifier")
option = st.radio("Select input type:", ("Text", "PDF"))

if option == "Text":
    text_input = st.text_area("Enter your text here:")
    # Only predict when the button is pressed and the text is not blank.
    if st.button("Predict") and text_input.strip():
        predicted_class = predict_class(text_input)
        if predicted_class is not None:
            st.text(f"Predicted Class: {class_labels[predicted_class]}")

elif option == "PDF":
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

    if uploaded_file is not None:
        # The file name comes from the client; keep only its final path
        # component so it cannot point outside uploaded_files_dir.
        safe_name = os.path.basename(uploaded_file.name.replace("\\", "/")) or "uploaded.pdf"
        file_path = os.path.join(uploaded_files_dir, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.success("File uploaded successfully.")
        st.text(f"File Path: {file_path}")

        file_text = extract_text_from_pdf(file_path)
        st.text("Extracted Text:")
        st.text(file_text)

        if st.button("Predict"):
            predicted_class = predict_class(file_text)
            if predicted_class is not None:
                st.text(f"Predicted Class: {class_labels[predicted_class]}")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ transformers
4
+ PyMuPDF
5
+ PyPDFium2