eskayML committed on
Commit
a2ec50f
1 Parent(s): 1894bb5

Upload 3 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hella[[:space:]]swag[[:space:]]paper[[:space:]]official.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pyttsx3
3
+ import tempfile
4
+ import PyPDF2
5
+ from huggingface_hub import InferenceClient
6
+
7
# Custom CSS injected into the page; it gives the app container a light grey
# vertical gradient background.
page_bg_img = """
<style>
.stApp {
background: linear-gradient( #eee 38%, #ccc 68%);
}
</style>
"""

# Page chrome is emitted in this order: style sheet, title, then the uploader.
st.markdown(page_bg_img, unsafe_allow_html=True)
st.title("Summarize & Listen to your Academic Materials on the Fly.")

# `uploaded_pdf` stays None until the user picks a file; `full_text` is filled
# in later by the extraction step.
uploaded_pdf = st.file_uploader("Upload a research Paper", type="pdf")
full_text = None

# Hosted model used for summarization, reached through the Hugging Face
# inference client.
MODEL_NAME = "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"
client = InferenceClient(model=MODEL_NAME)


# Instruction block that is prepended to the extracted document text before it
# is sent to the model.
DETAILED_SUMMARIZATION_PROMPT = """
<INST>You are a very powerful summarization engine for summarizing academic contents,
now you are to summarize the following text you are going to be provided which is from a document, make sure to understand
all improperly parsed text and actually parse them properly , also make sure that your final summarization is very coherent and understandable by a student and is under 4000 words ,
also the length of the summarized text should be less than the original provided text,
if you are provided with a text that includes unnecessary items that do not contribute value to the book like preface about the author, do not include them in the summarization

Your summary should be concise and should accurately and objectively communicate the key points of the paper.
You should not include any personal opinions or interpretations in your summary but rather focus on
objectively presenting the information from the paper. Your summary should be written in your own words
and should not include any direct quotes from the paper. Please ensure that your summary is clear,
concise, and accurately reflects the content of the original paper.
do not go out of context of the words provided.
Now here is your provided text :
</INST>
"""
41
+
42
+
43
# Upper bound on the number of characters forwarded to the model.
MAX_TEXT_CHARS = 100_000

# Only show the spinner (and do any work) once a file has actually been uploaded.
if uploaded_pdf is not None:
    with st.spinner("Extracting Text..."):
        # The uploaded file is a binary file-like object, so it can be handed
        # straight to PdfReader. This avoids the previous round-trip through an
        # unflushed, never-deleted temporary file.
        pdf_reader = PyPDF2.PdfReader(uploaded_pdf)

        # Gather the text of every page. `or ""` guards against pages for which
        # no text could be extracted (e.g. image-only pages).
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

        # Truncate to MAX_TEXT_CHARS; slicing is a no-op on shorter text.
        full_text = "".join(page_texts)[:MAX_TEXT_CHARS]

    if full_text:
        st.success("Text Extracted Successfully!!!")
    else:
        # Nothing to summarize: say so instead of reporting success.
        st.warning("No extractable text was found in the uploaded PDF.")
62
+
63
+
64
+ ###################################################################################
65
+
66
+
67
def synthesize_text_to_audio(text):
    """Render *text* to speech and return the audio as a binary stream.

    The speech engine writes its output to a temporary file. The file's
    contents are loaded into memory and the file is removed, so no file
    handle or stray temp file outlives the call.

    Args:
        text: The text to be spoken.

    Returns:
        A readable binary file-like object (``io.BytesIO``) positioned at the
        start of the synthesized audio data.
    """
    import contextlib
    import io
    import os

    engine = pyttsx3.init()

    # Reserve a unique path and close the handle right away, so the engine is
    # the only writer of the file.
    # NOTE(review): the ".mp3" suffix is kept from the original; the actual
    # container format is whatever the pyttsx3 driver emits - confirm.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        temp_file_path = temp_file.name

    try:
        # save_to_file only queues the request; runAndWait performs it.
        engine.save_to_file(text, temp_file_path)
        engine.runAndWait()

        with open(temp_file_path, "rb") as sound_file:
            audio_bytes = sound_file.read()
    finally:
        # Best-effort cleanup of the temporary file.
        with contextlib.suppress(OSError):
            os.remove(temp_file_path)

    return io.BytesIO(audio_bytes)
76
+
77
+
78
+
79
# Summarize the extracted text (if any) with the hosted model.
summarized_text = None
if full_text:
    with st.spinner("Summarizing Text Content..."):
        # The instruction block is prepended to the document text.
        summarized_text = client.text_generation(
            DETAILED_SUMMARIZATION_PROMPT + full_text,
            max_new_tokens=4096,
            temperature=0.2,
            top_p=0.8,
        )
        print(summarized_text)

# Turn the summary into speech and embed an audio player in the page.
if summarized_text:
    with st.spinner('Synthesizing to Audio...'):
        # Close the returned audio stream once the player has been given its
        # data, so the underlying handle is not leaked across reruns.
        with synthesize_text_to_audio(summarized_text) as audio_stream:
            st.audio(audio_stream)
93
+
hella swag paper official.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a042fdcc4a22848cffde37e9c5257e423443980172bab7367dffb4e779aac37
3
+ size 1192712
sklistener-icon.jpg ADDED