girgis committed on
Commit
bc64242
1 Parent(s): b29c475

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -19
app.py CHANGED
@@ -1,21 +1,76 @@
 
 
1
import streamlit as st


def extract_text(text):
    """Return the first 100 characters of *text* (the whole string if shorter)."""
    return text[:100]


# Streamlit UI
st.title('Text Extraction App')

# Text input
user_input = st.text_area('Enter your text here:', '')

# Button to trigger extraction
if st.button('Extract Text'):
    if user_input:
        # SECURITY FIX: the original appended st.secrets['api_key'] to the
        # displayed output, leaking the API secret to every visitor.
        # Secrets must never be rendered in the UI.
        extracted_text = extract_text(user_input)
        st.write('Extracted Text:')
        st.write(extracted_text)
    else:
        st.warning('Please enter some text before extracting.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from dotenv import load_dotenv
import os
import streamlit as st
import openai
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from sentence_transformers import CrossEncoder
import fitz  # PyMuPDF library for PDF processing
import tempfile

load_dotenv()

# BUG FIX: the original called os.getenv(st.secrets['api_key']), i.e. it used
# the secret VALUE as an environment-variable NAME, which always returns None.
# Use the secret directly, falling back to the conventional env var that
# load_dotenv() populates.
openai.api_key = st.secrets.get('api_key', os.getenv('OPENAI_API_KEY'))

# Sidebar: model configuration controls.
st.sidebar.title("Model Configuration")

# File uploader moved to the sidebar
uploaded_file = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])

# Option menu for model selection.
# NOTE(review): model_selection and model_temperature are collected but not
# wired into the query engine below — presumably future work; confirm.
model_selection = st.sidebar.selectbox("Model Selection", ["GPT 3.5", "LLama 2"])

# BUG FIX: st.slider args are (label, min, max, default); the original passed
# (0.0, 0.5, 1.0) — a default above the max — which makes Streamlit raise a
# StreamlitAPIException on startup.
model_temperature = st.sidebar.slider("Select model temperature", 0.0, 1.0, 0.5)

# BUG FIX: persist LLM responses across Streamlit reruns. A plain module-level
# list is re-created as [] on every user interaction, so earlier answers were
# always lost before they could be displayed or scored.
if "llm_responses" not in st.session_state:
    st.session_state.llm_responses = []
llm_responses = st.session_state.llm_responses

# Hallucination-evaluation cross-encoder (HHEM).
hhem_model = CrossEncoder('vectara/hallucination_evaluation_model')

if uploaded_file is not None:
    # Save the uploaded PDF to a temporary file so PyMuPDF can open it by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(uploaded_file.read())
        temp_pdf_path = temp_pdf.name

    try:
        # Extract the full text of the PDF, one page at a time.
        pdf_document = fitz.open(temp_pdf_path)
        try:
            text = "".join(
                pdf_document[page_number].get_text()
                for page_number in range(pdf_document.page_count)
            )
        finally:
            pdf_document.close()
    finally:
        # Always remove the temp file, even if PDF parsing fails.
        os.remove(temp_pdf_path)

    # Index the extracted text and build a query engine over it.
    documents = [Document(text=text)]
    index = VectorStoreIndex.from_documents(documents)
    query_engine = index.as_query_engine()

    query = st.text_input("Ask your question")
    button = st.button("Ask")

    if button:
        response = query_engine.query(query)
        st.write(response.response)

        # Record the LLM response for scoring and display.
        llm_responses.append(response.response)

    # BUG FIX: CrossEncoder.predict expects a list of (premise, hypothesis)
    # PAIRS; the original passed a flat [text, llm_response] list, which is
    # interpreted as two independent single-sentence inputs.
    for i, llm_response in enumerate(llm_responses):
        score = hhem_model.predict([(text, llm_response)])
        st.sidebar.write(f"Response {i + 1} - HHEM Score: {score}")

    # Display LLM responses
    if llm_responses:
        st.sidebar.markdown("## LLM Responses")
        for i, llm_response in enumerate(llm_responses):
            st.sidebar.write(f"Response {i + 1}: {llm_response}")