jskinner215 committed
Commit 70c712a • 1 Parent(s): f4bdea1

changing to add basic contextual options before analyzing data

Files changed (1)
  1. app.py +23 -60
app.py CHANGED
@@ -1,68 +1,31 @@
- # Bring in deps
  import streamlit as st
- from langchain.llms import LlamaCpp
- from langchain.embeddings import LlamaCppEmbeddings
- from langchain.prompts import PromptTemplate
- from langchain.chains import LLMChain
- from langchain.document_loaders import CSVLoader  # Import CSVLoader
- from langchain.text_splitter import CharacterTextSplitter
- from langchain.vectorstores import Chroma
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
  import pandas as pd

- # Customize the layout
- st.set_page_config(page_title="DOCAI", page_icon="🤖", layout="wide")
- st.markdown(f"""
- <style>
- .stApp {{background-image: url("https://images.unsplash.com/photo-1509537257950-20f875b03669?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1469&q=80");
- background-attachment: fixed;
- background-size: cover}}
- </style>
- """, unsafe_allow_html=True)
-
- # function for writing uploaded file in temp
- def write_csv_file(content, file_path):
-     try:
-         with open(file_path, 'w') as file:
-             file.write(content)
-         return True
-     except Exception as e:
-         print(f"Error occurred while writing the file: {e}")
-         return False
-
- # set prompt template
- prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
- {context}
-
- Question: {question}
- Answer:"""
- prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
-
- # initialize the LLM & Embeddings
- llm = LlamaCpp(model_path="./models/llama-7b.ggmlv3.q4_0.bin")
- embeddings = LlamaCppEmbeddings(model_path="models/llama-7b.ggmlv3.q4_0.bin")
- llm_chain = LLMChain(llm=llm, prompt=prompt)
+ # Initialize the LLM from HuggingFace
+ tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-0-70b-16bit")
+ model = AutoModelForCausalLM.from_pretrained("upstage/SOLAR-0-70b-16bit")
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

  st.title("📄 Document Conversation 🤖")
- uploaded_file = st.file_uploader("Upload a CSV file", type="csv")  # Change type to csv
+ uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

  if uploaded_file is not None:
-     content = uploaded_file.read().decode('utf-8')
-     file_path = "temp/file.csv"
-     write_csv_file(content, file_path)
-
-     loader = CSVLoader(file_path)  # Use CSVLoader
-     docs = loader.load()
-     text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
-     texts = text_splitter.split_documents(docs)
-     db = Chroma.from_documents(texts, embeddings)
-     st.success("File Loaded Successfully!!")
+     df = pd.read_csv(uploaded_file)
+     st.write(f"Loaded CSV with {df.shape[0]} rows and {df.shape[1]} columns.")
+     st.write("Columns:", df.columns.tolist())

-     # Query through LLM
-     question = st.text_input("Ask something from the file", placeholder="Find something similar to: ....this.... in the text?", disabled=not uploaded_file)
-     if question:
-         similar_doc = db.similarity_search(question, k=1)
-         context = similar_doc[0].page_content
-         query_llm = LLMChain(llm=llm, prompt=prompt)
-         response = query_llm.run({"context": context, "question": question})
-         st.write(response)
+     # Allow user to select columns to focus on
+     selected_columns = st.multiselect("Select columns to focus on:", df.columns.tolist())
+     if selected_columns:
+         st.write(df[selected_columns].head())  # Display first few rows of selected columns
+
+         # Generate a textual representation of the selected data
+         context = f"The selected data has columns: {', '.join(selected_columns)}. Here are the first few entries: {df[selected_columns].head().to_string(index=False)}"
+
+         # Query through LLM
+         question = st.text_input("Ask something about the selected data", placeholder="What is the average of ...?")
+         if question:
+             full_query = context + " " + question
+             response = pipe(full_query, max_length=250, do_sample=True, top_k=50)
+             st.write(response[0]['generated_text'])
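
A note on the new initialization: upstage/SOLAR-0-70b-16bit is a 70B-parameter checkpoint, so the plain from_pretrained call as committed will try to materialize roughly 140 GB of fp16 weights on one device. A minimal sketch of a more memory-conscious load, assuming the accelerate package is installed and enough combined GPU/CPU memory is available (this is not part of the commit):

# Sketch: memory-conscious loading of the same checkpoint.
# Assumes `pip install accelerate`; not part of this commit.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "upstage/SOLAR-0-70b-16bit"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # keep the published 16-bit weights in half precision
    device_map="auto",          # shard across available GPUs, offloading the remainder to CPU
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)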
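
Two generation details in the query step are also worth flagging: max_length counts the prompt tokens, so a long CSV context can leave little or no room for the answer, and a text-generation pipeline returns the prompt together with the completion by default, so the app will echo the context back to the user. Both max_new_tokens and return_full_text are standard pipeline arguments that address this; a small self-contained illustration with a stand-in model (gpt2 here is only for demonstration, not what the commit uses):

# Sketch: budget tokens for the answer alone and suppress the echoed prompt.
# gpt2 is a small stand-in model for illustration; not part of this commit.
from transformers import pipeline

demo_pipe = pipeline("text-generation", model="gpt2")
out = demo_pipe(
    "The average of 2, 4 and 6 is",
    max_new_tokens=20,       # reserved for the completion, independent of prompt length
    do_sample=True,
    top_k=50,
    return_full_text=False,  # return only the completion, not prompt + completion
)
print(out[0]["generated_text"])

In app.py the same two keyword arguments could be passed to the committed pipe(full_query, ...) call in place of max_length=250.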