Spaces:

DrunkenMonkey
/

WordSimilarity

Sleeping

App Files Files Community

DrunkenMonkey committed on Apr 19

Commit

1710b29

•

1 Parent(s): 5d58dc9

Upload 4 files

Browse files

Files changed (4) hide show

.env.example +1 -0
app.py +82 -0
myData.csv +12 -0
requirements.txt +0 -0

.env.example ADDED Viewed

	@@ -0,0 +1 @@


1	+ OPENAI_API_KEY=""

app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+#Allows you to use Streamlit, a framework for building interactive web applications.
+#It provides functions for creating UIs, displaying data, and handling user inputs.
+import streamlit as st
+#This module provides a way to interact with the operating system, such as accessing environment variables, working with files
+#and directories, executing shell commands, etc
+import os
+#Helps us generate embeddings
+#An embedding is a vector (list) of floating point numbers. The distance between two vectors measures their relatedness.
+#Small distances suggest high relatedness and large distances suggest low relatedness.
+#The LangChain team has been working aggressively on improving the tool, so we see a lot of changes every week.
+#As part of that, the import below has been deprecated:
+#from langchain.embeddings import OpenAIEmbeddings
+#New import from langchain, which replaces the above
+from langchain_openai import OpenAIEmbeddings
+#FAISS is an open-source library developed by Facebook AI Research for efficient similarity search and clustering of large-scale datasets, particularly with high-dimensional vectors.
+#It provides optimized indexing structures and algorithms for tasks like nearest neighbor search and recommendation systems.
+#The LangChain team has been working aggressively on improving the tool, so we see a lot of changes every week.
+#As part of that, the import below has been deprecated:
+#from langchain.vectorstores import FAISS
+#New import from langchain, which replaces the above
+from langchain_community.vectorstores import FAISS
+#load_dotenv() is a function that loads variables from a .env file into environment variables in a Python script.
+#It allows you to store sensitive information or configuration settings separate from your code
+#and access them within your application.
+from dotenv import load_dotenv
+load_dotenv()
+#By using st.set_page_config(), you can customize the appearance of your Streamlit application's web page
+st.set_page_config(page_title="Educate Kids", page_icon=":robot:")
+st.header("Hey, Ask me something & I will give out similar things")
+#Initialize the OpenAIEmbeddings object
+embeddings = OpenAIEmbeddings()
+#The below snippet helps us to import CSV file data for our tasks
+from langchain.document_loaders.csv_loader import CSVLoader
+loader = CSVLoader(file_path='myData.csv', csv_args={
+    'delimiter': ',',
+    'quotechar': '"',
+    'fieldnames': ['Words']
+})
+#Assigning the data inside the csv to our variable here...
+data = loader.load()
+#Display the data
+print(data)
+db = FAISS.from_documents(data, embeddings)
+#Function to receive input from user and store it in a variable
+def get_text():
+    input_text = st.text_input("You: ", key= input)
+    return input_text
+user_input=get_text()
+submit = st.button('Find similar Things')
+if submit:
+    #If the button is clicked, the below snippet will fetch us the similar text
+    docs = db.similarity_search(user_input)
+    print(docs)
+    st.subheader("Top Matches:")
+    st.text(docs[0])
+    st.text(docs[1].page_content)

myData.csv ADDED Viewed

	@@ -0,0 +1,12 @@

+Words
+Elephant
+Lion
+Tiger
+Dog
+Cricket
+Football
+Tennis
+Basketball
+Apple
+Orange
+Banana

requirements.txt ADDED Viewed

Binary file (286 Bytes). View file