DrunkenMonkey committed on
Commit
1710b29
1 Parent(s): 5d58dc9

Upload 4 files

Browse files
Files changed (4) hide show
  1. .env.example +1 -0
  2. app.py +82 -0
  3. myData.csv +12 -0
  4. requirements.txt +0 -0
.env.example ADDED
@@ -0,0 +1 @@
 
 
1
+ OPENAI_API_KEY=""
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Allows you to use Streamlit, a framework for building interactive web applications.
2
+ #It provides functions for creating UIs, displaying data, and handling user inputs.
3
+ import streamlit as st
4
+
5
+
6
+ #This module provides a way to interact with the operating system, such as accessing environment variables, working with files
7
+ #and directories, executing shell commands, etc
8
+ import os
9
+
10
+ #Helps us generate embeddings
11
+ #An embedding is a vector (list) of floating point numbers. The distance between two vectors measures their relatedness.
12
+ #Small distances suggest high relatedness and large distances suggest low relatedness.
13
+
14
+ #As the LangChain team has been working aggressively on improving the tool, we can see a lot of changes happening every week.
15
+ #As a part of that, the import below has been deprecated
16
+ #from langchain.embeddings import OpenAIEmbeddings
17
+
18
+ #New import from langchain, which replaces the above
19
+ from langchain_openai import OpenAIEmbeddings
20
+
21
+
22
+ #FAISS is an open-source library developed by Facebook AI Research for efficient similarity search and clustering of large-scale datasets, particularly with high-dimensional vectors.
23
+ #It provides optimized indexing structures and algorithms for tasks like nearest neighbor search and recommendation systems.
24
+
25
+ #As the LangChain team has been working aggressively on improving the tool, we can see a lot of changes happening every week.
26
+ #As a part of that, the import below has been deprecated
27
+ #from langchain.vectorstores import FAISS
28
+
29
+ #New import from langchain, which replaces the above
30
+ from langchain_community.vectorstores import FAISS
31
+
32
+
33
+ #load_dotenv() is a function that loads variables from a .env file into environment variables in a Python script.
34
+ #It allows you to store sensitive information or configuration settings separate from your code
35
+ #and access them within your application.
36
+ from dotenv import load_dotenv
37
+
38
+
39
# Copy settings from a local .env file (see .env.example: OPENAI_API_KEY)
# into the process environment.
load_dotenv()

# Set the browser-tab title and icon, then render the page heading.
st.set_page_config(page_icon=":robot:", page_title="Educate Kids")
st.header("Hey, Ask me something & I will give out similar things")

# Embedding client; it is handed to the FAISS index when the index is built.
embeddings = OpenAIEmbeddings()
48
+
49
# Load the word list from the CSV file; each row becomes one Document.
# The loader is imported from langchain_community (like FAISS) because the
# old langchain.document_loaders path is deprecated.
from langchain_community.document_loaders.csv_loader import CSVLoader

# No 'fieldnames' override here: myData.csv already starts with a "Words"
# header row, and supplying fieldnames would make the CSV reader treat that
# header as a data row (indexing a bogus "Words: Words" document).
loader = CSVLoader(
    file_path='myData.csv',
    csv_args={
        'delimiter': ',',
        'quotechar': '"',
    },
)

# Read every CSV row into a list of Documents.
data = loader.load()

# Debug output: dump the loaded documents to the server console.
print(data)

# Embed the documents and build the FAISS index used for similarity search.
db = FAISS.from_documents(data, embeddings)
64
+
65
def get_text():
    """Render the query text box and return its current contents.

    Returns:
        The value of the "You: " text input widget.
    """
    # Use a string literal as the widget key, not the builtin ``input``
    # function object.
    return st.text_input("You: ", key="input")
69
+
70
+
71
user_input = get_text()
submit = st.button('Find similar Things')

if submit:
    if not user_input.strip():
        # Nothing to embed; ask for input instead of querying with an
        # empty string.
        st.warning("Please type something first.")
    else:
        # Fetch the indexed entries most similar to the query.
        docs = db.similarity_search(user_input)
        # Debug output: dump the raw matches to the server console.
        print(docs)
        st.subheader("Top Matches:")
        # Show the text of the two best matches. Slicing avoids an
        # IndexError when fewer than two documents come back, and using
        # page_content for both keeps the output free of Document reprs.
        for doc in docs[:2]:
            st.text(doc.page_content)
82
+
myData.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Words
2
+ Elephant
3
+ Lion
4
+ Tiger
5
+ Dog
6
+ Cricket
7
+ Football
8
+ Tennis
9
+ Basketball
10
+ Apple
11
+ Orange
12
+ Banana
requirements.txt ADDED
Binary file (286 Bytes). View file