File size: 668 Bytes
4ebd76c
 
ffbadfd
2c8d5d0
 
efed61c
4ebd76c
ffbadfd
 
 
4ebd76c
2c8d5d0
ffbadfd
 
4ebd76c
ffbadfd
2c8d5d0
ffbadfd
2c8d5d0
 
9f56d8d
2c8d5d0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
import streamlit as st

@st.cache_resource
def _build_retriever():
    """Build the retriever over the speech text once and reuse it.

    Streamlit re-executes this script on every widget interaction, so the
    embedding model load and index construction are wrapped in
    ``st.cache_resource`` to avoid repeating them on each rerun.

    Returns:
        The retriever obtained from a Chroma store built over the chunks
        of ``state_of_the_union.txt``.

    Raises:
        OSError: If ``state_of_the_union.txt`` cannot be opened or read.
    """
    # The context manager closes the file handle deterministically; the
    # explicit encoding avoids depending on the platform default.
    with open("state_of_the_union.txt", "r", encoding="utf-8") as source_file:
        full_text = source_file.read()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_text(full_text)

    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base")
    db = Chroma.from_texts(texts, embeddings)
    return db.as_retriever()


text = st.text_area('enter some text to start')

retriever = _build_retriever()

# Only query once the user has typed something; an empty query would still
# cost an embedding pass and a similarity search for a result never shown.
if text:
    retrieved_docs = retriever.invoke(text)
    if retrieved_docs:
        # Display the first document returned by the retriever.
        st.text(retrieved_docs[0].page_content)
    else:
        # Guard against an empty result list instead of raising IndexError.
        st.warning("No matching passage found.")