tdecae commited on
Commit
25d0cb0
1 Parent(s): a895164

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -4
app.py CHANGED
@@ -87,7 +87,6 @@ from langchain.document_loaders import DirectoryLoader, TextLoader
87
  from langchain.embeddings import HuggingFaceEmbeddings
88
  from langchain.indexes import VectorstoreIndexCreator
89
  from langchain.indexes.vectorstore import VectorStoreIndexWrapper
90
- from langchain.llms import HuggingFaceLLM
91
  from langchain.text_splitter import CharacterTextSplitter
92
 
93
  __import__('pysqlite3')
@@ -97,6 +96,7 @@ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
97
  from langchain.vectorstores import Chroma
98
  import gradio as gr
99
  from transformers import pipeline
 
100
 
101
  docs = []
102
 
@@ -118,11 +118,23 @@ splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
118
  docs = splitter.split_documents(docs)
119
 
120
  # Convert the document chunks to embedding and save them to the vector store
121
- vectorstore = Chroma.from_documents(docs, embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"), persist_directory="./data")
 
 
122
  vectorstore.persist()
123
 
124
- # Load the Hugging Face model
125
- llm = HuggingFaceLLM(pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B"))
 
 
 
 
 
 
 
 
 
 
126
 
127
  chain = ConversationalRetrievalChain.from_llm(
128
  llm,
@@ -159,3 +171,4 @@ with gr.Blocks() as demo:
159
  demo.launch(debug=True)
160
 
161
 
 
 
87
  from langchain.embeddings import HuggingFaceEmbeddings
88
  from langchain.indexes import VectorstoreIndexCreator
89
  from langchain.indexes.vectorstore import VectorStoreIndexWrapper
 
90
  from langchain.text_splitter import CharacterTextSplitter
91
 
92
  __import__('pysqlite3')
 
96
  from langchain.vectorstores import Chroma
97
  import gradio as gr
98
  from transformers import pipeline
99
+ from sentence_transformers import SentenceTransformer
100
 
101
  docs = []
102
 
 
118
  docs = splitter.split_documents(docs)
119
 
120
# Convert the document chunks to embeddings and persist them in the vector store.
# FIX: HuggingFaceEmbeddings has no `embedding_model` parameter — it accepts a
# `model_name` string and constructs the SentenceTransformer internally, so
# passing a pre-built SentenceTransformer object raised a validation error.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(docs, embedding=embeddings, persist_directory="./data")
vectorstore.persist()
125
 
126
+ # Load the Hugging Face model for text generation
127
+ generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")
128
+
129
class HuggingFaceLLMWrapper:
    """Callable adapter around a transformers ``text-generation`` pipeline.

    Makes the pipeline usable wherever a ``prompt -> completion`` callable is
    expected: calling the instance returns only the newly generated text.
    """

    def __init__(self, generator):
        # `generator` is any callable with the text-generation pipeline
        # signature: generator(prompt, max_length=..., num_return_sequences=...)
        # returning [{'generated_text': ...}, ...].
        self.generator = generator

    def __call__(self, prompt, max_length=512):
        """Generate a completion for ``prompt``.

        FIX: ``text-generation`` pipelines return the prompt concatenated with
        the completion; the original returned that raw string, so every answer
        echoed the entire prompt back to the caller. Strip the prompt prefix
        and return only the continuation.
        """
        result = self.generator(prompt, max_length=max_length, num_return_sequences=1)
        text = result[0]['generated_text']
        # Guard with startswith in case the pipeline ever returns only the
        # completion (behavior can vary with generation settings).
        return text[len(prompt):] if text.startswith(prompt) else text
# Wrap the pipeline so the retrieval chain below can call it like an LLM.
# NOTE(review): ConversationalRetrievalChain.from_llm normally expects a
# LangChain BaseLanguageModel, not a bare callable — verify this wrapper is
# actually accepted by the installed langchain version.
llm = HuggingFaceLLMWrapper(generator)
138
 
139
  chain = ConversationalRetrievalChain.from_llm(
140
  llm,
 
171
  demo.launch(debug=True)
172
 
173
 
174
+