silvanocerza committed on
Commit
0beed35
1 Parent(s): 1334c47

Update to latest haystack-ai beta version

Browse files
Files changed (2) hide show
  1. main.py +27 -27
  2. requirements.txt +1 -1
main.py CHANGED
@@ -1,23 +1,23 @@
1
- from typing import List, Tuple
2
- from pathlib import Path
3
  import os
4
  import subprocess
 
 
5
 
 
6
  from dotenv import load_dotenv
7
- from haystack.preview import Pipeline
8
- from haystack.preview.dataclasses import GeneratedAnswer
9
- from haystack.preview.components.retrievers import MemoryBM25Retriever
10
- from haystack.preview.components.generators.openai.gpt import GPTGenerator
11
- from haystack.preview.components.builders.answer_builder import AnswerBuilder
12
- from haystack.preview.components.builders.prompt_builder import PromptBuilder
13
- from haystack.preview.components.preprocessors import (
14
  DocumentCleaner,
15
- TextDocumentSplitter,
16
  )
17
- from haystack.preview.components.writers import DocumentWriter
18
- from haystack.preview.components.file_converters import TextFileToDocument
19
- from haystack.preview.document_stores.memory import MemoryDocumentStore
20
- import streamlit as st
 
21
 
22
  # Load the environment variables, we're going to need it for OpenAI
23
  load_dotenv()
@@ -82,7 +82,7 @@ def fetch(documentations: List[Tuple[str, str, str]]):
82
  for p in repo.glob(pattern):
83
  data = {
84
  "path": p,
85
- "metadata": {
86
  "url_source": f"{url}/tree/{branch}/{p.relative_to(repo)}",
87
  "suffix": p.suffix,
88
  },
@@ -95,15 +95,15 @@ def fetch(documentations: List[Tuple[str, str, str]]):
95
  @st.cache_resource(show_spinner=False)
96
  def document_store():
97
  # We're going to store the processed documents in here
98
- return MemoryDocumentStore()
99
 
100
 
101
  @st.cache_resource(show_spinner=False)
102
  def index_files(files):
103
  # We create some components
104
- text_converter = TextFileToDocument(progress_bar=False)
105
  document_cleaner = DocumentCleaner()
106
- document_splitter = TextDocumentSplitter()
107
  document_writer = DocumentWriter(
108
  document_store=document_store(), policy="overwrite"
109
  )
@@ -118,24 +118,24 @@ def index_files(files):
118
  indexing_pipeline.connect("cleaner", "splitter")
119
  indexing_pipeline.connect("splitter", "writer")
120
 
121
- # And now we save the documentation in our MemoryDocumentStore
122
  paths = []
123
- metadata = []
124
  for f in files:
125
  paths.append(f["path"])
126
- metadata.append(f["metadata"])
127
  indexing_pipeline.run(
128
  {
129
  "converter": {
130
- "paths": paths,
131
- "metadata": metadata,
132
  }
133
  }
134
  )
135
 
136
 
137
  def search(question: str) -> GeneratedAnswer:
138
- retriever = MemoryBM25Retriever(document_store=document_store(), top_k=5)
139
 
140
  template = (
141
  "Take a deep breath and think then answer given the context"
@@ -146,7 +146,7 @@ def search(question: str) -> GeneratedAnswer:
146
  prompt_builder = PromptBuilder(template)
147
 
148
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
149
- generator = GPTGenerator(api_key=OPENAI_API_KEY)
150
  answer_builder = AnswerBuilder()
151
 
152
  query_pipeline = Pipeline()
@@ -202,7 +202,7 @@ if question := st.text_input(
202
  st.markdown(answer.data)
203
  with st.expander("See sources:"):
204
  for document in answer.documents:
205
- url_source = document.metadata.get("url_source", "")
206
  st.write(url_source)
207
- st.text(document.text)
208
  st.divider()
 
 
 
1
  import os
2
  import subprocess
3
+ from pathlib import Path
4
+ from typing import List, Tuple
5
 
6
+ import streamlit as st
7
  from dotenv import load_dotenv
8
+ from haystack.components.builders.answer_builder import AnswerBuilder
9
+ from haystack.components.builders.prompt_builder import PromptBuilder
10
+ from haystack.components.converters import TextFileToDocument
11
+ from haystack.components.generators.openai import OpenAIGenerator
12
+ from haystack.components.preprocessors import (
 
 
13
  DocumentCleaner,
14
+ DocumentSplitter,
15
  )
16
+ from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
17
+ from haystack.components.writers import DocumentWriter
18
+ from haystack.core.pipeline import Pipeline
19
+ from haystack.dataclasses import GeneratedAnswer
20
+ from haystack.document_stores.in_memory import InMemoryDocumentStore
21
 
22
  # Load the environment variables, we're going to need it for OpenAI
23
  load_dotenv()
 
82
  for p in repo.glob(pattern):
83
  data = {
84
  "path": p,
85
+ "meta": {
86
  "url_source": f"{url}/tree/{branch}/{p.relative_to(repo)}",
87
  "suffix": p.suffix,
88
  },
 
95
  @st.cache_resource(show_spinner=False)
96
  def document_store():
97
  # We're going to store the processed documents in here
98
+ return InMemoryDocumentStore()
99
 
100
 
101
  @st.cache_resource(show_spinner=False)
102
  def index_files(files):
103
  # We create some components
104
+ text_converter = TextFileToDocument()
105
  document_cleaner = DocumentCleaner()
106
+ document_splitter = DocumentSplitter()
107
  document_writer = DocumentWriter(
108
  document_store=document_store(), policy="overwrite"
109
  )
 
118
  indexing_pipeline.connect("cleaner", "splitter")
119
  indexing_pipeline.connect("splitter", "writer")
120
 
121
+ # And now we save the documentation in our InMemoryDocumentStore
122
  paths = []
123
+ meta = []
124
  for f in files:
125
  paths.append(f["path"])
126
+ meta.append(f["meta"])
127
  indexing_pipeline.run(
128
  {
129
  "converter": {
130
+ "sources": paths,
131
+ "meta": meta,
132
  }
133
  }
134
  )
135
 
136
 
137
  def search(question: str) -> GeneratedAnswer:
138
+ retriever = InMemoryBM25Retriever(document_store=document_store(), top_k=5)
139
 
140
  template = (
141
  "Take a deep breath and think then answer given the context"
 
146
  prompt_builder = PromptBuilder(template)
147
 
148
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
149
+ generator = OpenAIGenerator(api_key=OPENAI_API_KEY)
150
  answer_builder = AnswerBuilder()
151
 
152
  query_pipeline = Pipeline()
 
202
  st.markdown(answer.data)
203
  with st.expander("See sources:"):
204
  for document in answer.documents:
205
+ url_source = document.meta.get("url_source", "")
206
  st.write(url_source)
207
+ st.text(document.content)
208
  st.divider()
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- haystack-ai==0.79.0
2
  more_itertools
3
  langdetect
4
  streamlit==1.27.2
 
1
+ haystack-ai==2.0.0b5
2
  more_itertools
3
  langdetect
4
  streamlit==1.27.2