ingest topic changes
Browse files
- pipeline/news_ingest.py +2 -6
pipeline/news_ingest.py
CHANGED
|
@@ -9,7 +9,6 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
|
| 9 |
from components.indexers.news_indexer import get_or_build_index_from_docs
|
| 10 |
from components.fetchers.google_search import fetch_google_news
|
| 11 |
from components.fetchers.scraper import scrape_url
|
| 12 |
-
from components.generators.daily_feed import generate_and_cache_daily_feed
|
| 13 |
from llama_index.core.settings import Settings
|
| 14 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 15 |
from llama_index.core.schema import Document
|
|
@@ -44,7 +43,7 @@ async def build_documents(data: List[Dict]) -> List[Document]:
|
|
| 44 |
metadata={
|
| 45 |
"title": entry["title"],
|
| 46 |
"url": entry["url"],
|
| 47 |
-
"topic": entry["topic"],
|
| 48 |
"source": entry["source"]
|
| 49 |
}
|
| 50 |
)
|
|
@@ -101,10 +100,7 @@ async def main():
|
|
| 101 |
documents = await build_documents(all_articles)
|
| 102 |
get_or_build_index_from_docs(documents)
|
| 103 |
|
| 104 |
-
|
| 105 |
-
# generate_and_cache_daily_feed(documents) # ✅ SYNC CALL
|
| 106 |
-
|
| 107 |
-
# print(f"✅ Indexed, headlines generated, and stored at: {INDEX_DIR}")
|
| 108 |
|
| 109 |
# 🚀 Entrypoint
|
| 110 |
if __name__ == "__main__":
|
|
|
|
| 9 |
from components.indexers.news_indexer import get_or_build_index_from_docs
|
| 10 |
from components.fetchers.google_search import fetch_google_news
|
| 11 |
from components.fetchers.scraper import scrape_url
|
|
|
|
| 12 |
from llama_index.core.settings import Settings
|
| 13 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
| 14 |
from llama_index.core.schema import Document
|
|
|
|
| 43 |
metadata={
|
| 44 |
"title": entry["title"],
|
| 45 |
"url": entry["url"],
|
| 46 |
+
"topic": entry["topic"].lower().replace(" news", ""), # normalized topic key
|
| 47 |
"source": entry["source"]
|
| 48 |
}
|
| 49 |
)
|
|
|
|
| 100 |
documents = await build_documents(all_articles)
|
| 101 |
get_or_build_index_from_docs(documents)
|
| 102 |
|
| 103 |
+
print("✅ Indexing complete.")
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
# 🚀 Entrypoint
|
| 106 |
if __name__ == "__main__":
|