|
import json |
|
import os |
|
import sys |
|
from datetime import datetime, timedelta |
|
|
|
import requests |
|
from llama_index import VectorStoreIndex, StorageContext, download_loader |
|
|
|
from channel_id_mapper import ChannelIdMapper |
|
|
|
# Slack API token taken from the environment; None when the variable is unset.
token = os.getenv('SLACK_API_TOKEN')

# Cut-off handed to the Slack loader as `earliest_date`: 90 days before the
# moment this module is loaded.
fromDate = datetime.now() - timedelta(days=90)

# Root directory under which one sub-directory per channel is written.
index_root = './index'
|
|
|
|
|
|
|
def index_channels(channels, name_id_map):
    """Build and persist one vector index per Slack channel.

    For every channel name, the channel's messages are loaded through the
    ``SlackReader`` loader (constructed with the module-level ``token`` and
    ``fromDate``), indexed with ``VectorStoreIndex``, and the resulting
    storage context is persisted to ``{index_root}/{channel}``.

    Args:
        channels: Iterable of channel names. Each name is also used as the
            sub-directory name under ``index_root``.
        name_id_map: Lookup from channel name to the channel identifier that
            is passed to the loader (accessed as ``name_id_map[channel]``).
    """
    SlackReader = download_loader("SlackReader")
    loader = SlackReader(token, earliest_date=fromDate)

    for channel in channels:
        save_location = f"{index_root}/{channel}"
        # exist_ok=True replaces the former exists()/makedirs() pair, which
        # could fail if the directory appeared between the check and the call.
        os.makedirs(save_location, exist_ok=True)

        documents = loader.load_data([name_id_map[channel]], reverse_chronological=True)

        # A fresh storage context per channel keeps each channel's index in
        # its own directory.
        storage_context = StorageContext.from_defaults()
        # The returned index object is not needed here; what gets written to
        # disk is the storage context passed in (presumably populated by
        # from_documents -- confirm against the llama_index docs).
        VectorStoreIndex.from_documents(documents=documents, storage_context=storage_context)
        storage_context.persist(save_location)
|
|
|
|
|
def main():
    """Index the Slack channels named on the command line.

    Reads a comma-separated list of channel names from ``sys.argv[1]`` and the
    Slack token from the ``SLACK_API_TOKEN`` environment variable. Prints a
    message and exits with status 1 when the argument is missing or contains
    no channel names, when the token is not set, or when a requested channel
    is not a known channel. Otherwise the channel names are passed to
    ``index_channels``.
    """
    usage = 'Usage: python3 index_slack.py channel1,channel2,channel3'

    if len(sys.argv) < 2:
        print(usage)
        # sys.exit instead of the bare exit(): the latter is installed by the
        # site module for interactive sessions and is not always available.
        sys.exit(1)

    slack_token = os.environ.get('SLACK_API_TOKEN')
    if not slack_token:
        print('Please set the SLACK_API_TOKEN environment variable.')
        sys.exit(1)

    # Tolerate "a, b" and stray commas; an argument with no names at all is a
    # usage error rather than a silent no-op.
    channels = [name.strip() for name in sys.argv[1].split(',') if name.strip()]
    if not channels:
        print(usage)
        sys.exit(1)

    channel_id_mapping = ChannelIdMapper(slack_token)

    # NOTE(review): `channel_dict` is not defined anywhere in this file, so
    # the lookups below raise NameError as written. It presumably has to be
    # obtained from `channel_id_mapping` (a name -> id mapping); the
    # ChannelIdMapper API is not visible here -- confirm against
    # channel_id_mapper.py and bind `channel_dict` accordingly.
    for channel in channels:
        if channel not in channel_dict:
            print(f'Channel {channel} not found. Available channels: {channel_dict.keys()}')
            sys.exit(1)

    index_channels(channels, channel_dict)
|
|
|
|
|
|
|
|
|
# Run the indexer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|