# slackdemo/summary_index_slack.py: POC for passive monitoring of Slack channels
import logging
import os
import sys
from datetime import datetime, timedelta

import requests

# Configure root logging; the level is controlled by the LOGLEVEL environment variable (default DEBUG).
logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s', level=os.environ.get("LOGLEVEL", "DEBUG"))

from llama_index import VectorStoreIndex, StorageContext, download_loader, DocumentSummaryIndex, ServiceContext, \
    get_response_synthesizer
from llama_index.llms import OpenAI

token = os.environ.get('SLACK_API_TOKEN')  # Slack API token, read from the environment
fromDate = datetime.now() - timedelta(365)  # Only index messages from the past year
index_root = './summary_index'  # Root directory where per-channel indexes are persisted

def create_channel_dict():
    """Return a mapping of channel name -> channel ID for all public and private channels."""
    base_url = 'https://slack.com/api/conversations.list'
    headers = {'Authorization': f'Bearer {token}'}
    params = {
        'types': 'public_channel,private_channel',
        'limit': 1000
    }
    response = requests.get(base_url, headers=headers, params=params)
    data = response.json()
    channel_dict = {}
    if data['ok']:
        channels = data['channels']
        for channel in channels:
            channel_name = channel['name']
            channel_id = channel['id']
            channel_dict[channel_name] = channel_id
    else:
        print(f'Error: {data}')
    # print(channel_dict)
    return channel_dict
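
# Note: conversations.list returns at most `limit` channels per response. Workspaces with more
# channels than that would need cursor pagination; a rough sketch (not wired in above):
#
#   cursor = data.get('response_metadata', {}).get('next_cursor', '')
#   while cursor:
#       params['cursor'] = cursor
#       data = requests.get(base_url, headers=headers, params=params).json()
#       # ...merge data['channels'] into channel_dict as above...
#       cursor = data.get('response_metadata', {}).get('next_cursor', '')
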
def index_channels(channels, channel_dict):
    """Build and persist a DocumentSummaryIndex for each of the given Slack channel names."""
    SlackReader = download_loader("SlackReader")
    loader = SlackReader(token, earliest_date=fromDate)
    chatgpt = OpenAI(temperature=0, model="gpt-3.5-turbo")
    service_context = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)
    response_synthesizer = get_response_synthesizer(
        response_mode="tree_summarize", use_async=True
    )
    for channel in channels:
        # Resolve the channel name to its Slack ID and load its messages in chronological order.
        documents = loader.load_data(channel_ids=[channel_dict[channel]], reverse_chronological=False)
        storage_context = StorageContext.from_defaults()
        # Custom prompt used when summarizing each document.
        summary_query = (
            "Give a concise summary of this document in bullet points. Also describe some of the questions "
            "that this document can answer. "
        )
        index = DocumentSummaryIndex.from_documents(
            documents=documents,
            service_context=service_context,
            response_synthesizer=response_synthesizer,
            storage_context=storage_context,
            summary_query=summary_query,
        )
        # index = VectorStoreIndex.from_documents(documents=documents, storage_context=storage_context)
        save_location = f"{index_root}/{channel}"
        if not os.path.exists(save_location):
            os.makedirs(save_location)
        storage_context.persist(save_location)
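
# A minimal sketch of how a persisted channel index could be loaded back and queried later
# (assumes the same llama_index version; 'general' is only an example channel name):
#
#   from llama_index import load_index_from_storage
#   storage_context = StorageContext.from_defaults(persist_dir=f"{index_root}/general")
#   service_context = ServiceContext.from_defaults(llm=OpenAI(temperature=0, model="gpt-3.5-turbo"))
#   index = load_index_from_storage(storage_context, service_context=service_context)
#   query_engine = index.as_query_engine(response_mode="tree_summarize")
#   print(query_engine.query("What were the main topics discussed in this channel?"))
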
def main():
    if len(sys.argv) < 2:
        print('Usage: python3 summary_index_slack.py channel1,channel2,channel3')
        sys.exit(1)
    channels = sys.argv[1].split(',')
    channel_dict = create_channel_dict()
    for channel in channels:
        if channel not in channel_dict:
            print(f'Channel {channel} not found. Available channels: {", ".join(channel_dict)}')
            sys.exit(1)
    index_channels(channels, channel_dict)
if __name__ == '__main__':
    main()
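
# Example invocation (channel names and tokens are placeholders; OPENAI_API_KEY is needed by the OpenAI LLM):
#   SLACK_API_TOKEN=xoxb-... OPENAI_API_KEY=sk-... python3 summary_index_slack.py general,random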