|
import logging |
|
import os |
|
import sys |
|
from datetime import datetime, timedelta |
|
|
|
import requests |
|
# Root logger setup. The level comes from the LOGLEVEL environment variable
# (default DEBUG). Level names are case-sensitive in the logging module, so the
# value is upper-cased to accept e.g. LOGLEVEL=debug.
logging.basicConfig(
    format='%(asctime)s %(levelname)s:%(message)s',
    level=os.environ.get("LOGLEVEL", "DEBUG").upper(),
)
|
from llama_index import VectorStoreIndex, StorageContext, download_loader, DocumentSummaryIndex, ServiceContext, \ |
|
get_response_synthesizer |
|
from llama_index.llms import OpenAI |
|
|
|
# Slack API token read from the environment; None when SLACK_API_TOKEN is unset.
token = os.environ.get('SLACK_API_TOKEN')

# Oldest message timestamp handed to the Slack loader: 365 days before start-up.
fromDate = datetime.now() - timedelta(days=365)

# Directory under which each channel's index is persisted (one sub-directory
# per channel).
index_root = './summary_index'
|
|
|
|
|
def create_channel_dict():
    """Return a mapping of Slack channel name to channel ID.

    Calls the Slack ``conversations.list`` Web API method for public and
    private channels, authenticating with the module-level ``token``, and
    follows ``response_metadata.next_cursor`` until every page has been read.

    Returns:
        dict: ``{channel_name: channel_id}``. If Slack answers a page with a
        falsy ``ok``, the response payload is printed and the channels
        gathered so far (possibly none) are returned.
    """
    base_url = 'https://slack.com/api/conversations.list'
    headers = {'Authorization': f'Bearer {token}'}
    params = {
        'types': 'public_channel,private_channel',
        'limit': 1000,
    }

    channel_dict = {}
    while True:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(base_url, headers=headers, params=params, timeout=30)
        data = response.json()

        if not data.get('ok'):
            print(f'Error: {data}')
            break

        for channel in data.get('channels', []):
            channel_dict[channel['name']] = channel['id']

        # A page may hold fewer channels than exist; an empty or missing
        # next_cursor marks the last page.
        next_cursor = (data.get('response_metadata') or {}).get('next_cursor')
        if not next_cursor:
            break
        params['cursor'] = next_cursor

    return channel_dict
|
|
|
|
|
def index_channels(channels):
    """Build and persist a document-summary index for each Slack channel.

    For every channel, messages newer than ``fromDate`` are loaded with the
    ``SlackReader`` loader, summarised with gpt-3.5-turbo through a
    ``DocumentSummaryIndex``, and persisted to ``{index_root}/{channel name}``.

    Args:
        channels: Either an iterable of channel names, or a dict mapping
            channel name to Slack channel ID. Names supplied without IDs are
            resolved through ``create_channel_dict()``.

    Raises:
        ValueError: If a channel name cannot be resolved to a channel ID.
    """
    # Resolve every name up front so an unknown channel fails before any
    # loading or summarisation work is started.
    if isinstance(channels, dict):
        channel_ids = dict(channels)
    else:
        names = list(channels)
        known = create_channel_dict() if names else {}
        missing = [name for name in names if name not in known]
        if missing:
            raise ValueError(f'Unknown Slack channel(s): {", ".join(missing)}')
        channel_ids = {name: known[name] for name in names}

    SlackReader = download_loader("SlackReader")
    loader = SlackReader(token, earliest_date=fromDate)
    chatgpt = OpenAI(temperature=0, model="gpt-3.5-turbo")
    service_context = ServiceContext.from_defaults(llm=chatgpt, chunk_size=1024)
    response_synthesizer = get_response_synthesizer(
        response_mode="tree_summarize", use_async=True
    )
    # Prompt used to summarise each document; identical for every channel, so
    # it is built once outside the loop.
    summary_query = (
        "Give a concise summary of this document in bullet points. Also describe some of the questions "
        "that this document can answer. "
    )

    for channel, channel_id in channel_ids.items():
        # Load this channel's own messages, oldest first.
        documents = loader.load_data(channel_ids=[channel_id], reverse_chronological=False)
        # A fresh storage context per channel keeps each persisted index separate.
        storage_context = StorageContext.from_defaults()
        DocumentSummaryIndex.from_documents(
            documents=documents,
            service_context=service_context,
            response_synthesizer=response_synthesizer,
            storage_context=storage_context,
            summary_query=summary_query,
        )

        save_location = f"{index_root}/{channel}"
        os.makedirs(save_location, exist_ok=True)
        storage_context.persist(save_location)
|
|
|
|
|
def main():
    """Command-line entry point: index the channels named in ``sys.argv[1]``.

    Expects one argument holding a comma-separated list of channel names.
    Whitespace around names is ignored and empty entries are dropped.

    Exits with status 1 when no channel name is supplied, or when a name is
    not among the channels returned by ``create_channel_dict()``.
    """
    usage = 'Usage: python3 index_slack.py channel1,channel2,channel3'
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    # Tolerate input such as "general, random," by trimming each name and
    # discarding blanks.
    channels = [name.strip() for name in sys.argv[1].split(',') if name.strip()]
    if not channels:
        print(usage)
        sys.exit(1)

    channel_dict = create_channel_dict()
    for channel in channels:
        if channel not in channel_dict:
            print(f'Channel {channel} not found. Available channels: {channel_dict.keys()}')
            sys.exit(1)

    index_channels(channels)
|
|
|
|
|
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
|