"""Split one podcast episode's transcript into token-bounded sentence chunks.

Token counts use tiktoken's encoding for gpt-3.5-turbo so chunk sizes match
what the model will actually see at inference time.
"""

import tiktoken
from llama_index.text_splitter import SentenceSplitter

# Metadata and transcript for a single video in the playlist.
# NOTE(review): `content` here is only the first two sentences — presumably a
# truncated sample of the full transcript; verify against the data source.
d = {
    'title': "THE BIG AI RESET: The Next Global SuperPower Isn't Who You Think | Ian Bremmer",
    'video_id': 'nXJBccSwtB8',
    'playlist_id': 'PL8qcvQ7Byc3OJ02hbWJbHWePh4XEg3cvo',
    'length': 5410,
    'thumbnail_url': 'https://i.ytimg.com/vi/nXJBccSwtB8/hq720.jpg',
    'views': 138628,
    'episode_url': 'https://www.youtube.com/watch?v=nXJBccSwtB8&list=PL8qcvQ7Byc3OJ02hbWJbHWePh4XEg3cvo',
    'guest': 'Ian Bremmer',
    'summary': "In this episode, Ian Bremmer discusses the rise of big tech as a third superpower and the potential dangers and opportunities it presents. He highlights the immense power held by tech companies in shaping society, the economy, and national security, emphasizing their sovereignty over the digital world. Bremmer expresses concerns about the growing influence of AI and its potential to outstrip government regulation, leading to a reality where tech companies wield significant power over individuals. He also delves into the risks associated with AI proliferation, including the potential for non-governments to control and misuse the technology, exacerbating social inequalities and disinformation. Bremmer emphasizes the need to address negative externalities and regulate AI to mitigate its adverse impacts. Additionally, he discusses the implications of AI on job displacement and social discontent, particularly for marginalized communities. The conversation delves into the breakdown of truth in the digital age, driven by algorithmic sorting and micro-targeting, leading to fragmented echo chambers and the erosion of consensus on facts. Both Bremmer and the host explore the challenges of navigating truth in a polarized and algorithmically driven information landscape, highlighting the need for critical thinking and a focus on human flourishing as a guiding principle in the face of AI's transformative impact.",
    'content': "You said these are dangerous times. \nThe world order is shifting before our eyes",
}

# Maximum tokens per chunk; no overlap between consecutive chunks.
chunk_size = 256
chunk_overlap = 0

# Tokenizer matched to gpt-3.5-turbo so the 256-token budget aligns with the
# model's own tokenization.
encoding = tiktoken.encoding_for_model('gpt-3.5-turbo-0613')

gpt35_txt_splitter = SentenceSplitter(
    chunk_size=chunk_size,
    tokenizer=encoding.encode,
    chunk_overlap=chunk_overlap,
)

# Fix: the original called the splitter instance directly on a string
# (`gpt35_txt_splitter(d['content'])`); the documented API for splitting a raw
# string is `split_text`, which returns a list of chunk strings. Bind the
# result instead of discarding it.
chunks = gpt35_txt_splitter.split_text(d['content'])
print(chunks)