File size: 1,720 Bytes
b30ed6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import json

def combine_and_calculate(input_file_path, output_file_path):
    with open(input_file_path, 'r') as file:
        output_data = json.load(file)

    combined_json_list = []

    # Calculate the number of groups to create
    num_groups = (len(output_data) + 7) // 8

    for group_num in range(num_groups):
        # Calculate the starting index and ending index for the current group
        start_index = group_num * 8
        end_index = min(start_index + 8, len(output_data))

        # Extract the "text" values from the current group of dictionaries
        combined_text = " ".join([item["text"] for item in output_data[start_index:end_index]])

        # Calculate the "start" and "end" for the current group
        group_start = output_data[start_index]["start"]
        group_end = output_data[end_index - 1]["end"]

        # Create the combined JSON for the current group
        combined_json = {
            "text": combined_text,
            "start": group_start,
            "end": group_end,
        }

        combined_json_list.append(combined_json)

    # Save the combined JSON list to a new file
    with open(output_file_path, 'w') as output_file:
        json.dump(combined_json_list, output_file)

# Replace 'output_file.json' with the path to the output JSON file you created previously
input_file_path = '/home/bharathi/langchain_experiments/GenAI/transcript_end.json'

# Replace 'combined_output_file.json' with the desired path and filename for the combined JSON file
output_file_path = '/home/bharathi/langchain_experiments/GenAI/chunks.json'

# Call the function to create the combined JSON and save it to a new file
combine_and_calculate(input_file_path, output_file_path)