elia-waefler committed on
Commit
00316bb
·
1 Parent(s): e133364
Files changed (3) hide show
  1. my_1_writer.py +98 -0
  2. my_new_openai.py +0 -44
  3. my_vectors.py +17 -0
my_1_writer.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # TODO: this module needs to be cleaned up
2
+
3
+ import json
4
+ import pandas as pd
5
+
6
+
7
def split_json_file(input_filepath, lines_per_file=50,
                    output_dir='translate_data', prefix='english'):
    """
    Split a text/JSON file into numbered files of at most 'lines_per_file' lines.

    The chunks are written as '<output_dir>/<prefix>_<n>.json' with n starting
    at 1, and one 'Created ...' message is printed per file. With the default
    arguments the file names are 'translate_data/english_<n>.json'.

    param input_filepath: The path to the input JSON file.
    param lines_per_file: The maximum number of lines per output file (>= 1).
    param output_dir: Directory receiving the chunks; created if it is missing.
    param prefix: File-name prefix placed before the running number.
    raises ValueError: If 'lines_per_file' is smaller than 1.
    """
    import os

    if lines_per_file < 1:
        # range() would reject a step of 0 and silently do nothing for a
        # negative step; fail loudly before touching the file system.
        raise ValueError("lines_per_file must be a positive integer")

    # JSON text is UTF-8, so do not depend on the platform default encoding.
    with open(input_filepath, 'r', encoding='utf-8') as input_file:
        lines = input_file.readlines()

    # The target directory was previously assumed to exist.
    os.makedirs(output_dir, exist_ok=True)

    chunk_starts = range(0, len(lines), lines_per_file)
    for file_counter, start in enumerate(chunk_starts, start=1):
        # '/' is kept (instead of os.path.join) so the printed name is the
        # same on every platform.
        output_filename = f'{output_dir}/{prefix}_{file_counter}.json'
        with open(output_filename, 'w', encoding='utf-8') as output_file:
            output_file.writelines(lines[start:start + lines_per_file])
        print(f'Created {output_filename}')
34
+
35
+
36
def merge_and_save(list1, list2, dict1, dict2, filename='output.csv'):
    """
    Merge two lists and two dictionaries column-wise and save them as a CSV file.

    The CSV has the columns
    ['list1', 'list2', 'keys dict1', 'vals dict1', 'keys dict2', 'vals dict2'].
    Row i holds the i-th element of each list and the i-th key/value pair of
    each dictionary (in dictionary iteration order). Inputs may have different
    lengths: the table is as long as the longest input and shorter inputs are
    padded with empty cells. The header row is written even for empty inputs.

    Parameters:
    - list1 (list): First list to merge, contributing to column 'list1'.
    - list2 (list): Second list to merge, contributing to column 'list2'.
    - dict1 (dict): First dictionary to merge, keys and values added as separate columns.
    - dict2 (dict): Second dictionary to merge, keys and values added as separate columns.
    - filename (str): Filename for the saved CSV file.
    """
    from itertools import zip_longest

    columns = ['list1', 'list2', 'keys dict1', 'vals dict1', 'keys dict2', 'vals dict2']

    # zip_longest replaces positional indexing driven by len(list1), which
    # raised IndexError for shorter inputs and dropped the tail of longer ones.
    # Padding with '' (not None) keeps integer columns from turning into floats.
    rows = list(zip_longest(
        list1, list2,
        dict1.keys(), dict1.values(),
        dict2.keys(), dict2.values(),
        fillvalue='',
    ))

    # Explicit columns guarantee a header even when there are no rows.
    df = pd.DataFrame(rows, columns=columns)

    df.to_csv(filename, index=False)
    print(f"DataFrame saved as '{filename}' in the current directory.")
70
+
71
+
72
+ # new line for every entry
73
def safe_my_dict_as_json(file_name, my_dict):
    """
    Write a dictionary to 'file_name' as a JSON object with one entry per line.

    Each key/value pair is serialized compactly on its own line, so the file
    stays line-oriented. If 'my_dict' is a list, only its first element is
    written. The argument is printed to stdout before writing.

    param file_name: Path of the file to (over)write.
    param my_dict: A dict, or a list whose first element is a dict.
    raises IndexError: If 'my_dict' is an empty list (raised before the file
        is opened, so an existing file is not truncated).
    """
    print(my_dict)

    # Unwrap BEFORE counting entries and before opening the file: the count
    # must describe the dict actually written, otherwise the comma placement
    # is wrong and the output is not valid JSON.
    if isinstance(my_dict, list):
        my_dict = my_dict[0]

    # Serializing each pair as a one-entry object and dropping the outer
    # braces yields '"key": value' and lets the json module coerce
    # non-string keys (e.g. ints) to valid, quoted JSON keys.
    entries = [json.dumps({key: value})[1:-1] for key, value in my_dict.items()]

    with open(file_name, 'w', encoding='utf-8') as f:
        f.write('{\n')
        last_index = len(entries) - 1
        for i, entry in enumerate(entries):
            # A comma follows every entry except the last one.
            comma = ',' if i < last_index else ''
            f.write(f" {entry}{comma}\n")
        f.write('}\n')
95
+
96
+
97
if __name__ == "__main__":
    # Running the module directly only announces what it contains.
    print("here are all functions that write to the Datasets")
my_new_openai.py CHANGED
@@ -2,8 +2,6 @@ import os
2
  from openai import OpenAI
3
  import requests
4
  import base64
5
- from pydub import AudioSegment
6
- from moviepy.editor import VideoFileClip
7
 
8
  client = OpenAI()
9
 
@@ -130,29 +128,6 @@ def encode_image_to_base64(image_path):
130
  return encoded_string
131
 
132
 
133
- def mp4_to_mp3(video_file_path, audio_file_path):
134
- # Load the video file
135
- video = VideoFileClip(video_file_path)
136
-
137
- # Extract audio from the video and write it to an MP3 file
138
- video.audio.write_audiofile(audio_file_path)
139
-
140
- # Close the video file to free resources
141
- video.close()
142
-
143
- print(f"Converted {video_file_path} to {audio_file_path}")
144
-
145
-
146
- def mp4_audio_to_mp3(mp4_audio_path, mp3_output_path):
147
- # Load the MP4 file
148
- audio = AudioSegment.from_file(mp4_audio_path, format="mp4")
149
-
150
- # Export as an MP3 file
151
- audio.export(mp3_output_path, format="mp3")
152
-
153
- print(f"Converted {mp4_audio_path} to {mp3_output_path}")
154
-
155
-
156
  def table_to_text(table=None, prompt="describe this table in plain text. "
157
  "be as precise as possible. spare no detail. "
158
  "what is in this table?", print_out=True):
@@ -165,25 +140,6 @@ def table_to_text(table=None, prompt="describe this table in plain text. "
165
  return ValueError
166
 
167
 
168
- def danja():
169
- #mp4_file = "C:\\Users\\eliaw\\Downloads\\WhatsApp Audio 2024-05-10 at 22.17.12.mp4"
170
-
171
- #mp3_file = "output_audio.mp3"
172
- mp3_file = "C:\\Users\\eliaw\\Downloads\\WhatsApp Audio 2024-05-10 at 22.17.12.mp3"
173
-
174
- # mp4_audio_to_mp3(mp4_file, mp3_file)
175
-
176
- # Usage example
177
- # mp4_to_mp3(mp4_file, mp3_file)
178
-
179
- audio_file = open(mp3_file, "rb")
180
- transcription = client.audio.transcriptions.create(
181
- model="whisper-1",
182
- file=audio_file
183
- )
184
- print(transcription.text)
185
-
186
-
187
  if __name__ == "__main__":
188
  #print("here are all functions that directly call openai.")
189
  #img_create("a skier in the swiss alps", download_path="skier.png")
 
2
  from openai import OpenAI
3
  import requests
4
  import base64
 
 
5
 
6
  client = OpenAI()
7
 
 
128
  return encoded_string
129
 
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  def table_to_text(table=None, prompt="describe this table in plain text. "
132
  "be as precise as possible. spare no detail. "
133
  "what is in this table?", print_out=True):
 
140
  return ValueError
141
 
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  if __name__ == "__main__":
144
  #print("here are all functions that directly call openai.")
145
  #img_create("a skier in the swiss alps", download_path="skier.png")
my_vectors.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
def safe_local(vectors, path):
    """Placeholder without an implementation.

    Both arguments are currently ignored and the call returns None.
    NOTE(review): presumably meant to store 'vectors' at 'path' - confirm
    the intended behavior before implementing.
    """
    return None
5
+
6
+
7
def merge_two(vec1, vec2):
    """Placeholder without an implementation.

    Both arguments are currently ignored and the call returns None.
    NOTE(review): presumably meant to combine 'vec1' and 'vec2' - confirm
    the intended behavior before implementing.
    """
    return None
9
+
10
+
11
def load_local(path):
    """Placeholder without an implementation.

    The argument is currently ignored and the call returns None.
    NOTE(review): presumably meant to read vectors back from 'path' -
    confirm the intended behavior before implementing.
    """
    return None
13
+
14
+
15
+
16
if __name__ == "__main__":
    # Running the module directly only prints a short notice.
    print("you are in the my_vectors")