Commit · 00316bb
Parent(s): e133364

add files

Files changed:
- my_1_writer.py +98 -0
- my_new_openai.py +0 -44
- my_vectors.py +17 -0
my_1_writer.py
ADDED
@@ -0,0 +1,98 @@
# NEEDS TO BE CLEANED UP

import json
import pandas as pd


def split_json_file(input_filepath, lines_per_file=50):
    """
    Splits a line-delimited JSON file into multiple files, each containing up to 'lines_per_file' lines.

    :param input_filepath: The path to the input JSON file.
    :param lines_per_file: The maximum number of lines per output file.
    """
    # Counter for file naming
    file_counter = 1
    # Open the input file
    with open(input_filepath, 'r') as input_file:
        # Read the lines from the input file
        lines = input_file.readlines()
        # Iterate through the lines in chunks of 'lines_per_file'
        for i in range(0, len(lines), lines_per_file):
            # Determine the output file name
            output_filename = f'translate_data/english_{file_counter}.json'
            # Write the current chunk to the output file
            with open(output_filename, 'w') as output_file:
                # Grab the current chunk of lines
                chunk = lines[i:i + lines_per_file]
                # Write each line to the output file
                for line in chunk:
                    output_file.write(line)
            print(f'Created {output_filename}')
            # Increment the file counter
            file_counter += 1


def merge_and_save(list1, list2, dict1, dict2, filename='output.csv'):
    """
    Merges two lists and two dictionaries into a pandas DataFrame according to the specified structure:
    headers: ['list1', 'list2', 'keys dict1', 'vals dict1', 'keys dict2', 'vals dict2']
    and saves it as a CSV file.

    Parameters:
    - list1 (list): First list to merge, contributing to column 'list1'.
    - list2 (list): Second list to merge, contributing to column 'list2'.
    - dict1 (dict): First dictionary to merge, keys and values added as separate columns.
    - dict2 (dict): Second dictionary to merge, keys and values added as separate columns.
    - filename (str): Filename for the saved CSV file.
    """
    # Combining all elements into a structured list of dictionaries for DataFrame construction
    data = []
    dict1_items = list(dict1.items())
    dict2_items = list(dict2.items())
    for i in range(len(list1)):
        row = {
            'list1': list1[i],
            'list2': list2[i],
            'keys dict1': dict1_items[i][0],
            'vals dict1': dict1_items[i][1],
            'keys dict2': dict2_items[i][0],
            'vals dict2': dict2_items[i][1]
        }
        data.append(row)

    # Creating the DataFrame
    df = pd.DataFrame(data)

    # Saving the DataFrame to a CSV file
    df.to_csv(filename, index=False)
    print(f"DataFrame saved as '{filename}' in the current directory.")


# new line for every entry
def safe_my_dict_as_json(file_name, my_dict):
    print(my_dict)
    # Open a file for writing
    with open(file_name, 'w') as f:
        # Write the opening brace of the JSON object
        f.write('{\n')
        # If a list was passed, use its first element as the dictionary to write
        if isinstance(my_dict, list):
            my_dict = my_dict[0]
        # Get total number of items to control comma insertion
        total_items = len(my_dict)
        # Iterate over items, keeping track of the current item index
        for i, (key, value) in enumerate(my_dict.items()):
            # Serialize the key with JSON to handle special characters and ensure proper quoting
            json_key = json.dumps(key)
            # Convert the value to a JSON-formatted string (without indentation)
            json_value = json.dumps(value)
            # Determine if a comma is needed (for all but the last item)
            comma = ',' if i < total_items - 1 else ''
            # Write the formatted string to the file
            f.write(f" {json_key}: {json_value}{comma}\n")
        # Write the closing brace of the JSON object
        f.write('}\n')


if __name__ == "__main__":
    print("here are all functions that write to the Datasets")
my_new_openai.py
CHANGED
@@ -2,8 +2,6 @@ import os
 from openai import OpenAI
 import requests
 import base64
-from pydub import AudioSegment
-from moviepy.editor import VideoFileClip

 client = OpenAI()

@@ -130,29 +128,6 @@ def encode_image_to_base64(image_path):
     return encoded_string


-def mp4_to_mp3(video_file_path, audio_file_path):
-    # Load the video file
-    video = VideoFileClip(video_file_path)
-
-    # Extract audio from the video and write it to an MP3 file
-    video.audio.write_audiofile(audio_file_path)
-
-    # Close the video file to free resources
-    video.close()
-
-    print(f"Converted {video_file_path} to {audio_file_path}")
-
-
-def mp4_audio_to_mp3(mp4_audio_path, mp3_output_path):
-    # Load the MP4 file
-    audio = AudioSegment.from_file(mp4_audio_path, format="mp4")
-
-    # Export as an MP3 file
-    audio.export(mp3_output_path, format="mp3")
-
-    print(f"Converted {mp4_audio_path} to {mp3_output_path}")
-
-
 def table_to_text(table=None, prompt="describe this table in plain text. "
                                      "be as precise as possible. spare no detail. "
                                      "what is in this table?", print_out=True):
@@ -165,25 +140,6 @@ def table_to_text(table=None, prompt="describe this table in plain text. "
     return ValueError


-def danja():
-    #mp4_file = "C:\\Users\\eliaw\\Downloads\\WhatsApp Audio 2024-05-10 at 22.17.12.mp4"
-
-    #mp3_file = "output_audio.mp3"
-    mp3_file = "C:\\Users\\eliaw\\Downloads\\WhatsApp Audio 2024-05-10 at 22.17.12.mp3"
-
-    # mp4_audio_to_mp3(mp4_file, mp3_file)
-
-    # Usage example
-    # mp4_to_mp3(mp4_file, mp3_file)
-
-    audio_file = open(mp3_file, "rb")
-    transcription = client.audio.transcriptions.create(
-        model="whisper-1",
-        file=audio_file
-    )
-    print(transcription.text)
-
-
 if __name__ == "__main__":
     #print("here are all functions that directly call openai.")
     #img_create("a skier in the swiss alps", download_path="skier.png")
my_vectors.py
ADDED
@@ -0,0 +1,17 @@


def safe_local(vectors, path):
    pass


def merge_two(vec1, vec2):
    pass


def load_local(path):
    pass



if __name__ == "__main__":
    print("you are in the my_vectors")