Commit · 00316bb
Parent(s): e133364

add files

Files changed:
- my_1_writer.py +98 -0
- my_new_openai.py +0 -44
- my_vectors.py +17 -0
my_1_writer.py
ADDED
@@ -0,0 +1,98 @@
# NEEDS TO BE CLEANED UP

import json
import pandas as pd


def split_json_file(input_filepath, lines_per_file=50):
    """
    Splits a line-delimited JSON file into multiple files, each containing up to 'lines_per_file' lines.

    :param input_filepath: The path to the input JSON file.
    :param lines_per_file: The maximum number of lines per output file.
    """
    # Counter for file naming
    file_counter = 1
    # Open the input file
    with open(input_filepath, 'r') as input_file:
        # Read the lines from the input file
        lines = input_file.readlines()
        # Iterate through the lines in chunks of 'lines_per_file'
        for i in range(0, len(lines), lines_per_file):
            # Determine the output file name
            output_filename = f'translate_data/english_{file_counter}.json'
            # Write the current chunk to the output file
            with open(output_filename, 'w') as output_file:
                # Grab the current chunk of lines
                chunk = lines[i:i + lines_per_file]
                # Write each line to the output file
                for line in chunk:
                    output_file.write(line)
            print(f'Created {output_filename}')
            # Increment the file counter
            file_counter += 1


def merge_and_save(list1, list2, dict1, dict2, filename='output.csv'):
    """
    Merges two lists and two dictionaries into a pandas DataFrame according to the specified structure:
    headers: ['list1', 'list2', 'keys dict1', 'vals dict1', 'keys dict2', 'vals dict2']
    and saves it as a CSV file.

    Parameters:
    - list1 (list): First list to merge, contributing to column 'list1'.
    - list2 (list): Second list to merge, contributing to column 'list2'.
    - dict1 (dict): First dictionary to merge, keys and values added as separate columns.
    - dict2 (dict): Second dictionary to merge, keys and values added as separate columns.
    - filename (str): Filename for the saved CSV file.
    """
    # Combining all elements into a structured list of dictionaries for DataFrame construction
    data = []
    dict1_items = list(dict1.items())
    dict2_items = list(dict2.items())
    for i in range(len(list1)):
        row = {
            'list1': list1[i],
            'list2': list2[i],
            'keys dict1': dict1_items[i][0],
            'vals dict1': dict1_items[i][1],
            'keys dict2': dict2_items[i][0],
            'vals dict2': dict2_items[i][1]
        }
        data.append(row)

    # Creating the DataFrame
    df = pd.DataFrame(data)

    # Saving the DataFrame to a CSV file
    df.to_csv(filename, index=False)
    print(f"DataFrame saved as '{filename}' in the current directory.")


# new line for every entry
def safe_my_dict_as_json(file_name, my_dict):
    print(my_dict)
    # Open a file for writing
    with open(file_name, 'w') as f:
        # Write the opening brace of the JSON object
        f.write('{\n')
        # If a list was passed, use its first element as the dictionary to write
        if isinstance(my_dict, list):
            my_dict = my_dict[0]
        # Get total number of items to control comma insertion
        total_items = len(my_dict)
        # Iterate over items, keeping track of the current item index
        for i, (key, value) in enumerate(my_dict.items()):
            # Serialize the key with JSON to handle special characters and ensure proper quoting
            json_key = json.dumps(key)
            # Convert the value to a JSON-formatted string (without indentation)
            json_value = json.dumps(value)
            # Determine if a comma is needed (for all but the last item)
            comma = ',' if i < total_items - 1 else ''
            # Write the formatted string to the file
            f.write(f" {json_key}: {json_value}{comma}\n")
        # Write the closing brace of the JSON object
        f.write('}\n')


if __name__ == "__main__":
    print("here are all functions that write to the Datasets")
my_new_openai.py
CHANGED
@@ -2,8 +2,6 @@ import os
 from openai import OpenAI
 import requests
 import base64
-from pydub import AudioSegment
-from moviepy.editor import VideoFileClip

 client = OpenAI()

@@ -130,29 +128,6 @@ def encode_image_to_base64(image_path):
     return encoded_string


-def mp4_to_mp3(video_file_path, audio_file_path):
-    # Load the video file
-    video = VideoFileClip(video_file_path)
-
-    # Extract audio from the video and write it to an MP3 file
-    video.audio.write_audiofile(audio_file_path)
-
-    # Close the video file to free resources
-    video.close()
-
-    print(f"Converted {video_file_path} to {audio_file_path}")
-
-
-def mp4_audio_to_mp3(mp4_audio_path, mp3_output_path):
-    # Load the MP4 file
-    audio = AudioSegment.from_file(mp4_audio_path, format="mp4")
-
-    # Export as an MP3 file
-    audio.export(mp3_output_path, format="mp3")
-
-    print(f"Converted {mp4_audio_path} to {mp3_output_path}")
-
-
 def table_to_text(table=None, prompt="describe this table in plain text. "
                                      "be as precise as possible. spare no detail. "
                                      "what is in this table?", print_out=True):
@@ -165,25 +140,6 @@ def table_to_text(table=None, prompt="describe this table in plain text. "
     return ValueError


-def danja():
-    #mp4_file = "C:\\Users\\eliaw\\Downloads\\WhatsApp Audio 2024-05-10 at 22.17.12.mp4"
-
-    #mp3_file = "output_audio.mp3"
-    mp3_file = "C:\\Users\\eliaw\\Downloads\\WhatsApp Audio 2024-05-10 at 22.17.12.mp3"
-
-    # mp4_audio_to_mp3(mp4_file, mp3_file)
-
-    # Usage example
-    # mp4_to_mp3(mp4_file, mp3_file)
-
-    audio_file = open(mp3_file, "rb")
-    transcription = client.audio.transcriptions.create(
-        model="whisper-1",
-        file=audio_file
-    )
-    print(transcription.text)
-
-
 if __name__ == "__main__":
     #print("here are all functions that directly call openai.")
     #img_create("a skier in the swiss alps", download_path="skier.png")
my_vectors.py
ADDED
@@ -0,0 +1,17 @@


def safe_local(vectors, path):
    pass


def merge_two(vec1, vec2):
    pass


def load_local(path):
    pass



if __name__ == "__main__":
    print("you are in the my_vectors")