Create Print_and_save_list_of_contents_within_model.safetensors.py
Print_and_save_list_of_contents_within_model.safetensors.py
ADDED
@@ -0,0 +1,116 @@
# Run from the command line:
#   python Print_and_save_list_of_contents_within_model.safetensors.py model.safetensors

# The script creates a text file named safetensors_contents.txt (or the name
# you specify) listing the tensors inside your model.safetensors file. For
# each tensor it records the shape (dimensionality), the data type (e.g.
# float32, int64), and, for object-typed entries, a preview of the data
# itself, giving insight into the model's structure and parameters. It prints
# a confirmation message indicating where the file is saved. You can then
# search the text file for lm_head.weight: if lm_head.weight appears in
# safetensors_contents.txt but your original script cannot find it, that is a
# strong indicator that something is wrong with your original script's
# load_weights function.
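
# As a quick programmatic cross-check, a minimal sketch (the path
# "model.safetensors" below is a placeholder) that tests for the key directly
# with safe_open from the safetensors package, without writing the text file:
#
#   from safetensors import safe_open
#   with safe_open("model.safetensors", framework="np") as f:
#       print("lm_head.weight" in f.keys())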

import numpy as np

print(f"NumPy version = {np.__version__}")

# Probe whether this NumPy build exposes bfloat16. Standard NumPy does not
# (see the notes below), so the probe is wrapped in try/except to keep the
# rest of the script running either way.
try:
    print(np.bfloat16)
    print("np.bfloat16 is available, so you should be good to go")
except AttributeError:
    print("np.bfloat16 is not available in this NumPy build; see the notes below")

# The error "module 'numpy' has no attribute 'bfloat16'" indicates that your
# NumPy installation does not expose a bfloat16 data type. bfloat16 (Brain
# Floating Point) is a relatively new format, and an outdated NumPy can also
# cause other compatibility problems, so updating NumPy (1.20.0 or later) is
# the most straightforward first step; note, however, that stock NumPy still
# does not include bfloat16 itself (see below). Converting the weights away
# from bfloat16 is a last resort if you cannot update NumPy.

# 1. Update NumPy:
#    pip install --upgrade numpy
#    or, to target a specific Python interpreter:
#    python3 -m pip install --upgrade numpy
#    or, on Windows:
#    py -m pip install --upgrade numpy

# 2. Force reinstallation: sometimes a simple upgrade does not replace files
#    correctly. Try a forced reinstallation:
#    pip install --force-reinstall numpy

# 3. Uninstall and reinstall: completely remove NumPy and then reinstall it:
#    pip uninstall numpy
#    pip install numpy

# Verify: after the upgrade completes, open a new command prompt or terminal
# (or restart the one you use for Python) and check the installed version:
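#
# A minimal verification one-liner (assuming "python" resolves to the
# interpreter you just upgraded; substitute python3 or py as appropriate):
#
#   python -c "import numpy; print(numpy.__version__)"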

# The error message "AttributeError: module 'numpy' has no attribute
# 'bfloat16'. Did you mean: 'float16'?" arises because standard NumPy
# installations do not include bfloat16 as a built-in data type. NumPy does
# support float16, but bfloat16 (Brain Floating Point 16) is a different
# 16-bit floating-point format, commonly used in machine learning for training
# and inference; it is designed for better performance on specific hardware,
# sacrificing some precision compared to float32.

# To use bfloat16 with NumPy, install the bfloat16 package:
#   pip install bfloat16

# This package extends NumPy, adding the bfloat16 dtype and enabling its use
# in most standard NumPy operations. After installation you can use bfloat16
# like other NumPy dtypes. The demo below is wrapped in try/except ImportError
# so the rest of this script still runs if the package is missing:

try:
    import bfloat16

    # Create a NumPy array with bfloat16 dtype
    x = np.array([1.2, 3.4, 5.6], dtype=bfloat16.bfloat16)

    # Perform operations with the bfloat16 array
    y = x * 2
    print(f"bfloat16: {y}")

    # Convert to other dtypes
    z = y.astype(np.float32)
    print(f"z = y.astype(np.float32) {z}")
except ImportError:
    print("bfloat16 package not installed; skipping the bfloat16 demo")
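
# An alternative worth noting (a suggestion, not something this script
# requires): the ml_dtypes package, maintained for the JAX/TensorFlow
# ecosystem, also provides a NumPy-compatible bfloat16 dtype:
#
#   pip install ml_dtypes
#
#   import numpy as np
#   import ml_dtypes
#   x = np.array([1.2, 3.4, 5.6], dtype=ml_dtypes.bfloat16)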


import os
import sys
# safetensors.numpy.load_file returns NumPy arrays, so the dtype checks in the
# function below use NumPy dtypes (torch is not needed for this script).
from safetensors.numpy import load_file

def print_and_save_safetensors_contents(weights_path: str, output_file: str = "safetensors_contents.txt"):
    """
    Prints and saves the contents of a safetensors file to a text file.

    Args:
        weights_path: The path to the safetensors file.
        output_file: The name of the output text file.
    """
    # File existence and absolute path verification
    weights_path = os.path.abspath(weights_path)  # Get absolute path
    if not os.path.exists(weights_path):
        print(f"Error: Weights file not found at: {weights_path}")
        sys.exit(1)

    try:
        tensors = load_file(weights_path)

        with open(output_file, "w", encoding="utf-8") as f:
            for key, tensor in tensors.items():
                f.write(f"Tensor: {key}\n")
                f.write(f"  Shape: {tensor.shape}\n")
                f.write(f"  Dtype: {tensor.dtype}\n")

                # Describe the data characteristics in more detail, using
                # NumPy dtype kinds since the loaded tensors are NumPy arrays
                if tensor.dtype == np.object_:
                    try:
                        # Attempt to render the object as a string,
                        # handling potential errors
                        decoded_data = str(tensor.item())
                        f.write(f"  Decoded data: {decoded_data}\n")
                    except Exception:
                        f.write("  Data: Cannot be displayed (likely binary or complex object)\n")
                elif tensor.dtype.kind in ("i", "u"):  # signed/unsigned integers
                    f.write("  Data Type: Integer\n")
                elif tensor.dtype.kind == "f":  # float16/float32/float64
                    f.write("  Data Type: Float\n")
                elif tensor.dtype.kind == "b":  # boolean
                    f.write("  Data Type: Boolean\n")
                else:
                    f.write("  Data: Cannot determine characteristics.\n")

                # if key == "lm_head.weight":
                #     f.write("lm_head.weight exists\n")

                f.write("\n")  # Add a separator between tensors

        print(f"Safetensors contents saved to: {output_file}")

    except Exception as e:
        print(f"An error occurred: {e}")
        sys.exit(1)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python Print_and_save_list_of_contents_within_model.safetensors.py <path_to_model.safetensors>")
        sys.exit(1)

    weights_file = sys.argv[1]
    print_and_save_safetensors_contents(weights_file)
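
# For reference, safetensors_contents.txt will then contain blocks like the
# following (the tensor name is the one searched for above; the shape and
# dtype shown here are hypothetical and depend on your model):
#
#   Tensor: lm_head.weight
#     Shape: (32000, 4096)
#     Dtype: float16
#     Data Type: Float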