{"cells":[{"cell_type":"markdown","metadata":{"id":"T9u-JTdTpMzM"},"source":["## Dataset"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PQtPVQfKn7h-"},"outputs":[],"source":["from google.colab import data_table; data_table.enable_dataframe_formatter()\n","import numpy as np; np.random.seed(123)\n","import pandas as pd"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"oqsw1ni_sN_b"},"outputs":[],"source":["df = pd.read_json(\"https://raw.githubusercontent.com/sahil280114/codealpaca/master/data/code_alpaca_20k.json\")\n","\n","# create a new column called `split` where:\n","# 90% will be assigned a value of 0 -> train set\n","# 5% will be assigned a value of 1 -> validation set\n","# 5% will be assigned a value of 2 -> test set\n","\n","total_rows = len(df)\n","split_0_count = int(total_rows * 0.9)\n","split_1_count = int(total_rows * 0.05)\n","split_2_count = total_rows - split_0_count - split_1_count\n","\n","# Create an array with split values based on the counts\n","split_values = np.concatenate([\n"," np.zeros(split_0_count),\n"," np.ones(split_1_count),\n"," np.full(split_2_count, 2)\n","])\n","\n","# Shuffle the array to ensure randomness\n","np.random.shuffle(split_values)\n","\n","# Add the 'split' column to the DataFrame\n","df['split'] = split_values\n","df['split'] = df['split'].astype(int)\n","df = df.head(n=1000)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":618},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1703888311059,"user":{"displayName":"Yufei Wang","userId":"15670977446030042093"},"user_tz":480},"id":"tgdeMSpEobGr","outputId":"b2a5cc10-ef79-40ee-a474-50e2cc48e00e"},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" instruction \\\n","0 Create an array of length 5 which contains all... \n","1 Formulate an equation to calculate the height ... \n","2 Write a replace method for a string class whic... \n","3 Create an array of length 15 containing number... \n","4 Write a function to find the number of distinc... \n","5 Create a nested loop to print every combinatio... \n","6 Write a function to find the maximum differenc... \n","7 Write a function to generate the nth Fibonacci... \n","8 Write a class to represent a 2D point with x a... \n","9 Write code that removes spaces from a given st... \n","\n"," input \\\n","0 \n","1 \n","2 string = \"Hello World!\"\\nreplace_with = \"Greet... \n","3 \n","4 matrix = [[1, 0, 0],\\n [1, 0, 1],\\n ... \n","5 \n","6 arr = [5, 3, 17, 11, 9] \n","7 < noinput > \n","8 \n","9 string = \" A B C D \" \n","\n"," output split \n","0 arr = [2, 4, 6, 8, 10] 0 \n","1 Height of triangle = opposite side length * si... 0 \n","2 def replace(self, replace_with):\\n new_stri... 0 \n","3 arr = [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33... 0 \n","4 def find_num_distinct_states(matrix):\\n sta... 0 \n","5 for i in range(10):\\n for j in range(10):\\n... 0 \n","6 def max_difference(arr):\\n max_diff = 0\\n ... 0 \n","7 def fib(n):\\n if n == 0:\\n return 0\\... 0 \n","8 class Point:\\n def __init__(self, x, y):\\n ... 0 \n","9 def remove_spaces(string): \\n return \"\".joi... 0 "],"text/html":["\n","
\n"," | instruction | \n","input | \n","output | \n","split | \n","
---|---|---|---|---|
0 | \n","Create an array of length 5 which contains all... | \n","\n"," | arr = [2, 4, 6, 8, 10] | \n","0 | \n","
1 | \n","Formulate an equation to calculate the height ... | \n","\n"," | Height of triangle = opposite side length * si... | \n","0 | \n","
2 | \n","Write a replace method for a string class whic... | \n","string = \"Hello World!\"\\nreplace_with = \"Greet... | \n","def replace(self, replace_with):\\n new_stri... | \n","0 | \n","
3 | \n","Create an array of length 15 containing number... | \n","\n"," | arr = [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33... | \n","0 | \n","
4 | \n","Write a function to find the number of distinc... | \n","matrix = [[1, 0, 0],\\n [1, 0, 1],\\n ... | \n","def find_num_distinct_states(matrix):\\n sta... | \n","0 | \n","
5 | \n","Create a nested loop to print every combinatio... | \n","\n"," | for i in range(10):\\n for j in range(10):\\n... | \n","0 | \n","
6 | \n","Write a function to find the maximum differenc... | \n","arr = [5, 3, 17, 11, 9] | \n","def max_difference(arr):\\n max_diff = 0\\n ... | \n","0 | \n","
7 | \n","Write a function to generate the nth Fibonacci... | \n","< noinput > | \n","def fib(n):\\n if n == 0:\\n return 0\\... | \n","0 | \n","
8 | \n","Write a class to represent a 2D point with x a... | \n","\n"," | class Point:\\n def __init__(self, x, y):\\n ... | \n","0 | \n","
9 | \n","Write code that removes spaces from a given st... | \n","string = \" A B C D \" | \n","def remove_spaces(string): \\n return \"\".joi... | \n","0 | \n","