zxy0307 committed on Mar 10, 2024

Commit

593040d

verified ·

1 Parent(s): 445ace5

Upload 141 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
01_Numpy/.DS_Store +0 -0
01_Numpy/.idea/.gitignore +3 -0
01_Numpy/.idea/Numpy.iml +8 -0
01_Numpy/.idea/inspectionProfiles/Project_Default.xml +6 -0
01_Numpy/.idea/inspectionProfiles/profiles_settings.xml +6 -0
01_Numpy/.idea/misc.xml +4 -0
01_Numpy/.idea/modules.xml +8 -0
01_Numpy/.idea/workspace.xml +70 -0
01_Numpy/01_创建数组1.py +68 -0
01_Numpy/02_创建数组2.py +44 -0
01_Numpy/03_创建数组3.py +48 -0
01_Numpy/04_数组的数据类型.py +34 -0
01_Numpy/05_数组运算.py +17 -0
01_Numpy/06_索引和切片.py +27 -0
01_Numpy/07_列表切片与数组切片.py +23 -0
01_Numpy/08_二维数组的切片操作.py +14 -0
01_Numpy/09_数组的重塑.py +13 -0
01_Numpy/10_数组的转置.py +19 -0
01_Numpy/11_数组的增删改查.py +56 -0
01_Numpy/12_矩阵的操作.py +45 -0
01_Numpy/13_矩阵的运算.py +35 -0
01_Numpy/14_数组的相乘与点积.py +13 -0
01_Numpy/15_矩阵相乘与矩阵元素相乘.py +12 -0
01_Numpy/16_数学运算函数.py +44 -0
01_Numpy/17_统计分析函数.py +58 -0
01_Numpy/18_数组的排序.py +31 -0
01_Numpy/19_应用_图像灰度处理.py +20 -0
01_Numpy/img.jpg +3 -0
02_Pandas/.DS_Store +0 -0
02_Pandas/.idea/.gitignore +3 -0
02_Pandas/.idea/Pandas.iml +8 -0
02_Pandas/.idea/encodings.xml +6 -0
02_Pandas/.idea/inspectionProfiles/Project_Default.xml +6 -0
02_Pandas/.idea/inspectionProfiles/profiles_settings.xml +6 -0
02_Pandas/.idea/misc.xml +4 -0
02_Pandas/.idea/modules.xml +8 -0
02_Pandas/.idea/workspace.xml +75 -0
02_Pandas/01_Pandas初步使用.py +4 -0
02_Pandas/02_Series对象.py +33 -0
02_Pandas/03_DataFrame对象.py +34 -0
02_Pandas/04_DataFrame属性.py +41 -0
02_Pandas/05_DataFrame重要函数.py +16 -0
02_Pandas/06_导入excel数据.py +20 -0
02_Pandas/07_导入各种类型的文件.py +34 -0
02_Pandas/08_数据抽取.py +54 -0
02_Pandas/09_数据的操作.py +116 -0
02_Pandas/10_数据清洗.py +51 -0
02_Pandas/11_索引.py +66 -0
02_Pandas/12_数据的排序.py +31 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+01_Numpy/img.jpg filter=lfs diff=lfs merge=lfs -text

01_Numpy/.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

01_Numpy/.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# 默认忽略的文件
+/shelf/
+/workspace.xml

01_Numpy/.idea/Numpy.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

01_Numpy/.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPep8Inspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
+  </profile>
+</component>

01_Numpy/.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

01_Numpy/.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
+</project>

01_Numpy/.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Numpy.iml" filepath="$PROJECT_DIR$/.idea/Numpy.iml" />
+    </modules>
+  </component>
+</project>

01_Numpy/.idea/workspace.xml ADDED Viewed

	@@ -0,0 +1,70 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="AutoImportSettings">
+    <option name="autoReloadType" value="SELECTIVE" />
+  </component>
+  <component name="ChangeListManager">
+    <list default="true" id="3bedfeb5-2966-47b5-88f9-48f835bf7127" name="变更" comment="" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileTemplateManagerImpl">
+    <option name="RECENT_TEMPLATES">
+      <list>
+        <option value="Python Script" />
+      </list>
+    </option>
+  </component>
+  <component name="MarkdownSettingsMigration">
+    <option name="stateVersion" value="1" />
+  </component>
+  <component name="ProjectId" id="2OB6Ia0gJD1cwizqJbUbm4FKBay" />
+  <component name="ProjectViewState">
+    <option name="hideEmptyMiddlePackages" value="true" />
+    <option name="showLibraryContents" value="true" />
+  </component>
+  <component name="PropertiesComponent">{
+  &quot;keyToString&quot;: {
+    &quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
+    &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
+    &quot;last_opened_file_path&quot;: &quot;/Users/macbook/Documents/Numpy&quot;,
+    &quot;settings.editor.selected.configurable&quot;: &quot;preferences.lookFeel&quot;
+  }
+}</component>
+  <component name="RunManager">
+    <configuration name="01_创建数组1" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
+      <module name="Numpy" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/01_创建数组1.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+  </component>
+  <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="应用程序级" UseSingleDictionary="true" transferred="true" />
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="默认任务">
+      <changelist id="3bedfeb5-2966-47b5-88f9-48f835bf7127" name="变更" comment="" />
+      <created>1681021549906</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1681021549906</updated>
+    </task>
+    <servers />
+  </component>
+</project>

01_Numpy/01_创建数组1.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import numpy as np  # 导入numpy
+# 创建简单数组
+n1 = np.array([1, 2, 3])
+print(n1)
+# 创建一个带小数点的数组
+n2 = np.array([0.1, 0.2, 0.3])
+print(n2)
+# 创建一个二维数组
+n3 = np.array([
+    [1, 2],
+    [3, 4],
+])
+print(n3)
+"""
+1、创建数组的语法：
+    numpy.array(object, dtype=None, copy=True, ndmin=0)
+"""
+# 使用dtype -- 创建数组时，指定数据类型
+n4 = np.array([1, 2, 3], dtype=float)
+print(n4)
+print(n4.dtype)  # 查看数组中元素的类型
+print(type(n4))  # 查看数组本身的类型
+print(type(n4[0]))  # 查看特定元素的类型
+# 使用object -- 进行数据初始化
+lst = [1, 2, 3]
+n5 = np.array(lst, dtype=float)
+print(n5)
+# 使用copy -- 保留原数组中的数据
+n6 = np.array([1, 2, 3])
+n7 = np.array(n5, copy=True)
+n7[0] = 100
+n7[2] = 99
+print(n6)  # 未变化
+print(n7)
+# 使用ndmin -- 指定最小维数
+lst = [1, 2, 3]
+n8 = np.array(lst, ndmin=3)  # 我们要创建3维数组
+print(n8)
+"""
+2、不同方式创建数组
+"""
+# 创建指定维数的空数组
+n9 = np.empty([4, 3], dtype=int)  # [4,3]表示4行3列，数据类型未初始化(可指定)的数组
+print(n9)
+# 创建指定维度的数组，以0填充
+n10 = np.zeros(3)  # 1行3列
+print(n10)  # 输出的结果默认为float
+# 创建指定维度的数组，以1填充
+n11 = np.ones(3)  # 1行3列
+print(n11)  # 输出的结果默认为float
+# 创建指定维度的数组，以指定的数值填充
+n12 = np.full(3, 8)  # # 1行3列，以8填充
+print(n12)
+n13 = np.full((3, 4), 8)  # 3行4列，以8填充
+print(n13)

01_Numpy/02_创建数组2.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import numpy as np
+"""
+3、从数值范围创建数组
+"""
+# arange(start, stop, step, dtype=None) 含头不含尾
+n1 = np.arange(1, 11, 2)
+print(n1)
+# linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None) 创建等差数列
+# endpoint: 是否包含尾部      num：数组的长度    retstep: 为True时同时返回步长
+n2 = np.linspace(7500, 10000, 6)
+print(n2)
+# logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None) 创建等比数列
+n3 = np.logspace(0, 63, 64, base=2, dtype='uint64')
+print(n3)
+"""
+4、生成随机数组
+"""
+n4 = np.random.rand(5)  # [0，1)之间的，1行5列的随机小数
+print(n4)
+n5 = np.random.rand(2, 5)  # 2行5列的随机小数
+print(n5)
+# 用于从正态分布中，返回随机生成的数组
+n6 = np.random.randn(3)
+print(n6)
+# 生成一定范围的随机数组
+n7 = np.random.randint(1, 3, 10)  # 包头不包尾，产生10个数
+print(n7)
+n8 = np.random.randint(1, 3, size=(2, 3))  # 包头不包尾，产生2行3列的数组
+print(n8)
+# 生成正态分布的随机数组
+n9 = np.random.normal(0, 0.1, 10)  # 均值，标准差，维数
+print(n9)
+n10 = np.random.normal(0, 0.1, size=(2, 3))  # 均值，标准差，维数
+print(n10)

01_Numpy/03_创建数组3.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import numpy as np
+# asarray使用 -- 从已有的数组中创建数组
+# 通过列表创建数组
+n1 = np.asarray([1, 2, 3, 4])
+print(n1)
+# 通过元组列表创建数组
+n2 = np.asarray([(1, 2, 3), (4, 5, 6), (7, 8, 9)])
+print(n2)
+# 通过元组创建数组
+n3 = np.asarray((1, 2, 3))
+print(n3)
+# 通过元组的元组创建数组
+n4 = np.asarray(((1, 2, 3), (4, 5, 6), (7, 8, 9)))
+print(n4)
+# 通过列表元组创建数组
+n5 = np.asarray(([1, 2, 3], [4, 5, 6], [7, 8, 9]))
+print(n5)
+# 动态数组
+n6 = np.frombuffer(b'juanzijie', dtype='S1')  # S1表示每个元素是长度为1的字节串
+print(n6)
+# 从迭代对象中创建数组对象
+iter = (i for i in range(5))
+n7 = np.fromiter(iter, dtype='int')
+print(n7)
+# empty_like的使用 -- 按照一定的模版，创建数据类型不定的数组
+n8 = np.empty_like([[1, 2], [3, 4]])  # 创建一个2行2列的数组，因为给定的是2行2列
+print(n8)
+# 创建一个以0填充的，2行2列的数组
+n9 = np.zeros_like([[1, 2], [3, 4]])
+print(n9)
+# 创建一个以1填充的，2行2列的数组
+n10 = np.ones_like([[1, 2], [3, 4]])
+print(n10)
+# 创建一个以指定数据填充的，2行2列的数组
+n11 = np.full_like([[1, 2], [3, 4]], 8)
+print(n11)

01_Numpy/04_数组的数据类型.py ADDED Viewed

	@@ -0,0 +1,34 @@

+"""
+Numpy中的数据类型：
+    bool_
+    int_
+    int8
+    int16
+    int32
+    int64
+    uint8
+    uint16
+    uint32
+    float_
+    ...
+    datetime64
+"""
+import numpy as np
+n1 = np.array([1, 2, 3, 4], dtype=int)
+print(n1)
+n2 = np.array([1, 2, 3, 4], dtype='int32')
+print(n2)
+n3 = np.array([1, 2, 3, 4], dtype='int_')
+print(n3)
+n4 = np.array([1, 2, 3, 4], dtype='float_')
+print(n4)
+n5 = np.array([1, 2, 3, 4], dtype=float)
+print(n5)
+n6 = np.array(['2021-01-01'], dtype='datetime64')
+print(n6)

01_Numpy/05_数组运算.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import numpy as np
+n1 = np.array([1, 2])
+n2 = np.array([3, 4])
+print(n1)
+print(n2)
+# 数组运算 -- 对应位置的元素进行运算
+print('加法运算', n1 + n2)
+print('减法运算', n1 - n2)
+print('乘法运算', n1 * n2)
+print('除法运算', n1 / n2)
+print('幂运算', n1 ** n2)
+print('比较运算', n1 > n2)
+print('比较运算', n1 < n2)
+print('比较运算', n1 != n2)

01_Numpy/06_索引和切片.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import numpy as np
+# 数组中的索引
+n1 = np.array([1, 2, 3, 4])
+print(n1[1])
+print(n1[-3])
+n2 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
+print(n2[0][1])
+print(n2[0, 1])
+print(n2[-2, -3])
+# 数组的切片 [start, stop, step] 含头不含尾
+n3 = np.array([10, 20, 30, 40, 50, 60])
+print(n3[:3])  # 省略起始位置，默认从0开始，步长默认为1，[0,3)
+print(n3[2:5])  # [2,5)
+print(n3[3:])  # 省略结束位置，则默认到最后一个元素
+print(n3[:])  # 获取数组中的所有元素
+# 修改步长
+print(n3[0::2])
+print(n3[1::5])
+# 步长还可以为负数
+print(n3[::-1])  # 逆序
+print(n3[-1:-5:-1])
+print(n3[-1:-6:-2])

01_Numpy/07_列表切片与数组切片.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import numpy as np
+# 数组的切片
+array = np.array([10, 20, 30, 40])
+print(array)
+arr1 = array[1:3]
+print(arr1)
+# 对切片之后的数组进行修改
+arr1[0] = 99
+print(arr1)
+print(array)  # 已经改变
+# 列表的切片
+lst = [10, 20, 30, 40]
+lst1 = lst[1:3]
+print(lst1)
+# 对切片之后的列表进行修改
+lst1[0] = 99
+print(lst1)
+print(lst) # 没有发生修改

01_Numpy/08_二维数组的切片操作.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import numpy as np
+arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
+print(arr)
+print(arr[1, 2])  # 1表示索引为1的行，2表示索引为2的列
+print(arr[:2, 1:])  # [0,2)的行，[1,3]的列
+print(arr[1, :2])  # 索引为1的行，[0,2)的列
+print(arr[:2, 2])  # 索引为[0,2)的行，索引为2的列
+print(arr[:, :1])   # 全部行，索引为0的列

01_Numpy/09_数组的重塑.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import numpy as np
+n = np.arange(6)
+print(n)
+# 重塑 -- 元素个数必须相同，否则报错
+n1 = n.reshape(2, 3)
+print(n1)
+n = np.array([[1, 2, 3], [4, 5, 6]])
+print(n)
+n1 = n.reshape(3,2) # 重塑
+print(n1)

01_Numpy/10_数组的转置.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import numpy as np
+n = np.arange(24).reshape(4, 6)
+print(n)
+# 转置
+n1 = n.T
+print(n1)
+n2 = n.transpose()
+print(n2)
+# 转置练习
+arr1 = np.array([['A', 100], ['B', 200], ['C', 300], ['D', 400], ['E', 500]])
+print(arr1)
+arr2 = arr1.T
+print(arr2)

01_Numpy/11_数组的增删改查.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import numpy as np
+n1 = np.array([[1, 2], [3, 4], [5, 6]])
+# 创建第二个数组
+n2 = np.array([[10, 20], [30, 40], [50, 60]])
+print(n1)
+print(n2)
+"""
+1、数组的增加
+"""
+# 水平方向增加数据 -- 列的个数增加
+print(np.hstack((n1, n2)))
+# 垂直方向增加数据 -- 行的个数增加
+print(np.vstack((n1, n2)))
+"""
+2、数组的删除
+"""
+n1 = np.array([[1, 2], [3, 4], [5, 6]])
+print(n1)
+# 删除第3行 -- 0号轴，索引为2的数据
+n2 = np.delete(n1, 2, axis=0)
+print(n2)
+# 删除第1列
+n3 = np.delete(n1, 0, axis=1)
+print(n3)
+# 删除第1行和第3行
+n4 = np.delete(n1, (0, 2), axis=0)
+print(n4)
+"""
+3、数组的修改
+"""
+n1 = np.array([[1, 2], [3, 4], [5, 6]])
+n1[1] = [30, 40]  # 修改一行
+print(n1)
+n1[2][1] = 88  # 修改单个元素
+print(n1)
+"""
+4、数组的查询
+"""
+n = np.arange(1, 11)
+print(n)
+n2 = n[np.where(n > 5)]  # np.where返回满足条件的索引，再用索引取出所有大于5的元素
+print(n2)
+n3 = np.where(n > 5, 2, 0) # 数组中元素大于5，输出2，否则输出0
+print(n3)

01_Numpy/12_矩阵的操作.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import numpy as np
+"""
+1、创建简单矩阵
+"""
+a = np.mat('5 6;7 8')
+print(a)
+b = np.mat([[5, 6], [7, 8]])
+print(b)
+print(type(a), type(b))  # 矩阵
+n1 = np.array([[5, 6], [7, 8]])
+print(type(n1))  # 数组
+"""
+2、使用mat函数创建常见的矩阵
+"""
+# 创建3*3的零矩阵
+d1 = np.mat(np.zeros((3, 3)))
+print(d1)
+# 创建2*4的1矩阵
+d2 = np.mat(np.ones((2, 4)))
+print(d2)
+# 创建[0,1)随机矩阵
+d3 = np.mat(np.random.rand(3, 4))
+print(d3)
+# 创建[1,8)随机矩阵
+d4 = np.mat(np.random.randint(1, 8, size=(3, 5)))
+print(d4)
+# 对角矩阵
+d5 = np.mat(np.eye(5, 5))
+print(d5)
+# 对角线矩阵
+d6 = np.mat(np.diag([1, 2, 3]))
+print(d6)
+d7 = np.mat(np.diag([7, 8, 9]))
+print(d7)

01_Numpy/13_矩阵的运算.py ADDED Viewed

	@@ -0,0 +1,35 @@

+"""
+矩阵的加、减、除运算
+"""
+import numpy as np
+# 创建矩阵
+d1 = np.mat([[1, 2], [3, 4], [5, 6]])
+print(d1)
+d2 = np.mat([1, 2])
+print(d2)
+# 矩阵的加法运算
+d3 = d1 + d2
+print(d3)
+# 矩阵的减法运算
+print(d1 - d2)
+# 矩阵的除法运算
+print(d1 / d2)
+# 矩阵的乘法运算
+# error: print(d1 * d2)
+d1 = np.mat([[1, 2], [3, 4], [5, 6]])
+d2 = np.mat([[1, 2], [3, 4]])
+print(d1 * d2)
+# 矩阵的转置
+n1 = np.mat('1 3 3;4 5 6;7 12 9')
+print(n1)
+print(n1.T)
+# 矩阵的求逆
+print(n1.I)

01_Numpy/14_数组的相乘与点积.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import numpy as np
+# 数组的相乘和数组的点积
+n1 = np.array([[1, 2], [3, 4], [5, 6]])
+n2 = np.array([1, 2])
+print(n1)
+print(n2)
+print('数组的乘积', n1 * n2)
+print("数组的点积", np.dot(n1, n2)) # n1的每一行与n2对应元素相乘之后，行内求和

01_Numpy/15_矩阵相乘与矩阵元素相乘.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import numpy as np
+# 矩阵的点积与矩阵元素之间的相乘运算
+n1 = np.mat('1 3 3;4 5 6;7 12 9')
+n2 = np.mat('2 6 6;8 10 12;14 24 18')
+print(n1)
+print(n2)
+print('矩阵相乘的结果：', n1 * n2)
+print('矩阵对应元素相乘：', np.multiply(n1, n2))

01_Numpy/16_数学运算函数.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import numpy as np
+n1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+n2 = np.array([10, 10, 10])
+print(n1)
+print(n2)
+# 数组的加减乘除
+print('加', np.add(n1, n2))
+print(n1 + n2)
+print('减', np.subtract(n1, n2))
+print('乘', np.multiply(n1, n2))
+print('除', np.divide(n1, n2))
+# 倒数
+n1 = np.array([0.25, 1.75, 2, 100])
+print(np.reciprocal(n1))
+# 幂运算
+n1 = np.array([10, 100, 1000])
+n2 = np.array([1, 2, 3])
+print(np.power(n1, n2))  # n1的n2次幂
+# 余数    余数 = a - n * (a // n) -- a是被除数，n是除数
+n1 = np.array([10, 20, 30])
+n2 = np.array([4, 5, -8])
+print(np.mod(n1, n2))
+# a是30，n是-8
+# 四舍五入
+n = np.array([1.55, 6.823, 100, 0.1189, 3.1315, -3.456])
+print(n)
+print(np.around(n))  # 默认取整
+print(np.around(n, decimals=2))  # 保留2位小数
+print(np.around(n, decimals=-1))  # 取整到小数点左侧
+# 向上取整与向下取整
+print(np.ceil(n))  # 向上取整
+print(np.floor(n))  # 向下取整
+# 三角函数
+n = np.array([0, 30, 45, 60, 90])
+print(np.sin(n / 180 * np.pi))

01_Numpy/17_统计分析函数.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import numpy as np
+n = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+print(n)
+# 整个数组求和
+print(n.sum())
+# 每列的和（axis=0：沿行方向累加）
+print(n.sum(axis=0))
+# 每行的和（axis=1：沿列方向累加）
+print(n.sum(axis=1))
+# 平均值
+print(n.mean())
+# 每列的平均值（axis=0）
+print(n.mean(axis=0))
+# 每行的平均值（axis=1）
+print(n.mean(axis=1))
+# 最大值
+print(n.max())
+# 每列的最大值（axis=0）
+print(n.max(axis=0))
+# 每行的最大值（axis=1）
+print(n.max(axis=1))
+# 最小值
+print(n.min())
+# 每列的最小值（axis=0）
+print(n.min(axis=0))
+# 每行的最小值（axis=1）
+print(n.min(axis=1))
+# 加权平均
+# 数据 * 出现的次数 -> 求平均
+n1 = np.array([1, 2, 3, 4, 5])  # 数组中的数据
+n2 = np.array([10, 20, 30, 40, 50])  # 表示各个数出现的次数
+print(np.average(n1, weights=n2))
+# 中位数 -- 数组有序
+n1 = np.array([1, 2, 3, 4, 5, 99])
+print(np.median(n1))
+# 方差 -- 各数据与平均数之差的平方的平均值
+n = np.array([1, 2, 3, 4, 5, 6])
+print(n.var())
+# 标准差/均方差 -- 方差的平方根
+n = np.array([1, 2, 3, 4, 5, 6])
+print(n.std())

01_Numpy/18_数组的排序.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# 数组的排序
+import numpy as np
+n = np.array([[4, 7, 3], [2, 8, 5], [9, 1, 6]])
+print(n)
+print('数组排序', np.sort(n))  # 排序默认升序，默认axis=-1（沿最后一个轴，即每一行内部排序）
+# axis=0：沿行方向排序，即每一列内部排序
+print(np.sort(n, axis=0))
+# axis=1：沿列方向排序，即每一行内部排序
+print(np.sort(n, axis=1))
+# argsort() 排序
+x = np.array([9, 4, 7, 2, 4, 8, 3])
+y = np.argsort(x)
+print(y)  # 得到升序排序后的索引值
+# 排序后重构数组
+print(x[y])
+# lexsort() 排序
+math = np.array([101, 109, 115, 108, 118, 112, 118])
+english = np.array([117, 105, 118, 98, 109, 98, 120])
+total = np.array([621, 623, 620, 620, 615, 615, 450])
+sort_total = np.lexsort((english, math, total))  # 先按总分，再比数学，最后比英语
+print(sort_total)  # 排序后的索引值
+lst = [[total[i], math[i], english[i]] for i in sort_total]
+n = np.array(lst)
+print(n)

01_Numpy/19_应用_图像灰度处理.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import numpy as np
+import matplotlib.pyplot as plt
+# 读取图片
+n1 = plt.imread('img.jpg')
+# 传入数组，显示对应颜色
+plt.imshow(n1)
+print(n1)
+n2 = np.array([0.299, 0.587, 0.114, 1])
+# 数组的点乘运算
+x = np.dot(n1, n2)
+# 传入数组显示灰度
+plt.imshow(x, cmap='gray')
+# 显示图像
+plt.show()

01_Numpy/img.jpg ADDED Viewed

Git LFS Details

SHA256: b6944ab01292d81ed3f0b00f22a2191d8a25a6cbea91cadb083b09dd7d1a7520
Pointer size: 132 Bytes
Size of remote file: 1.17 MB

02_Pandas/.DS_Store ADDED Viewed

Binary file (10.2 kB). View file

02_Pandas/.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+# 默认忽略的文件
+/shelf/
+/workspace.xml

02_Pandas/.idea/Pandas.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

02_Pandas/.idea/encodings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Encoding">
+    <file url="file://$PROJECT_DIR$/resources/京东鞋子评论数据.csv" charset="GBK" />
+  </component>
+</project>

02_Pandas/.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPep8Inspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
+  </profile>
+</component>

02_Pandas/.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

02_Pandas/.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,4 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
+</project>

02_Pandas/.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Pandas.iml" filepath="$PROJECT_DIR$/.idea/Pandas.iml" />
+    </modules>
+  </component>
+</project>

02_Pandas/.idea/workspace.xml ADDED Viewed

	@@ -0,0 +1,75 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="AutoImportSettings">
+    <option name="autoReloadType" value="SELECTIVE" />
+  </component>
+  <component name="ChangeListManager">
+    <list default="true" id="37e61199-e056-4c84-b201-bf4ad9d76cef" name="变更" comment="" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileTemplateManagerImpl">
+    <option name="RECENT_TEMPLATES">
+      <list>
+        <option value="Python Script" />
+      </list>
+    </option>
+  </component>
+  <component name="MarkdownSettingsMigration">
+    <option name="stateVersion" value="1" />
+  </component>
+  <component name="ProjectId" id="2OBzO8pbPOosxZakYpmH3wV9uUP" />
+  <component name="ProjectViewState">
+    <option name="hideEmptyMiddlePackages" value="true" />
+    <option name="showLibraryContents" value="true" />
+  </component>
+  <component name="PropertiesComponent">{
+  &quot;keyToString&quot;: {
+    &quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
+    &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
+    &quot;last_opened_file_path&quot;: &quot;/Users/macbook/Documents/Pandas&quot;
+  }
+}</component>
+  <component name="RecentsManager">
+    <key name="MoveFile.RECENT_KEYS">
+      <recent name="$PROJECT_DIR$/resources" />
+      <recent name="$PROJECT_DIR$" />
+    </key>
+  </component>
+  <component name="RunManager">
+    <configuration name="01_Pandas初步使用" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
+      <module name="Pandas" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/01_Pandas初步使用.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+  </component>
+  <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="应用程序级" UseSingleDictionary="true" transferred="true" />
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="默认任务">
+      <changelist id="37e61199-e056-4c84-b201-bf4ad9d76cef" name="变更" comment="" />
+      <created>1681048727675</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1681048727675</updated>
+    </task>
+    <servers />
+  </component>
+</project>

02_Pandas/01_Pandas初步使用.py ADDED Viewed

	@@ -0,0 +1,4 @@

+import pandas as pd
+data = pd.read_excel('resources/中超赛事表.xlsx')
+print(data)

02_Pandas/02_Series对象.py ADDED Viewed

	@@ -0,0 +1,33 @@

+"""
+pd.Series(data, index)
+    data：数据
+    index：索引
+"""
+import pandas as pd
+d1 = ['李光地', '张红云', '王鹏']
+s = pd.Series(data=d1, index=[1, 2, 3])  # 数据和索引，索引默认从0开始
+print(s)
+print(type(s))
+# 指定数据和索引
+d2 = [90, 98, 87]
+index = ['张三', '李四', '王五']
+s = pd.Series(data=d2, index=index)
+print(s)
+print(s['张三'])  # 标签索引
+print(s[['张三', '王五']])
+print(s['张三': '王五']) # 切片索引 -- 含头含尾
+print("=========================")
+print(s.index)
+print(list(s.index))
+print(s.values) # array类型
+print(type(s.values))
+print("=========================")
+data = ['李光地', '张红云', '王鹏']
+s = pd.Series(data=data)
+print(s)
+print(s[0])  # 位置索引
+print(s[0:2:1])  # 切片索引 -- 含头不含尾

02_Pandas/03_DataFrame对象.py ADDED Viewed

	@@ -0,0 +1,34 @@

+"""
+pd.DataFrame(data, index, columns, dtype)
+    data: 数据
+    index: 行索引
+    columns: 列索引
+    dtype: 数据类型
+"""
+import pandas as pd
+# 列表方式创建DataFrame对象
+data = [['小太阳', 320.9, 100], ['鼠标', 150.3, 50], ['小刀', 1.5, 200]]
+columns = ['名称', '单价', '数量']
+df = pd.DataFrame(data=data, columns=columns)
+print(df)
+print(type(df))
+# 字典方式创建DataFrame对象 -- 要求列表长度一致
+data = {
+    '名称': ['小太阳', '鼠标', '小刀'],
+    '单价': [320.9, 150.3, 1.5],
+    '数量': [100, 50, 200]
+}
+df = pd.DataFrame(data=data)
+print(df)
+# 自动填充
+data = {
+    '名称': ['小太阳', '鼠标', '小刀'],
+    '单价': [320.9, 150.3, 1.5],
+    '数量': [100, 50, 200],
+    '公司': '东门超市'
+}
+df = pd.DataFrame(data=data)
+print(df)

02_Pandas/04_DataFrame属性.py ADDED Viewed

	@@ -0,0 +1,41 @@

+"""
+DataFrame对应属性：
+    T: 行列数据转换
+    head: 查看前N条数据，默认5条
+    tail: 查看后N条数据，默认5条
+    shape：查看行数和列数，shape[0]表示行，shape[1]表示列
+    info: 查看索引，数据类型，内存信息
+"""
+import pandas as pd
+data = [['小太阳', 320.9, 100], ['鼠标', 150.3, 50], ['小刀', 1.5, 200]]
+columns = ['名称', '单价', '数量']
+df = pd.DataFrame(data=data, columns=columns)
+print(df)
+print('查看所有元素的值\n', df.values)
+print('查看所有元素的类型\n', df.dtypes)
+print('查看所有行名称\n', list(df.index))
+df.index = [1, 2, 3]  # 修改行索引名称
+print(df)
+print('查看所有列索引\n', list(df.columns))
+df.columns = ['商品名称', '最新单价', '实时数量']  # 修改列索引名称
+print(df)
+# 行列数据的转换
+pd.set_option('display.unicode.east_asian_width', True)  # 规整格式
+new_df = df.T
+print(new_df)
+print('查看前N条数据\n', df.head(1))
+print('查看后N条数据\n', df.tail(1))
+print('行', df.shape[0], '   列', df.shape[1])  # 行列数
+print('查看索引，数据类型，内存信息\n', df.info)

02_Pandas/05_DataFrame重要函数.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import pandas as pd
+data = [['小太阳', 320.9, 100], ['鼠标', 150.3, 50], ['小刀', 1.5, 200]]
+columns = ['名称', '单价', '数量']
+df = pd.DataFrame(data=data, columns=columns)
+print(df)
+print(df.describe()) # 描述信息
+print(df.count()) # 非空值的个数
+print(df.sum()) # 求和
+print(df.max()) # 最大值 -- 字符串（含中文）按Unicode码点逐字符比较
+print(df.min()) # 最小值

02_Pandas/06_导入excel数据.py ADDED Viewed

	@@ -0,0 +1,20 @@

+"""
+pd.read_excel(io, sheet_name, header):  导入.xls或.xlsx文件
+    io: 表示.xls或.xlsx的 文件路径 或 文件对象
+    sheet_name: 表示工作表 -- 一个excel有多个工作表，可以写表名或序号，序号从0开始。None表示读取所有工作表
+    header: 默认值为0，取第一行为列名，如果第一行不为列名，则设置header=None
+"""
+import pandas as pd
+excel = pd.read_excel('resources/京东鞋子评论信息.xlsx', sheet_name='码数分析', header=None)
+print(excel)
+# 导入一列数据
+excel = pd.read_excel('resources/02微机原理学员成绩统计.xlsx', sheet_name='02微机原理及格学员名单', usecols=[1])
+print(excel)
+# 导入多个列数据
+pd.set_option('display.unicode.east_asian_width', True)
+excel = pd.read_excel("resources/02微机原理学员成绩统计.xlsx", sheet_name='02微机原理及格学员名单',
+                      usecols=['姓名', '总成绩'])
+print(excel)

02_Pandas/07_导入各种类型的文件.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import pandas as pd
+"""
+导入CSV/txt文件：
+    pd.read_csv(filepath_or_buffer, sep=',', header, encoding=None)
+        filepath_or_buffer: 字符串、文件路径，也可以是URL链接
+        sep：字符串、分隔符
+        header：指定作为列名的行，默认是0，如果没有，需指定为None
+        encoding:编码
+"""
+pd.set_option('display.unicode.east_asian_width', True)
+csv = pd.read_csv('resources/京东鞋子评论数据.csv', sep=',', header=0, encoding="GBK")
+print(csv)
+# 导入txt文件
+txt = pd.read_csv('resources/rating.txt', sep='\t', header=None, encoding="GBK")
+print(txt)
+"""
+导入HTML网页 -- 只能读取table标签里的内容
+    pd.read_html(io, match='', flavor, header, encoding)
+        io: 字符串、文件路径，也可以是URL链接，不支持https
+        match: 正则表达式
+        flavor: 解析器，默认为'lxml'
+"""
+url = 'http://www.espn.com/nba/salaries'
+df = pd.DataFrame()  # 创建空的DataFrame对象
+# DataFrame添加数据
+df = df._append(pd.read_html(url, header=0))
+print(df)
+# 保存成CSV文件
+df.to_csv('NBA_salary.csv', index=False) # 不要索引

02_Pandas/08_数据抽取.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import pandas as pd
+data = [[45, 65, 100], [56, 45, 50], [67, 67, 67]]
+index = ['张三', '李四', '王五']
+columns = ['数学', '语文', '英语']
+pd.set_option('display.unicode.east_asian_width', True)
+df = pd.DataFrame(data=data, index=index, columns=columns)
+print(df)
+"""
+1、提取行数据
+"""
+# 提取行数据
+print(df.loc['张三'])  # 行索引名称
+print(df.iloc[0])  # 行索引编号
+# 提取多行数据
+print(df.loc[['张三', '王五']])
+print(df.iloc[[0, 2]])
+# 提取连续的多行数据
+print(df.loc['张三':'王五'])  # 行索引名称，包头包尾
+print(df.iloc[0:2])  # 行索引编号，包头不包尾
+print(df.iloc[1::])  # (start: stop: step)
+"""
+2、提取列数据
+"""
+# 提取列数据
+print(df[['数学', '英语']])  # 使用列名提取
+print(df.loc[:, ['数学', '英语']])  # ','左侧表示行，右侧表示列
+print(df.iloc[:, [0, 2]])  # 使用列序号
+# 提取连续的列
+print(df.loc[:, '语文':])
+print(df.iloc[:, 1:])
+"""
+3、提取区域数据
+"""
+# 提取区域数据
+print(df.loc[['张三', '王五'], ['数学', '语文']])
+print(df.iloc[[0, 2], 0:2]) # ','左侧表示行，右侧表示列 -- 连续时用:  不连续时用[]
+print(df.iloc[:,0]) # 所有行的第一列数据
+"""
+4、提取指定条件的数据
+"""
+print(df.loc[df['语文'] > 60]) # 提取语文大于60的数据
+print(df.loc[(df['语文'] > 60) & (df['数学'] > 60)]) # 提取语文和数学都大于60的数据

02_Pandas/09_数据的操作.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import pandas as pd
+data = [[45, 65, 100], [56, 45, 50], [67, 67, 67]]
+index = ['张三', '李四', '王五']
+columns = ['数学', '语文', '英语']
+pd.set_option('display.unicode.east_asian_width', True)
+df = pd.DataFrame(data=data, index=index, columns=columns)
+print(df)
+"""
+1、按列增加数据
+"""
+# 采用直接赋值的方式
+df['政治'] = [90, 89, 100]
+print(df)
+# 使用loc属性
+df.loc[:, '化学'] = [30, 89, 75]
+print(df)
+# 在指定索引位置加入一列
+lst = [79, 85, 69]
+df.insert(1, '历史', lst)
+print(df)
+"""
+2、按行增加数据
+"""
+df.loc['陈六'] = [87, 56, 79, 0, 0, 0]  # 添加一行
+print(df)
+# 添加多行
+new_df = pd.DataFrame(
+    data={
+        '数学': [78, 90],
+        '历史': [56, 67],
+        '语文': [78, 65],
+        '英语': [90, 87],
+        '政治': [78, 68],
+        '化学': [90, 78]
+    },
+    index=['张丽丽', '王一一']
+)
+df = df._append(new_df)
+print(df)
+"""
+3、修改数据
+"""
+# 修改列标题 -- 直接使用columns属性
+df.columns = ['数学（上）', '历史', '语文', '英语（下）', '政治', '化学']
+print(df)
+# 修改列标题 -- 使用rename函数，  inplace:是否直接修改DataFrame
+df.rename(columns={'数学（上）': '数学（下）'}, inplace=True)
+print(df)
+# 修改行标题 -- 直接赋值
+df.index = list('123456')
+print(df)
+# 修改行标题 -- rename方法
+df.rename({'1': '一', '2': '二'}, inplace=True, axis=0)
+print(df)
+# 修改行数据
+df.loc['3'] = [100, 100, 100, 100, 100, 100]
+print(df)
+df.iloc[0, :] = [100, 100, 100, 100, 100, 100]  # 修改0行 所有数据
+print(df)
+# 修改列数据
+df.loc[:, '数学（下）'] = [120, 120, 120, 120, 120, 120]
+print(df)
+df.iloc[:, 0] = [90, 90, 90, 90, 90, 90]
+print(df)
+# 修改某一处数据
+df.loc['4', '语文'] = 150
+print(df)
+df.iloc[3, 2] = 250  # 按索引
+print(df)
+"""
+4、数据的删除
+"""
+# 删除列
+df.drop(['数学（下）'], axis=1, inplace=True)
+print(df)
+df.drop(columns='历史', inplace=True)
+print(df)
+df.drop(labels='化学', axis=1, inplace=True)
+print(df)
+# 删除行
+df.drop(['6'], axis=0, inplace=True)
+print(df)
+df.drop(index='5', inplace=True)
+print(df)
+df.drop(labels='4', axis=0, inplace=True)
+print(df)
+# 带条件的删除，删除政治成绩大于90的第二条数据
+print(df[df['政治'] > 90])
+print(df[df['政治'] > 90].index)
+df.drop(df[df['政治'] > 90].index[1], inplace=True)
+print(df)

02_Pandas/10_数据清洗.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import pandas as pd
+# 显示完整数据
+pd.set_option('display.max_rows', 500)
+pd.set_option('display.max_columns', 100)
+pd.set_option('display.width', 1000)
+pd.set_option('display.unicode.east_asian.width', True)
+df = pd.read_excel('resources/msb课程记录.xls')
+print(df)
+print('==========================')
+# 查看缺失值
+print(df.info())
+print('==========================')
+# 判断缺失值
+print(df.isnull())  # 不为NaN时为False
+print('==========================')
+print(df.notnull())  # 不为NaN时为True
+print('==========================')
+# 对缺失值的处理 -- 删除
+df = pd.read_excel('resources/msb课程记录.xls')
+print(df['课程总数量'].notnull())
+print(df[df['课程总数量'].notnull()])  # 将'课程总数量'缺失的行删除
+print('==========================')
+df = df.dropna()  # 将有缺失值的行删除
+print(df)
+print('==========================')
+# 对缺失值的处理 -- 填充
+df = pd.read_excel('resources/msb课程记录.xls')
+df['课程总数量'] = df['课程总数量'].fillna(0)  # 如果'课程总数量'缺失，则填充为0
+print(df)
+print('==========================')
+# 对重复值处理 -- 删除
+print(df.duplicated())  # 判断重复值是否存在 -- 各列完全一样
+df = df.drop_duplicates()  # 去除所有重复数据 -- 各个数据完全一样
+print(df)
+df = df.drop_duplicates(['买家实际支付金额'], keep='last')  # 删除'买家实际支付金额'的重复数据 -- 保留重复行中的最后一行
+print(df)
+# 直接删除，保留副本
+df1 = df.drop_duplicates(['课程总数量'], inplace=False)
+print(df1) # 新数据是删除之后的结果
+print(df) # 原本的数据不变

02_Pandas/11_索引.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import pandas as pd
+s1 = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
+print(s1)
+s2 = pd.Series([2, 3, 4], index=['a', 'b', 'c'])
+print(s2)
+print(s1 + s2)  # 同索引的元素相加
+"""
+1、Series重新设置索引
+"""
+# 重新设置索引
+s = pd.Series([11, 22, 33], index=[1, 2, 3])
+print(s)
+print(s.reindex(range(1, 6)))  # 多出来两个为NaN
+# 使用0进行填充
+print(s.reindex(range(1, 6), fill_value=0))
+# 向前填充和向后填充
+print(s.reindex(range(1, 6), method='ffill'))  # 向前填充，出现NaN时，和前一个索引的值保持一致
+print(s.reindex(range(1, 6), method='bfill'))  # 向后填充，出现NaN时，和后一个索引的值保持一致
+"""
+2、DataFrame对象重新设置索引
+"""
+data = [[90, 100, 90], [100, 39, 89], [49, 79, 87]]
+index = ['msb1001', 'msb1002', 'msb1003']
+columns = ['数学', '语文', '英语']
+pd.set_option('display.unicode.east_asian.width', True)
+df = pd.DataFrame(data=data, index=index, columns=columns)
+print(df)
+# 重新设置行索引
+print(df.reindex(['msb1001', 'msb1002', 'msb1003', 'msb1004', 'msb1005']))
+# 重新设置列索引
+print(df.reindex(columns=['数学', '语文', '英语', '政治', '历史']))
+# 同时设置行索引和列索引
+print(df.reindex(index=['msb1001', 'msb1002', 'msb1003', 'msb1004', 'msb1005', 'msb1006'],
+                 columns=['数学', '语文', '英语', '政治', '历史', '地理'],fill_value=0))
+"""
+3、设置某列为行索引
+"""
+# 显示完整数据
+pd.set_option('display.max_rows', 500)
+pd.set_option('display.max_columns', 100)
+pd.set_option('display.width', 1000)
+excel = pd.read_excel('resources/msb课程记录.xls')
+print(excel)
+excel = excel.set_index(['买家会员名']) # 以'买家会员名'为行索引
+print(excel)
+"""
+4、数据清洗之后，设置连续的索引
+"""
+excel = pd.read_excel('resources/msb课程记录.xls')
+print(excel.dropna()) # 清理NaN数据
+print(excel.dropna().reset_index(drop=True)) # 重新设置索引

02_Pandas/12_数据的排序.py ADDED Viewed

	@@ -0,0 +1,31 @@

+"""
+DataFrame的排序：
+    df.sort_values(by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False)
+        by: 要排序的列名称（或列名称列表）
+        axis: 轴，0表示行，1表示列
+        ascending: 升序或者降序
+        inplace: 如果值为True，则就地排序
+        kind: 指定排序算法，'quicksort'快速排序，'mergesort'归并排序，'heapsort'堆排序
+        na_position: 空值的位置，'first'时空值排在前面，'last'时NaN排在后面
+        ignore_index: 是否忽略原索引，True则重新生成0到n-1的索引，False则保留原索引
+"""
+import pandas as pd
+pd.set_option('display.max_rows', 500)
+pd.set_option('display.max_columns', 100)
+pd.set_option('display.width', 1000)
+pd.set_option('display.unicode.east_asian.width', True)
+excel = pd.read_excel('resources/电脑配件销售记录.xlsx')
+print(excel.head())  # 显示前5条
+# 排序
+excel = excel.sort_values(by='成交金额')  # 默认升序
+print(excel.head())
+excel = excel.sort_values(by='成交金额', ascending=False)  # 改成降序
+print(excel.head())
+# 根据多列进行排序，数量和成交金额 -- 先比较数量，再比较成交金额
+excel = excel.sort_values(by=['数量','成交金额'], ascending=False)
+print(excel)