Upload 141 files
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- 01_Numpy/.DS_Store +0 -0
- 01_Numpy/.idea/.gitignore +3 -0
- 01_Numpy/.idea/Numpy.iml +8 -0
- 01_Numpy/.idea/inspectionProfiles/Project_Default.xml +6 -0
- 01_Numpy/.idea/inspectionProfiles/profiles_settings.xml +6 -0
- 01_Numpy/.idea/misc.xml +4 -0
- 01_Numpy/.idea/modules.xml +8 -0
- 01_Numpy/.idea/workspace.xml +70 -0
- 01_Numpy/01_创建数组1.py +68 -0
- 01_Numpy/02_创建数组2.py +44 -0
- 01_Numpy/03_创建数组3.py +48 -0
- 01_Numpy/04_数组的数据类型.py +34 -0
- 01_Numpy/05_数组运算.py +17 -0
- 01_Numpy/06_索引和切片.py +27 -0
- 01_Numpy/07_列表切片与数组切片.py +23 -0
- 01_Numpy/08_二维数组的切片操作.py +14 -0
- 01_Numpy/09_数组的重塑.py +13 -0
- 01_Numpy/10_数组的转置.py +19 -0
- 01_Numpy/11_数组的增删改查.py +56 -0
- 01_Numpy/12_矩阵的操作.py +45 -0
- 01_Numpy/13_矩阵的运算.py +35 -0
- 01_Numpy/14_数组的相乘与点积.py +13 -0
- 01_Numpy/15_矩阵相乘与矩阵元素相乘.py +12 -0
- 01_Numpy/16_数学运算函数.py +44 -0
- 01_Numpy/17_统计分析函数.py +58 -0
- 01_Numpy/18_数组的排序.py +31 -0
- 01_Numpy/19_应用_图像灰度处理.py +20 -0
- 01_Numpy/img.jpg +3 -0
- 02_Pandas/.DS_Store +0 -0
- 02_Pandas/.idea/.gitignore +3 -0
- 02_Pandas/.idea/Pandas.iml +8 -0
- 02_Pandas/.idea/encodings.xml +6 -0
- 02_Pandas/.idea/inspectionProfiles/Project_Default.xml +6 -0
- 02_Pandas/.idea/inspectionProfiles/profiles_settings.xml +6 -0
- 02_Pandas/.idea/misc.xml +4 -0
- 02_Pandas/.idea/modules.xml +8 -0
- 02_Pandas/.idea/workspace.xml +75 -0
- 02_Pandas/01_Pandas初步使用.py +4 -0
- 02_Pandas/02_Series对象.py +33 -0
- 02_Pandas/03_DataFrame对象.py +34 -0
- 02_Pandas/04_DataFrame属性.py +41 -0
- 02_Pandas/05_DataFrame重要函数.py +16 -0
- 02_Pandas/06_导入excel数据.py +20 -0
- 02_Pandas/07_导入各种类型的文件.py +34 -0
- 02_Pandas/08_数据抽取.py +54 -0
- 02_Pandas/09_数据的操作.py +116 -0
- 02_Pandas/10_数据清洗.py +51 -0
- 02_Pandas/11_索引.py +66 -0
- 02_Pandas/12_数据的排序.py +31 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
01_Numpy/img.jpg filter=lfs diff=lfs merge=lfs -text
|
01_Numpy/.DS_Store
ADDED
Binary file (8.2 kB). View file
|
|
01_Numpy/.idea/.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# 默认忽略的文件
|
2 |
+
/shelf/
|
3 |
+
/workspace.xml
|
01_Numpy/.idea/Numpy.iml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module type="PYTHON_MODULE" version="4">
|
3 |
+
<component name="NewModuleRootManager">
|
4 |
+
<content url="file://$MODULE_DIR$" />
|
5 |
+
<orderEntry type="inheritedJdk" />
|
6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
7 |
+
</component>
|
8 |
+
</module>
|
01_Numpy/.idea/inspectionProfiles/Project_Default.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<profile version="1.0">
|
3 |
+
<option name="myName" value="Project Default" />
|
4 |
+
<inspection_tool class="PyPep8Inspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
5 |
+
</profile>
|
6 |
+
</component>
|
01_Numpy/.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<settings>
|
3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
4 |
+
<version value="1.0" />
|
5 |
+
</settings>
|
6 |
+
</component>
|
01_Numpy/.idea/misc.xml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
|
4 |
+
</project>
|
01_Numpy/.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectModuleManager">
|
4 |
+
<modules>
|
5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/Numpy.iml" filepath="$PROJECT_DIR$/.idea/Numpy.iml" />
|
6 |
+
</modules>
|
7 |
+
</component>
|
8 |
+
</project>
|
01_Numpy/.idea/workspace.xml
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="AutoImportSettings">
|
4 |
+
<option name="autoReloadType" value="SELECTIVE" />
|
5 |
+
</component>
|
6 |
+
<component name="ChangeListManager">
|
7 |
+
<list default="true" id="3bedfeb5-2966-47b5-88f9-48f835bf7127" name="变更" comment="" />
|
8 |
+
<option name="SHOW_DIALOG" value="false" />
|
9 |
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
10 |
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
11 |
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
12 |
+
</component>
|
13 |
+
<component name="FileTemplateManagerImpl">
|
14 |
+
<option name="RECENT_TEMPLATES">
|
15 |
+
<list>
|
16 |
+
<option value="Python Script" />
|
17 |
+
</list>
|
18 |
+
</option>
|
19 |
+
</component>
|
20 |
+
<component name="MarkdownSettingsMigration">
|
21 |
+
<option name="stateVersion" value="1" />
|
22 |
+
</component>
|
23 |
+
<component name="ProjectId" id="2OB6Ia0gJD1cwizqJbUbm4FKBay" />
|
24 |
+
<component name="ProjectViewState">
|
25 |
+
<option name="hideEmptyMiddlePackages" value="true" />
|
26 |
+
<option name="showLibraryContents" value="true" />
|
27 |
+
</component>
|
28 |
+
<component name="PropertiesComponent">{
|
29 |
+
"keyToString": {
|
30 |
+
"RunOnceActivity.OpenProjectViewOnStart": "true",
|
31 |
+
"RunOnceActivity.ShowReadmeOnStart": "true",
|
32 |
+
"last_opened_file_path": "/Users/macbook/Documents/Numpy",
|
33 |
+
"settings.editor.selected.configurable": "preferences.lookFeel"
|
34 |
+
}
|
35 |
+
}</component>
|
36 |
+
<component name="RunManager">
|
37 |
+
<configuration name="01_创建数组1" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
|
38 |
+
<module name="Numpy" />
|
39 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
40 |
+
<option name="PARENT_ENVS" value="true" />
|
41 |
+
<envs>
|
42 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
43 |
+
</envs>
|
44 |
+
<option name="SDK_HOME" value="" />
|
45 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
46 |
+
<option name="IS_MODULE_SDK" value="true" />
|
47 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
48 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
49 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/01_创建数组1.py" />
|
50 |
+
<option name="PARAMETERS" value="" />
|
51 |
+
<option name="SHOW_COMMAND_LINE" value="false" />
|
52 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
53 |
+
<option name="MODULE_MODE" value="false" />
|
54 |
+
<option name="REDIRECT_INPUT" value="false" />
|
55 |
+
<option name="INPUT_FILE" value="" />
|
56 |
+
<method v="2" />
|
57 |
+
</configuration>
|
58 |
+
</component>
|
59 |
+
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="应用程序级" UseSingleDictionary="true" transferred="true" />
|
60 |
+
<component name="TaskManager">
|
61 |
+
<task active="true" id="Default" summary="默认任务">
|
62 |
+
<changelist id="3bedfeb5-2966-47b5-88f9-48f835bf7127" name="变更" comment="" />
|
63 |
+
<created>1681021549906</created>
|
64 |
+
<option name="number" value="Default" />
|
65 |
+
<option name="presentableId" value="Default" />
|
66 |
+
<updated>1681021549906</updated>
|
67 |
+
</task>
|
68 |
+
<servers />
|
69 |
+
</component>
|
70 |
+
</project>
|
01_Numpy/01_创建数组1.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np # 导入numpy
|
2 |
+
|
3 |
+
# 创建简单数组
|
4 |
+
n1 = np.array([1, 2, 3])
|
5 |
+
print(n1)
|
6 |
+
|
7 |
+
# 创建一个带小数点的数组
|
8 |
+
n2 = np.array([0.1, 0.2, 0.3])
|
9 |
+
print(n2)
|
10 |
+
|
11 |
+
# 创建一个二维数组
|
12 |
+
n3 = np.array([
|
13 |
+
[1, 2],
|
14 |
+
[3, 4],
|
15 |
+
])
|
16 |
+
print(n3)
|
17 |
+
|
18 |
+
"""
|
19 |
+
1、创建数组的语法:
|
20 |
+
numpy.array(object, dtype=None, copy=True, ndmin=0)
|
21 |
+
"""
|
22 |
+
|
23 |
+
# 使用dtype -- 创建数组时,指定数据类型
|
24 |
+
n4 = np.array([1, 2, 3], dtype=float)
|
25 |
+
print(n4)
|
26 |
+
print(n4.dtype) # 查看数组中元素的类型
|
27 |
+
print(type(n4)) # 查看数组本身的类型
|
28 |
+
print(type(n4[0])) # 查看特定元素的类型
|
29 |
+
|
30 |
+
# 使用object -- 进行数据初始化
|
31 |
+
lst = [1, 2, 3]
|
32 |
+
n5 = np.array(lst, dtype=float)
|
33 |
+
print(n5)
|
34 |
+
|
35 |
+
# 使用copy -- 保留原数组中的数据
|
36 |
+
n6 = np.array([1, 2, 3])
|
37 |
+
n7 = np.array(n5, copy=True)
|
38 |
+
n7[0] = 100
|
39 |
+
n7[2] = 99
|
40 |
+
print(n6) # 未变化
|
41 |
+
print(n7)
|
42 |
+
|
43 |
+
# 使用ndmin -- 指定最小维数
|
44 |
+
lst = [1, 2, 3]
|
45 |
+
n8 = np.array(lst, ndmin=3) # 我们要创建3维数组
|
46 |
+
print(n8)
|
47 |
+
|
48 |
+
"""
|
49 |
+
2、不同方式创建数组
|
50 |
+
"""
|
51 |
+
# 创建指定维数的空数组
|
52 |
+
n9 = np.empty([4, 3], dtype=int) # [4,3]表示4行3列,数据类型未初始化(可指定)的数组
|
53 |
+
print(n9)
|
54 |
+
|
55 |
+
# 创建指定维度的数组,以0填充
|
56 |
+
n10 = np.zeros(3) # 1行3列
|
57 |
+
print(n10) # 输出的结果默认为float
|
58 |
+
|
59 |
+
# 创建指定维度的数组,以1填充
|
60 |
+
n11 = np.ones(3) # 1行3列
|
61 |
+
print(n11) # 输出的结果默认为float
|
62 |
+
|
63 |
+
# 创建指定维度的数组,以指定的数值填充
|
64 |
+
n12 = np.full(3, 8) # # 1行3列,以8填充
|
65 |
+
print(n12)
|
66 |
+
|
67 |
+
n13 = np.full((3, 4), 8) # 3行4列,以8填充
|
68 |
+
print(n13)
|
01_Numpy/02_创建数组2.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
"""
|
4 |
+
3、从数值范围创建数组
|
5 |
+
"""
|
6 |
+
# arange(start, stop, step, dtype=None) 含头不含尾
|
7 |
+
n1 = np.arange(1, 11, 2)
|
8 |
+
print(n1)
|
9 |
+
|
10 |
+
# linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None) 创建等差数列
|
11 |
+
# endpoint: 是否包含尾部 num:数组的长度 retstep: 是否同时返回步长
|
12 |
+
n2 = np.linspace(7500, 10000, 6)
|
13 |
+
print(n2)
|
14 |
+
|
15 |
+
# logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None) 创建等比数列
|
16 |
+
n3 = np.logspace(0, 63, 64, base=2, dtype='uint64')
|
17 |
+
print(n3)
|
18 |
+
|
19 |
+
"""
|
20 |
+
4、生成随机数组
|
21 |
+
"""
|
22 |
+
n4 = np.random.rand(5) # [0,1)之间的,1行5列的随机小数
|
23 |
+
print(n4)
|
24 |
+
|
25 |
+
n5 = np.random.rand(2, 5) # 2行5列的随机小数
|
26 |
+
print(n5)
|
27 |
+
|
28 |
+
# 用于从正态分布中,返回随机生成的数组
|
29 |
+
n6 = np.random.randn(3)
|
30 |
+
print(n6)
|
31 |
+
|
32 |
+
# 生成一定范围的随机数组
|
33 |
+
n7 = np.random.randint(1, 3, 10) # 包头不包尾,产生10个数
|
34 |
+
print(n7)
|
35 |
+
|
36 |
+
n8 = np.random.randint(1, 3, size=(2, 3)) # 包头不包尾,产生2行3列的数组
|
37 |
+
print(n8)
|
38 |
+
|
39 |
+
# 生成正态分布的随机数组
|
40 |
+
n9 = np.random.normal(0, 0.1, 10) # 均值,标准差,维数
|
41 |
+
print(n9)
|
42 |
+
|
43 |
+
n10 = np.random.normal(0, 0.1, size=(2, 3)) # 均值,标准差,维数
|
44 |
+
print(n10)
|
01_Numpy/03_创建数组3.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
# asarray使用 -- 从已有的数组中创建数组
|
4 |
+
|
5 |
+
# 通过列表创建数组
|
6 |
+
n1 = np.asarray([1, 2, 3, 4])
|
7 |
+
print(n1)
|
8 |
+
|
9 |
+
# 通过元组列表创建数组
|
10 |
+
n2 = np.asarray([(1, 2, 3), (4, 5, 6), (7, 8, 9)])
|
11 |
+
print(n2)
|
12 |
+
|
13 |
+
# 通过元组创建数组
|
14 |
+
n3 = np.asarray((1, 2, 3))
|
15 |
+
print(n3)
|
16 |
+
|
17 |
+
# 通过元组的元组创建数组
|
18 |
+
n4 = np.asarray(((1, 2, 3), (4, 5, 6), (7, 8, 9)))
|
19 |
+
print(n4)
|
20 |
+
|
21 |
+
# 通过列表元组创建数组
|
22 |
+
n5 = np.asarray(([1, 2, 3], [4, 5, 6], [7, 8, 9]))
|
23 |
+
print(n5)
|
24 |
+
|
25 |
+
# 动态数组
|
26 |
+
n6 = np.frombuffer(b'juanzijie', dtype='S1') # S1表示单个字符串是一个字符
|
27 |
+
print(n6)
|
28 |
+
|
29 |
+
# 从迭代对象中创建数组对象
|
30 |
+
iter = (i for i in range(5))
|
31 |
+
n7 = np.fromiter(iter, dtype='int')
|
32 |
+
print(n7)
|
33 |
+
|
34 |
+
# empty_like的使用 -- 按照一定的模版,创建数据类型不定的数组
|
35 |
+
n8 = np.empty_like([[1, 2], [3, 4]]) # 创建一个2行2列的数组,因为给定的是2行2列
|
36 |
+
print(n8)
|
37 |
+
|
38 |
+
# 创建一个以0填充的,2行2列的数组
|
39 |
+
n9 = np.zeros_like([[1, 2], [3, 4]])
|
40 |
+
print(n9)
|
41 |
+
|
42 |
+
# 创建一个以1填充的,2行2列的数组
|
43 |
+
n10 = np.ones_like([[1, 2], [3, 4]])
|
44 |
+
print(n10)
|
45 |
+
|
46 |
+
# 创建一个以指定数据填充的,2行2列的数组
|
47 |
+
n11 = np.full_like([[1, 2], [3, 4]], 8)
|
48 |
+
print(n11)
|
01_Numpy/04_数组的数据类型.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Numpy中的数据类型:
|
3 |
+
bool_
|
4 |
+
int_
|
5 |
+
int8
|
6 |
+
int16
|
7 |
+
int32
|
8 |
+
int64
|
9 |
+
uint8
|
10 |
+
uint16
|
11 |
+
uint32
|
12 |
+
float_
|
13 |
+
...
|
14 |
+
datetime64
|
15 |
+
"""
|
16 |
+
import numpy as np
|
17 |
+
|
18 |
+
n1 = np.array([1, 2, 3, 4], dtype=int)
|
19 |
+
print(n1)
|
20 |
+
|
21 |
+
n2 = np.array([1, 2, 3, 4], dtype='int32')
|
22 |
+
print(n2)
|
23 |
+
|
24 |
+
n3 = np.array([1, 2, 3, 4], dtype='int_')
|
25 |
+
print(n3)
|
26 |
+
|
27 |
+
n4 = np.array([1, 2, 3, 4], dtype='float_')
|
28 |
+
print(n4)
|
29 |
+
|
30 |
+
n5 = np.array([1, 2, 3, 4], dtype=float)
|
31 |
+
print(n5)
|
32 |
+
|
33 |
+
n6 = np.array(['2021-01-01'], dtype='datetime64')
|
34 |
+
print(n6)
|
01_Numpy/05_数组运算.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
n1 = np.array([1, 2])
|
4 |
+
n2 = np.array([3, 4])
|
5 |
+
print(n1)
|
6 |
+
print(n2)
|
7 |
+
|
8 |
+
# 数组运算 -- 对应位置的元素进行运算
|
9 |
+
print('加法运算', n1 + n2)
|
10 |
+
print('减法运算', n1 - n2)
|
11 |
+
print('乘法运算', n1 * n2)
|
12 |
+
print('除法运算', n1 / n2)
|
13 |
+
print('幂运算', n1 ** n2)
|
14 |
+
|
15 |
+
print('比较运算', n1 > n2)
|
16 |
+
print('比较运算', n1 < n2)
|
17 |
+
print('比较运算', n1 != n2)
|
01_Numpy/06_索引和切片.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
# 数组中的索引
|
4 |
+
n1 = np.array([1, 2, 3, 4])
|
5 |
+
print(n1[1])
|
6 |
+
print(n1[-3])
|
7 |
+
|
8 |
+
n2 = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
|
9 |
+
print(n2[0][1])
|
10 |
+
print(n2[0, 1])
|
11 |
+
print(n2[-2, -3])
|
12 |
+
|
13 |
+
# 数组的切片 [start:stop:step] 含头不含尾
|
14 |
+
n3 = np.array([10, 20, 30, 40, 50, 60])
|
15 |
+
print(n3[:3]) # 省略起始位置,默认从0开始,步长默认为1,[0,3)
|
16 |
+
print(n3[2:5]) # [2,5)
|
17 |
+
print(n3[3:]) # 省略结束位置,则默认到最后一个元素
|
18 |
+
print(n3[:]) # 获取数组中的所有元素
|
19 |
+
|
20 |
+
# 修改步长
|
21 |
+
print(n3[0::2])
|
22 |
+
print(n3[1::5])
|
23 |
+
|
24 |
+
# 步长还可以为负数
|
25 |
+
print(n3[::-1]) # 逆序
|
26 |
+
print(n3[-1:-5:-1])
|
27 |
+
print(n3[-1:-6:-2])
|
01_Numpy/07_列表切片与数组切片.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
# 数组的切片
|
4 |
+
array = np.array([10, 20, 30, 40])
|
5 |
+
print(array)
|
6 |
+
|
7 |
+
arr1 = array[1:3]
|
8 |
+
print(arr1)
|
9 |
+
|
10 |
+
# 对切片之后的数组进行修改
|
11 |
+
arr1[0] = 99
|
12 |
+
print(arr1)
|
13 |
+
print(array) # 已经改变
|
14 |
+
|
15 |
+
# 列表的切片
|
16 |
+
lst = [10, 20, 30, 40]
|
17 |
+
lst1 = lst[1:3]
|
18 |
+
print(lst1)
|
19 |
+
|
20 |
+
# 对切片之后的列表进行修改
|
21 |
+
lst1[0] = 99
|
22 |
+
print(lst1)
|
23 |
+
print(lst) # 没有发生修改
|
01_Numpy/08_二维数组的切片操作.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
|
4 |
+
print(arr)
|
5 |
+
|
6 |
+
print(arr[1, 2]) # 1表示索引为1的行,2表示索引为2的列
|
7 |
+
|
8 |
+
print(arr[:2, 1:]) # [0,2)的行,[1,3]的列
|
9 |
+
|
10 |
+
print(arr[1, :2]) # 索引为1的行,[0,2)的列
|
11 |
+
|
12 |
+
print(arr[:2, 2]) # 索引为[0,2)的行,索引为2的列
|
13 |
+
|
14 |
+
print(arr[:, :1]) # 全部行,索引为0的列
|
01_Numpy/09_数组的重塑.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
n = np.arange(6)
|
4 |
+
print(n)
|
5 |
+
|
6 |
+
# 重塑 -- 元素个数必须相同,否则报错
|
7 |
+
n1 = n.reshape(2, 3)
|
8 |
+
print(n1)
|
9 |
+
|
10 |
+
n = np.array([[1, 2, 3], [4, 5, 6]])
|
11 |
+
print(n)
|
12 |
+
n1 = n.reshape(3,2) # 重塑
|
13 |
+
print(n1)
|
01_Numpy/10_数组的转置.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
n = np.arange(24).reshape(4, 6)
|
4 |
+
print(n)
|
5 |
+
|
6 |
+
# 转置
|
7 |
+
n1 = n.T
|
8 |
+
print(n1)
|
9 |
+
|
10 |
+
n2 = n.transpose()
|
11 |
+
print(n2)
|
12 |
+
|
13 |
+
# 转置练习
|
14 |
+
arr1 = np.array([['A', 100], ['B', 200], ['C', 300], ['D', 400], ['E', 500]])
|
15 |
+
print(arr1)
|
16 |
+
|
17 |
+
arr2 = arr1.T
|
18 |
+
print(arr2)
|
19 |
+
|
01_Numpy/11_数组的增删改查.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
n1 = np.array([[1, 2], [3, 4], [5, 6]])
|
4 |
+
# 创建第二个数组
|
5 |
+
n2 = np.array([[10, 20], [30, 40], [50, 60]])
|
6 |
+
print(n1)
|
7 |
+
print(n2)
|
8 |
+
|
9 |
+
"""
|
10 |
+
1、数组的增加
|
11 |
+
"""
|
12 |
+
# 水平方向增加数据 -- 列的个数增加
|
13 |
+
print(np.hstack((n1, n2)))
|
14 |
+
|
15 |
+
# 垂直方向增加数据 -- 行的个数增加
|
16 |
+
print(np.vstack((n1, n2)))
|
17 |
+
|
18 |
+
"""
|
19 |
+
2、数组的删除
|
20 |
+
"""
|
21 |
+
n1 = np.array([[1, 2], [3, 4], [5, 6]])
|
22 |
+
print(n1)
|
23 |
+
|
24 |
+
# 删除第3行 -- 0号轴,索引为2的数据
|
25 |
+
n2 = np.delete(n1, 2, axis=0)
|
26 |
+
print(n2)
|
27 |
+
|
28 |
+
# 删除第1列
|
29 |
+
n3 = np.delete(n1, 0, axis=1)
|
30 |
+
print(n3)
|
31 |
+
|
32 |
+
# 删除第1行和第3行
|
33 |
+
n4 = np.delete(n1, (0, 2), axis=0)
|
34 |
+
print(n4)
|
35 |
+
|
36 |
+
"""
|
37 |
+
3、数组的修改
|
38 |
+
"""
|
39 |
+
n1 = np.array([[1, 2], [3, 4], [5, 6]])
|
40 |
+
n1[1] = [30, 40] # 修改一行
|
41 |
+
print(n1)
|
42 |
+
|
43 |
+
n1[2][1] = 88 # 修改单个元素
|
44 |
+
print(n1)
|
45 |
+
|
46 |
+
"""
|
47 |
+
4、数组的查询
|
48 |
+
"""
|
49 |
+
n = np.arange(1, 11)
|
50 |
+
print(n)
|
51 |
+
|
52 |
+
n2 = n[np.where(n > 5)] # 获取到数组中所有大于5的元素, 返回索引
|
53 |
+
print(n2)
|
54 |
+
|
55 |
+
n3 = np.where(n > 5, 2, 0) # 数组中元素大于5,输出2,否则输出0
|
56 |
+
print(n3)
|
01_Numpy/12_矩阵的操作.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
"""
|
4 |
+
1、创建简单矩阵
|
5 |
+
"""
|
6 |
+
a = np.mat('5 6;7 8')
|
7 |
+
print(a)
|
8 |
+
|
9 |
+
b = np.mat([[5, 6], [7, 8]])
|
10 |
+
print(b)
|
11 |
+
|
12 |
+
print(type(a), type(b)) # 矩阵
|
13 |
+
|
14 |
+
n1 = np.array([[5, 6], [7, 8]])
|
15 |
+
print(type(n1)) # 数组
|
16 |
+
|
17 |
+
"""
|
18 |
+
2、使用mat函数创建常见的矩阵
|
19 |
+
"""
|
20 |
+
# 创建3*3的零矩阵
|
21 |
+
d1 = np.mat(np.zeros((3, 3)))
|
22 |
+
print(d1)
|
23 |
+
|
24 |
+
# 创建2*4的1矩阵
|
25 |
+
d2 = np.mat(np.ones((2, 4)))
|
26 |
+
print(d2)
|
27 |
+
|
28 |
+
# 创建[0,1)随机矩阵
|
29 |
+
d3 = np.mat(np.random.rand(3, 4))
|
30 |
+
print(d3)
|
31 |
+
|
32 |
+
# 创建[1,8)随机矩阵
|
33 |
+
d4 = np.mat(np.random.randint(1, 8, size=(3, 5)))
|
34 |
+
print(d4)
|
35 |
+
|
36 |
+
# 单位矩阵(主对角线为1,其余为0)
|
37 |
+
d5 = np.mat(np.eye(5, 5))
|
38 |
+
print(d5)
|
39 |
+
|
40 |
+
# 对角线矩阵
|
41 |
+
d6 = np.mat(np.diag([1, 2, 3]))
|
42 |
+
print(d6)
|
43 |
+
|
44 |
+
d7 = np.mat(np.diag([7, 8, 9]))
|
45 |
+
print(d7)
|
01_Numpy/13_矩阵的运算.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
矩阵的加、减、乘、除运算,以及转置与求逆
|
3 |
+
"""
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
# 创建矩阵
|
7 |
+
d1 = np.mat([[1, 2], [3, 4], [5, 6]])
|
8 |
+
print(d1)
|
9 |
+
|
10 |
+
d2 = np.mat([1, 2])
|
11 |
+
print(d2)
|
12 |
+
|
13 |
+
# 矩阵的加法运算
|
14 |
+
d3 = d1 + d2
|
15 |
+
print(d3)
|
16 |
+
|
17 |
+
# 矩阵的减法运算
|
18 |
+
print(d1 - d2)
|
19 |
+
|
20 |
+
# 矩阵的除法运算
|
21 |
+
print(d1 / d2)
|
22 |
+
|
23 |
+
# 矩阵的乘法运算
|
24 |
+
# error: print(d1 * d2)
|
25 |
+
d1 = np.mat([[1, 2], [3, 4], [5, 6]])
|
26 |
+
d2 = np.mat([[1, 2], [3, 4]])
|
27 |
+
print(d1 * d2)
|
28 |
+
|
29 |
+
# 矩阵的转置
|
30 |
+
n1 = np.mat('1 3 3;4 5 6;7 12 9')
|
31 |
+
print(n1)
|
32 |
+
print(n1.T)
|
33 |
+
|
34 |
+
# 矩阵的求逆
|
35 |
+
print(n1.I)
|
01_Numpy/14_数组的相乘与点积.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
# 数组的相乘和数组的点积
|
4 |
+
n1 = np.array([[1, 2], [3, 4], [5, 6]])
|
5 |
+
n2 = np.array([1, 2])
|
6 |
+
|
7 |
+
print(n1)
|
8 |
+
print(n2)
|
9 |
+
|
10 |
+
print('数组的乘积', n1 * n2)
|
11 |
+
|
12 |
+
print("数组的点积", np.dot(n1, n2)) # 相乘之后,各行相加
|
13 |
+
|
01_Numpy/15_矩阵相乘与矩阵元素相乘.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
# 矩阵的点积与矩阵元素之间的相乘运算
|
4 |
+
n1 = np.mat('1 3 3;4 5 6;7 12 9')
|
5 |
+
n2 = np.mat('2 6 6;8 10 12;14 24 18')
|
6 |
+
|
7 |
+
print(n1)
|
8 |
+
print(n2)
|
9 |
+
|
10 |
+
print('矩阵相乘的结果:', n1 * n2)
|
11 |
+
print('矩阵对应元素相乘:', np.multiply(n1, n2))
|
12 |
+
|
01_Numpy/16_数学运算函数.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
n1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
|
4 |
+
n2 = np.array([10, 10, 10])
|
5 |
+
print(n1)
|
6 |
+
print(n2)
|
7 |
+
|
8 |
+
# 数组的加减乘除
|
9 |
+
print('加', np.add(n1, n2))
|
10 |
+
print(n1 + n2)
|
11 |
+
|
12 |
+
print('减', np.subtract(n1, n2))
|
13 |
+
print('乘', np.multiply(n1, n2))
|
14 |
+
print('除', np.divide(n1, n2))
|
15 |
+
|
16 |
+
# 倒数
|
17 |
+
n1 = np.array([0.25, 1.75, 2, 100])
|
18 |
+
print(np.reciprocal(n1))
|
19 |
+
|
20 |
+
# 幂运算
|
21 |
+
n1 = np.array([10, 100, 1000])
|
22 |
+
n2 = np.array([1, 2, 3])
|
23 |
+
print(np.power(n1, n2)) # n1的n2次幂
|
24 |
+
|
25 |
+
# 余数 余数 = a - n * (a // n) -- a是被除数,n是除数
|
26 |
+
n1 = np.array([10, 20, 30])
|
27 |
+
n2 = np.array([4, 5, -8])
|
28 |
+
print(np.mod(n1, n2))
|
29 |
+
# a是30,n是-8
|
30 |
+
|
31 |
+
# 四舍五入
|
32 |
+
n = np.array([1.55, 6.823, 100, 0.1189, 3.1315, -3.456])
|
33 |
+
print(n)
|
34 |
+
print(np.around(n)) # 默认取整
|
35 |
+
print(np.around(n, decimals=2)) # 保留2位小数
|
36 |
+
print(np.around(n, decimals=-1)) # 取整到小数点左侧
|
37 |
+
|
38 |
+
# 向上取整与向下取整
|
39 |
+
print(np.ceil(n)) # 向上取整
|
40 |
+
print(np.floor(n)) # 向下取整
|
41 |
+
|
42 |
+
# 三角函数
|
43 |
+
n = np.array([0, 30, 45, 60, 90])
|
44 |
+
print(np.sin(n / 180 * np.pi))
|
01_Numpy/17_统计分析函数.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
n = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
|
4 |
+
print(n)
|
5 |
+
|
6 |
+
# 整个数组求和
|
7 |
+
print(n.sum())
|
8 |
+
|
9 |
+
# 每一列的和(axis=0:沿行方向累加)
|
10 |
+
print(n.sum(axis=0))
|
11 |
+
|
12 |
+
# 每一行的和(axis=1:沿列方向累加)
|
13 |
+
print(n.sum(axis=1))
|
14 |
+
|
15 |
+
# 平均值
|
16 |
+
print(n.mean())
|
17 |
+
|
18 |
+
# 每一列的平均值(axis=0:沿行方向计算)
|
19 |
+
print(n.mean(axis=0))
|
20 |
+
|
21 |
+
# 每一行的平均值(axis=1:沿列方向计算)
|
22 |
+
print(n.mean(axis=1))
|
23 |
+
|
24 |
+
# 最大值
|
25 |
+
print(n.max())
|
26 |
+
|
27 |
+
# 每一列的最大值(axis=0:沿行方向比较)
|
28 |
+
print(n.max(axis=0))
|
29 |
+
|
30 |
+
# 每一行的最大值(axis=1:沿列方向比较)
|
31 |
+
print(n.max(axis=1))
|
32 |
+
|
33 |
+
# 最小值
|
34 |
+
print(n.min())
|
35 |
+
|
36 |
+
# 每一列的最小值(axis=0:沿行方向比较)
|
37 |
+
print(n.min(axis=0))
|
38 |
+
|
39 |
+
# 每一行的最小值(axis=1:沿列方向比较)
|
40 |
+
print(n.min(axis=1))
|
41 |
+
|
42 |
+
# 加权平均
|
43 |
+
# 加权平均 = sum(数据 * 出现的次数) / sum(出现的次数)
|
44 |
+
n1 = np.array([1, 2, 3, 4, 5]) # 数组中的数据
|
45 |
+
n2 = np.array([10, 20, 30, 40, 50]) # 表示各个数出现的次数
|
46 |
+
print(np.average(n1, weights=n2))
|
47 |
+
|
48 |
+
# 中位数 -- 数组有序
|
49 |
+
n1 = np.array([1, 2, 3, 4, 5, 99])
|
50 |
+
print(np.median(n1))
|
51 |
+
|
52 |
+
# 方差 -- 各数据与平均数之差的平方的平均值
|
53 |
+
n = np.array([1, 2, 3, 4, 5, 6])
|
54 |
+
print(n.var())
|
55 |
+
|
56 |
+
# 标准差/均方差 -- 方差的平方根
|
57 |
+
n = np.array([1, 2, 3, 4, 5, 6])
|
58 |
+
print(n.std())
|
01_Numpy/18_数组的排序.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 数组的排序
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
n = np.array([[4, 7, 3], [2, 8, 5], [9, 1, 6]])
|
5 |
+
print(n)
|
6 |
+
|
7 |
+
print('数组排序', np.sort(n)) # 排序默认升序,按列排序
|
8 |
+
|
9 |
+
# axis=0:沿行方向(纵向)排序,每一列内部升序
|
10 |
+
print(np.sort(n, axis=0))
|
11 |
+
|
12 |
+
# axis=1:沿列方向(横向)排序,每一行内部升序
|
13 |
+
print(np.sort(n, axis=1))
|
14 |
+
|
15 |
+
# argsort() 排序
|
16 |
+
x = np.array([9, 4, 7, 2, 4, 8, 3])
|
17 |
+
y = np.argsort(x)
|
18 |
+
print(y) # 得到升序排序后的索引值
|
19 |
+
# 排序后重构数组
|
20 |
+
print(x[y])
|
21 |
+
|
22 |
+
# lexsort() 排序
|
23 |
+
math = np.array([101, 109, 115, 108, 118, 112, 118])
|
24 |
+
english = np.array([117, 105, 118, 98, 109, 98, 120])
|
25 |
+
total = np.array([621, 623, 620, 620, 615, 615, 450])
|
26 |
+
|
27 |
+
sort_total = np.lexsort((english, math, total)) # 先按总分,再比数学,最后比英语
|
28 |
+
print(sort_total) # 排序后的索引值
|
29 |
+
lst = [[total[i], math[i], english[i]] for i in sort_total]
|
30 |
+
n = np.array(lst)
|
31 |
+
print(n)
|
01_Numpy/19_应用_图像灰度处理.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
|
4 |
+
# 读取图片
|
5 |
+
n1 = plt.imread('img.jpg')
|
6 |
+
|
7 |
+
# 传入数组,显示对应颜色
|
8 |
+
plt.imshow(n1)
|
9 |
+
print(n1)
|
10 |
+
|
11 |
+
n2 = np.array([0.299, 0.587, 0.114, 1])
|
12 |
+
|
13 |
+
# 数组的点乘运算
|
14 |
+
x = np.dot(n1, n2)
|
15 |
+
|
16 |
+
# 传入数组显示灰度
|
17 |
+
plt.imshow(x, cmap='gray')
|
18 |
+
|
19 |
+
# 显示图像
|
20 |
+
plt.show()
|
01_Numpy/img.jpg
ADDED
![]() |
Git LFS Details
|
02_Pandas/.DS_Store
ADDED
Binary file (10.2 kB). View file
|
|
02_Pandas/.idea/.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
# 默认忽略的文件
|
2 |
+
/shelf/
|
3 |
+
/workspace.xml
|
02_Pandas/.idea/Pandas.iml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module type="PYTHON_MODULE" version="4">
|
3 |
+
<component name="NewModuleRootManager">
|
4 |
+
<content url="file://$MODULE_DIR$" />
|
5 |
+
<orderEntry type="inheritedJdk" />
|
6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
7 |
+
</component>
|
8 |
+
</module>
|
02_Pandas/.idea/encodings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="Encoding">
|
4 |
+
<file url="file://$PROJECT_DIR$/resources/京东鞋子评论数据.csv" charset="GBK" />
|
5 |
+
</component>
|
6 |
+
</project>
|
02_Pandas/.idea/inspectionProfiles/Project_Default.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<profile version="1.0">
|
3 |
+
<option name="myName" value="Project Default" />
|
4 |
+
<inspection_tool class="PyPep8Inspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
|
5 |
+
</profile>
|
6 |
+
</component>
|
02_Pandas/.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<settings>
|
3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
4 |
+
<version value="1.0" />
|
5 |
+
</settings>
|
6 |
+
</component>
|
02_Pandas/.idea/misc.xml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
|
4 |
+
</project>
|
02_Pandas/.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectModuleManager">
|
4 |
+
<modules>
|
5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/Pandas.iml" filepath="$PROJECT_DIR$/.idea/Pandas.iml" />
|
6 |
+
</modules>
|
7 |
+
</component>
|
8 |
+
</project>
|
02_Pandas/.idea/workspace.xml
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="AutoImportSettings">
|
4 |
+
<option name="autoReloadType" value="SELECTIVE" />
|
5 |
+
</component>
|
6 |
+
<component name="ChangeListManager">
|
7 |
+
<list default="true" id="37e61199-e056-4c84-b201-bf4ad9d76cef" name="变更" comment="" />
|
8 |
+
<option name="SHOW_DIALOG" value="false" />
|
9 |
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
10 |
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
11 |
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
12 |
+
</component>
|
13 |
+
<component name="FileTemplateManagerImpl">
|
14 |
+
<option name="RECENT_TEMPLATES">
|
15 |
+
<list>
|
16 |
+
<option value="Python Script" />
|
17 |
+
</list>
|
18 |
+
</option>
|
19 |
+
</component>
|
20 |
+
<component name="MarkdownSettingsMigration">
|
21 |
+
<option name="stateVersion" value="1" />
|
22 |
+
</component>
|
23 |
+
<component name="ProjectId" id="2OBzO8pbPOosxZakYpmH3wV9uUP" />
|
24 |
+
<component name="ProjectViewState">
|
25 |
+
<option name="hideEmptyMiddlePackages" value="true" />
|
26 |
+
<option name="showLibraryContents" value="true" />
|
27 |
+
</component>
|
28 |
+
<component name="PropertiesComponent">{
|
29 |
+
"keyToString": {
|
30 |
+
"RunOnceActivity.OpenProjectViewOnStart": "true",
|
31 |
+
"RunOnceActivity.ShowReadmeOnStart": "true",
|
32 |
+
"last_opened_file_path": "/Users/macbook/Documents/Pandas"
|
33 |
+
}
|
34 |
+
}</component>
|
35 |
+
<component name="RecentsManager">
|
36 |
+
<key name="MoveFile.RECENT_KEYS">
|
37 |
+
<recent name="$PROJECT_DIR$/resources" />
|
38 |
+
<recent name="$PROJECT_DIR$" />
|
39 |
+
</key>
|
40 |
+
</component>
|
41 |
+
<component name="RunManager">
|
42 |
+
<configuration name="01_Pandas初步使用" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
|
43 |
+
<module name="Pandas" />
|
44 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
45 |
+
<option name="PARENT_ENVS" value="true" />
|
46 |
+
<envs>
|
47 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
48 |
+
</envs>
|
49 |
+
<option name="SDK_HOME" value="" />
|
50 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
51 |
+
<option name="IS_MODULE_SDK" value="true" />
|
52 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
53 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
54 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/01_Pandas初步使用.py" />
|
55 |
+
<option name="PARAMETERS" value="" />
|
56 |
+
<option name="SHOW_COMMAND_LINE" value="false" />
|
57 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
58 |
+
<option name="MODULE_MODE" value="false" />
|
59 |
+
<option name="REDIRECT_INPUT" value="false" />
|
60 |
+
<option name="INPUT_FILE" value="" />
|
61 |
+
<method v="2" />
|
62 |
+
</configuration>
|
63 |
+
</component>
|
64 |
+
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="应用程序级" UseSingleDictionary="true" transferred="true" />
|
65 |
+
<component name="TaskManager">
|
66 |
+
<task active="true" id="Default" summary="默认任务">
|
67 |
+
<changelist id="37e61199-e056-4c84-b201-bf4ad9d76cef" name="变更" comment="" />
|
68 |
+
<created>1681048727675</created>
|
69 |
+
<option name="number" value="Default" />
|
70 |
+
<option name="presentableId" value="Default" />
|
71 |
+
<updated>1681048727675</updated>
|
72 |
+
</task>
|
73 |
+
<servers />
|
74 |
+
</component>
|
75 |
+
</project>
|
02_Pandas/01_Pandas初步使用.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
data = pd.read_excel('resources/中超赛事表.xlsx')
|
4 |
+
print(data)
|
02_Pandas/02_Series对象.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
pd.Series(data, index)
|
3 |
+
data:数据
|
4 |
+
index:索引
|
5 |
+
"""
|
6 |
+
import pandas as pd
|
7 |
+
|
8 |
+
d1 = ['李光地', '张红云', '王鹏']
|
9 |
+
s = pd.Series(data=d1, index=[1, 2, 3]) # 数据和索引,索引默认从0开始
|
10 |
+
print(s)
|
11 |
+
print(type(s))
|
12 |
+
|
13 |
+
# 指定数据和索引
|
14 |
+
d2 = [90, 98, 87]
|
15 |
+
index = ['张三', '李四', '王五']
|
16 |
+
s = pd.Series(data=d2, index=index)
|
17 |
+
print(s)
|
18 |
+
print(s['张三']) # 标签索引
|
19 |
+
print(s[['张三', '王五']])
|
20 |
+
print(s['张三': '王五']) # 切片索引 -- 含头含尾
|
21 |
+
print("=========================")
|
22 |
+
|
23 |
+
print(s.index)
|
24 |
+
print(list(s.index))
|
25 |
+
print(s.values) # array类型
|
26 |
+
print(type(s.values))
|
27 |
+
print("=========================")
|
28 |
+
|
29 |
+
data = ['李光地', '张红云', '王鹏']
|
30 |
+
s = pd.Series(data=data)
|
31 |
+
print(s)
|
32 |
+
print(s[0]) # 位置索引
|
33 |
+
print(s[0:2:1]) # 切片索引 -- 含头不含尾
|
02_Pandas/03_DataFrame对象.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
pd.DataFrame(data, index, columns, dtype)
|
3 |
+
data: 数据
|
4 |
+
index: 行索引
|
5 |
+
columns: 列索引
|
6 |
+
dtype: 数据类型
|
7 |
+
"""
|
8 |
+
import pandas as pd
|
9 |
+
|
10 |
+
# 列表方式创建DataFrame对象
|
11 |
+
data = [['小太阳', 320.9, 100], ['鼠标', 150.3, 50], ['小刀', 1.5, 200]]
|
12 |
+
columns = ['名称', '单价', '数量']
|
13 |
+
df = pd.DataFrame(data=data, columns=columns)
|
14 |
+
print(df)
|
15 |
+
print(type(df))
|
16 |
+
|
17 |
+
# 字典方式创建DataFrame对象 -- 要求列表长度一致
|
18 |
+
data = {
|
19 |
+
'名称': ['小太阳', '鼠标', '小刀'],
|
20 |
+
'单价': [320.9, 150.3, 1.5],
|
21 |
+
'数量': [100, 50, 200]
|
22 |
+
}
|
23 |
+
df = pd.DataFrame(data=data)
|
24 |
+
print(df)
|
25 |
+
|
26 |
+
# 自动填充
|
27 |
+
data = {
|
28 |
+
'名称': ['小太阳', '鼠标', '小刀'],
|
29 |
+
'单价': [320.9, 150.3, 1.5],
|
30 |
+
'数量': [100, 50, 200],
|
31 |
+
'公司': '东门超市'
|
32 |
+
}
|
33 |
+
df = pd.DataFrame(data=data)
|
34 |
+
print(df)
|
02_Pandas/04_DataFrame属性.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
DataFrame对应属性:
|
3 |
+
T: 行列数据转换
|
4 |
+
head: 查看前N条数据,默认5条
|
5 |
+
tail: 查看后N条数据,默认5条
|
6 |
+
shape:查看行数和列数,shape[0]表示行,shape[1]表示列
|
7 |
+
info: 查看索引,数据类型,内存信息
|
8 |
+
"""
|
9 |
+
import pandas as pd
|
10 |
+
|
11 |
+
data = [['小太阳', 320.9, 100], ['鼠标', 150.3, 50], ['小刀', 1.5, 200]]
|
12 |
+
columns = ['名称', '单价', '数量']
|
13 |
+
df = pd.DataFrame(data=data, columns=columns)
|
14 |
+
print(df)
|
15 |
+
|
16 |
+
print('查看所有元素的值\n', df.values)
|
17 |
+
|
18 |
+
print('查看所有元素的类型\n', df.dtypes)
|
19 |
+
|
20 |
+
print('查看所有行名称\n', list(df.index))
|
21 |
+
|
22 |
+
df.index = [1, 2, 3] # 修改行索引名称
|
23 |
+
print(df)
|
24 |
+
|
25 |
+
print('查看所有列索引\n', list(df.columns))
|
26 |
+
|
27 |
+
df.columns = ['商品名称', '最新单价', '实时数量'] # 修改列索引名称
|
28 |
+
print(df)
|
29 |
+
|
30 |
+
# 行列数据的转换
|
31 |
+
pd.set_option('display.unicode.east_asian_width', True) # 规整格式
|
32 |
+
new_df = df.T
|
33 |
+
print(new_df)
|
34 |
+
|
35 |
+
print('查看前N条数据\n', df.head(1))
|
36 |
+
|
37 |
+
print('查看后N条数据\n', df.tail(1))
|
38 |
+
|
39 |
+
print('行', df.shape[0], ' 列', df.shape[1]) # 行列数
|
40 |
+
|
41 |
+
print('查看索引,数据类型,内存信息')
df.info()  # info() must be called; it prints its report itself and returns None
|
02_Pandas/05_DataFrame重要函数.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
data = [['小太阳', 320.9, 100], ['鼠标', 150.3, 50], ['小刀', 1.5, 200]]
|
4 |
+
columns = ['名称', '单价', '数量']
|
5 |
+
df = pd.DataFrame(data=data, columns=columns)
|
6 |
+
print(df)
|
7 |
+
|
8 |
+
print(df.describe()) # 描述信息
|
9 |
+
|
10 |
+
print(df.count()) # 非空值的个数
|
11 |
+
|
12 |
+
print(df.sum()) # 求和
|
13 |
+
|
14 |
+
print(df.max()) # column-wise maximum -- Chinese strings are compared by Unicode code point, not by any English translation
|
15 |
+
|
16 |
+
print(df.min()) # 最小值
|
02_Pandas/06_导入excel数据.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
pd.read_excel(io, sheet_name, header): 导入.xls或.xlsx文件
|
3 |
+
io: 表示.xls或.xlsx的 文件路径 或 文件对象
|
4 |
+
sheet_name: 表示工作表 -- 一个excel有多个工作表,可以写表名或序号,序号从0开始。None表示读取所有工作表
|
5 |
+
header: 默认值为0,取第一行为列名,如果第一行不为列名,则设置header=None
|
6 |
+
"""
|
7 |
+
import pandas as pd
|
8 |
+
|
9 |
+
excel = pd.read_excel('resources/京东鞋子评论信息.xlsx', sheet_name='码数分析', header=None)
|
10 |
+
print(excel)
|
11 |
+
|
12 |
+
# 导入一列数据
|
13 |
+
excel = pd.read_excel('resources/02微机原理学员成绩统计.xlsx', sheet_name='02微机原理及格学员名单', usecols=[1])
|
14 |
+
print(excel)
|
15 |
+
|
16 |
+
# 导入多个列数据
|
17 |
+
pd.set_option('display.unicode.east_asian_width', True)
|
18 |
+
excel = pd.read_excel("resources/02微机原理学员成绩统计.xlsx", sheet_name='02微机原理及格学员名单',
|
19 |
+
usecols=['姓名', '总成绩'])
|
20 |
+
print(excel)
|
02_Pandas/07_导入各种类型的文件.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
"""
|
4 |
+
导入CSV/txt文件:
|
5 |
+
pd.read_csv(filepath_or_buffer, sep=',', header, encoding=None)
|
6 |
+
filepath_or_buffer: 字符串、文件路径,也可以是URL链接
|
7 |
+
sep:字符串、分隔符
|
8 |
+
header:指定作为列名的行,默认是0,如果没有,需指定为None
|
9 |
+
encoding:编码
|
10 |
+
"""
|
11 |
+
pd.set_option('display.unicode.east_asian_width', True)
|
12 |
+
csv = pd.read_csv('resources/京东鞋子评论数据.csv', sep=',', header=0, encoding="GBK")
|
13 |
+
print(csv)
|
14 |
+
|
15 |
+
# 导入txt文件
|
16 |
+
txt = pd.read_csv('resources/rating.txt', sep='\t', header=None, encoding="GBK")
|
17 |
+
print(txt)
|
18 |
+
|
19 |
+
"""
|
20 |
+
导入HTML网页 -- 只能读取table标签里的内容
|
21 |
+
pd.read_html(io, match='', flavor, header, encoding)
|
22 |
+
io: 字符串、文件路径,也可以是URL链接,不支持https
|
23 |
+
match: 正则表达式
|
24 |
+
flavor: 解释器默认为'lxml'
|
25 |
+
"""
|
26 |
+
url = 'http://www.espn.com/nba/salaries'
|
27 |
+
df = pd.DataFrame() # 创建空的DataFrame对象
|
28 |
+
|
29 |
+
# DataFrame添加数据
|
30 |
+
df = pd.concat([df, *pd.read_html(url, header=0)])  # read_html returns a list of DataFrames; concat is the public replacement for the private _append
|
31 |
+
print(df)
|
32 |
+
|
33 |
+
# 保存成CSV文件
|
34 |
+
df.to_csv('NBA_salary.csv', index=False) # 不要索引
|
02_Pandas/08_数据抽取.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
data = [[45, 65, 100], [56, 45, 50], [67, 67, 67]]
|
4 |
+
index = ['张三', '李四', '王五']
|
5 |
+
columns = ['数学', '语文', '英语']
|
6 |
+
|
7 |
+
pd.set_option('display.unicode.east_asian_width', True)
|
8 |
+
df = pd.DataFrame(data=data, index=index, columns=columns)
|
9 |
+
print(df)
|
10 |
+
|
11 |
+
"""
|
12 |
+
1、提取行数据
|
13 |
+
"""
|
14 |
+
# 提取行数据
|
15 |
+
print(df.loc['张三']) # 行索引名称
|
16 |
+
print(df.iloc[0]) # 行索引编号
|
17 |
+
|
18 |
+
# 提取多行数据
|
19 |
+
print(df.loc[['张三', '王五']])
|
20 |
+
print(df.iloc[[0, 2]])
|
21 |
+
|
22 |
+
# 提取连续的多行数据
|
23 |
+
print(df.loc['张三':'王五']) # 行索引名称,包头包尾
|
24 |
+
print(df.iloc[0:2]) # 行索引编号,包头不包尾
|
25 |
+
print(df.iloc[1::]) # (start: stop: step)
|
26 |
+
|
27 |
+
"""
|
28 |
+
2、提取列数据
|
29 |
+
"""
|
30 |
+
# 提取列数据
|
31 |
+
print(df[['数学', '英语']]) # 使用列名提取
|
32 |
+
print(df.loc[:, ['数学', '英语']]) # ','左侧表示行,右侧表示列
|
33 |
+
print(df.iloc[:, [0, 2]]) # 使用列序号
|
34 |
+
|
35 |
+
# 提取连续的列
|
36 |
+
print(df.loc[:, '语文':])
|
37 |
+
print(df.iloc[:, 1:])
|
38 |
+
|
39 |
+
"""
|
40 |
+
3、提取区域数据
|
41 |
+
"""
|
42 |
+
# 提取区域数据
|
43 |
+
print(df.loc[['张三', '王五'], ['数学', '语文']])
|
44 |
+
print(df.iloc[[0, 2], 0:2]) # ','左侧表示行,右侧表示列 -- 连续时用: 不连续时用[]
|
45 |
+
|
46 |
+
print(df.iloc[:,0]) # 所有行的第一列数据
|
47 |
+
|
48 |
+
"""
|
49 |
+
4、提取指定条件的数据
|
50 |
+
"""
|
51 |
+
print(df.loc[df['语文'] > 60]) # 提取语文大于60的数据
|
52 |
+
|
53 |
+
print(df.loc[(df['语文'] > 60) & (df['数学'] > 60)]) # 提取语文和数学都大于60的数据
|
54 |
+
|
02_Pandas/09_数据的操作.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
data = [[45, 65, 100], [56, 45, 50], [67, 67, 67]]
|
4 |
+
index = ['张三', '李四', '王五']
|
5 |
+
columns = ['数学', '语文', '英语']
|
6 |
+
|
7 |
+
pd.set_option('display.unicode.east_asian_width', True)
|
8 |
+
df = pd.DataFrame(data=data, index=index, columns=columns)
|
9 |
+
print(df)
|
10 |
+
|
11 |
+
"""
|
12 |
+
1、按列增加数据
|
13 |
+
"""
|
14 |
+
# 采用直接赋值的方式
|
15 |
+
df['政治'] = [90, 89, 100]
|
16 |
+
print(df)
|
17 |
+
|
18 |
+
# 使用loc属性
|
19 |
+
df.loc[:, '化学'] = [30, 89, 75]
|
20 |
+
print(df)
|
21 |
+
|
22 |
+
# 在指定索引位置加入一列
|
23 |
+
lst = [79, 85, 69]
|
24 |
+
df.insert(1, '历史', lst)
|
25 |
+
print(df)
|
26 |
+
|
27 |
+
"""
|
28 |
+
2、按行增加数据
|
29 |
+
"""
|
30 |
+
df.loc['陈六'] = [87, 56, 79, 0, 0, 0] # 添加一行
|
31 |
+
print(df)
|
32 |
+
|
33 |
+
# 添加多行
|
34 |
+
new_df = pd.DataFrame(
|
35 |
+
data={
|
36 |
+
'数学': [78, 90],
|
37 |
+
'历史': [56, 67],
|
38 |
+
'语文': [78, 65],
|
39 |
+
'英语': [90, 87],
|
40 |
+
'政治': [78, 68],
|
41 |
+
'化学': [90, 78]
|
42 |
+
},
|
43 |
+
index=['张丽丽', '王一一']
|
44 |
+
)
|
45 |
+
df = pd.concat([df, new_df])  # public replacement for the private DataFrame._append
|
46 |
+
print(df)
|
47 |
+
|
48 |
+
"""
|
49 |
+
3、修改数据
|
50 |
+
"""
|
51 |
+
# 修改列标题 -- 直接使用columns属性
|
52 |
+
df.columns = ['数学(上)', '历史', '语文', '英语(下)', '政治', '化学']
|
53 |
+
print(df)
|
54 |
+
|
55 |
+
# 修改列标题 -- 使用rename函数, inplace:是否直接修改DataFrame
|
56 |
+
df.rename(columns={'数学(上)': '数学(下)'}, inplace=True)
|
57 |
+
print(df)
|
58 |
+
|
59 |
+
# 修改行标题 -- 直接赋值
|
60 |
+
df.index = list('123456')
|
61 |
+
print(df)
|
62 |
+
|
63 |
+
# 修改行标题 -- rename方法
|
64 |
+
df.rename({'1': '一', '2': '二'}, inplace=True, axis=0)
|
65 |
+
print(df)
|
66 |
+
|
67 |
+
# 修改行数据
|
68 |
+
df.loc['3'] = [100, 100, 100, 100, 100, 100]
|
69 |
+
print(df)
|
70 |
+
|
71 |
+
df.iloc[0, :] = [100, 100, 100, 100, 100, 100] # 修改0行 所有数据
|
72 |
+
print(df)
|
73 |
+
|
74 |
+
# 修改列数据
|
75 |
+
df.loc[:, '数学(下)'] = [120, 120, 120, 120, 120, 120]
|
76 |
+
print(df)
|
77 |
+
df.iloc[:, 0] = [90, 90, 90, 90, 90, 90]
|
78 |
+
print(df)
|
79 |
+
|
80 |
+
# 修改某一处数据
|
81 |
+
df.loc['4', '语文'] = 150
|
82 |
+
print(df)
|
83 |
+
|
84 |
+
df.iloc[3, 2] = 250 # 按索引
|
85 |
+
print(df)
|
86 |
+
|
87 |
+
"""
|
88 |
+
4、数据的删除
|
89 |
+
"""
|
90 |
+
# 删除列
|
91 |
+
df.drop(['数学(下)'], axis=1, inplace=True)
|
92 |
+
print(df)
|
93 |
+
|
94 |
+
df.drop(columns='历史', inplace=True)
|
95 |
+
print(df)
|
96 |
+
|
97 |
+
df.drop(labels='化学', axis=1, inplace=True)
|
98 |
+
print(df)
|
99 |
+
|
100 |
+
# 删除行
|
101 |
+
df.drop(['6'], axis=0, inplace=True)
|
102 |
+
print(df)
|
103 |
+
|
104 |
+
df.drop(index='5', inplace=True)
|
105 |
+
print(df)
|
106 |
+
|
107 |
+
df.drop(labels='4', axis=0, inplace=True)
|
108 |
+
print(df)
|
109 |
+
|
110 |
+
# Conditional delete: drop the second row whose 政治 score is greater than 90
|
111 |
+
print(df[df['政治'] > 90])
|
112 |
+
print(df[df['政治'] > 90].index)
|
113 |
+
|
114 |
+
df.drop(df[df['政治'] > 90].index[1], inplace=True)
|
115 |
+
print(df)
|
116 |
+
|
02_Pandas/10_数据清洗.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
# 显示完整数据
|
4 |
+
pd.set_option('display.max_rows', 500)
|
5 |
+
pd.set_option('display.max_columns', 100)
|
6 |
+
pd.set_option('display.width', 1000)
|
7 |
+
|
8 |
+
pd.set_option('display.unicode.east_asian_width', True)  # exact option key (was misspelled with a dot and only matched via regex fallback)
|
9 |
+
df = pd.read_excel('resources/msb课程记录.xls')
|
10 |
+
print(df)
|
11 |
+
print('==========================')
|
12 |
+
|
13 |
+
# 查看缺失值
|
14 |
+
print(df.info())
|
15 |
+
print('==========================')
|
16 |
+
|
17 |
+
# 判断缺失值
|
18 |
+
print(df.isnull()) # 不为NaN时为False
|
19 |
+
print('==========================')
|
20 |
+
|
21 |
+
print(df.notnull()) # 不为NaN时为True
|
22 |
+
print('==========================')
|
23 |
+
|
24 |
+
# 对缺失值的处理 -- 删除
|
25 |
+
df = pd.read_excel('resources/msb课程记录.xls')
|
26 |
+
print(df['课程总数量'].notnull())
|
27 |
+
print(df[df['课程总数量'].notnull()]) # 将'课程总数量'缺失的行删除
|
28 |
+
|
29 |
+
print('==========================')
|
30 |
+
df = df.dropna() # 将有缺失值的行删除
|
31 |
+
print(df)
|
32 |
+
print('==========================')
|
33 |
+
|
34 |
+
# 对缺失值的处理 -- 填充
|
35 |
+
df = pd.read_excel('resources/msb课程记录.xls')
|
36 |
+
df['课程总数量'] = df['课程总数量'].fillna(0) # 如果'课程总数量'缺失,则填充为0
|
37 |
+
print(df)
|
38 |
+
print('==========================')
|
39 |
+
|
40 |
+
# 对重复值处理 -- 删除
|
41 |
+
print(df.duplicated()) # 判断重复值是否存在 -- 各列完全一样
|
42 |
+
df = df.drop_duplicates() # 去除所有重复数据 -- 各个数据完全一样
|
43 |
+
print(df)
|
44 |
+
|
45 |
+
df = df.drop_duplicates(['买家实际支付金额'], keep='last') # 删除'买家实际支付金额'的重复数据 -- 保留重复行中的最后一行
|
46 |
+
print(df)
|
47 |
+
|
48 |
+
# 直接删除,保留副本
|
49 |
+
df1 = df.drop_duplicates(['课程总数量'], inplace=False)
|
50 |
+
print(df1) # 新数据是删除之后的结果
|
51 |
+
print(df) # 原本的数据不变
|
02_Pandas/11_索引.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
s1 = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
|
4 |
+
print(s1)
|
5 |
+
|
6 |
+
s2 = pd.Series([2, 3, 4], index=['a', 'b', 'c'])
|
7 |
+
print(s2)
|
8 |
+
|
9 |
+
print(s1 + s2) # 同索引的元素相加
|
10 |
+
|
11 |
+
"""
|
12 |
+
1、Series重新设置索引
|
13 |
+
"""
|
14 |
+
# 重新设置索引
|
15 |
+
s = pd.Series([11, 22, 33], index=[1, 2, 3])
|
16 |
+
print(s)
|
17 |
+
print(s.reindex(range(1, 6))) # 多出来两个为NaN
|
18 |
+
|
19 |
+
# 使用0进行填充
|
20 |
+
print(s.reindex(range(1, 6), fill_value=0))
|
21 |
+
|
22 |
+
# 向前填充和向后填充
|
23 |
+
print(s.reindex(range(1, 6), method='ffill')) # 向前填充,出现NaN时,和前一个索引的值保持一致
|
24 |
+
print(s.reindex(range(1, 6), method='bfill')) # 向后填充,出现NaN时,和后一个索引的值保持一致
|
25 |
+
|
26 |
+
"""
|
27 |
+
2、DataFrame对象重新设置索引
|
28 |
+
"""
|
29 |
+
data = [[90, 100, 90], [100, 39, 89], [49, 79, 87]]
|
30 |
+
index = ['msb1001', 'msb1002', 'msb1003']
|
31 |
+
columns = ['数学', '语文', '英语']
|
32 |
+
|
33 |
+
pd.set_option('display.unicode.east_asian_width', True)  # exact option key (was misspelled with a dot and only matched via regex fallback)
|
34 |
+
df = pd.DataFrame(data=data, index=index, columns=columns)
|
35 |
+
print(df)
|
36 |
+
|
37 |
+
# 重新设置行索引
|
38 |
+
print(df.reindex(['msb1001', 'msb1002', 'msb1003', 'msb1004', 'msb1005']))
|
39 |
+
|
40 |
+
# 重新设置列索引
|
41 |
+
print(df.reindex(columns=['数学', '语文', '英语', '政治', '历史']))
|
42 |
+
|
43 |
+
# 同时设置行索引和列索引
|
44 |
+
print(df.reindex(index=['msb1001', 'msb1002', 'msb1003', 'msb1004', 'msb1005', 'msb1006'],
|
45 |
+
columns=['数学', '语文', '英语', '政治', '历史', '地理'],fill_value=0))
|
46 |
+
|
47 |
+
"""
|
48 |
+
3、设置某列为行索引
|
49 |
+
"""
|
50 |
+
# 显示完整数据
|
51 |
+
pd.set_option('display.max_rows', 500)
|
52 |
+
pd.set_option('display.max_columns', 100)
|
53 |
+
pd.set_option('display.width', 1000)
|
54 |
+
|
55 |
+
excel = pd.read_excel('resources/msb课程记录.xls')
|
56 |
+
print(excel)
|
57 |
+
|
58 |
+
excel = excel.set_index(['买家会员名']) # use the '买家会员名' column as the row index
|
59 |
+
print(excel)
|
60 |
+
|
61 |
+
"""
|
62 |
+
4、数据清洗之后,设置连续的索引
|
63 |
+
"""
|
64 |
+
excel = pd.read_excel('resources/msb课程记录.xls')
|
65 |
+
print(excel.dropna()) # 清理NaN数据
|
66 |
+
print(excel.dropna().reset_index(drop=True)) # 重新设置索引
|
02_Pandas/12_数据的排序.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
DataFrame的排序:
|
3 |
+
df.sort_values(by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False)
|
4 |
+
by: column name, or list of column names, to sort by
|
5 |
+
axis: 轴,0表示行,1表示列
|
6 |
+
ascending: 升序或者降序
|
7 |
+
inplace: 如果值为True,则就地排序
|
8 |
+
kind: sorting algorithm -- 'quicksort' (quick sort), 'mergesort' (merge sort), 'heapsort' (heap sort)
|
9 |
+
na_position: 空值的位置,'first'时空值排在前面,'last'时NaN排在后面
|
10 |
+
ignore_index: whether to discard the original index -- True relabels the result 0..n-1, False keeps the original index labels
|
11 |
+
"""
|
12 |
+
import pandas as pd
|
13 |
+
|
14 |
+
pd.set_option('display.max_rows', 500)
|
15 |
+
pd.set_option('display.max_columns', 100)
|
16 |
+
pd.set_option('display.width', 1000)
|
17 |
+
pd.set_option('display.unicode.east_asian_width', True)  # exact option key (was misspelled with a dot and only matched via regex fallback)
|
18 |
+
|
19 |
+
excel = pd.read_excel('resources/电脑配件销售记录.xlsx')
|
20 |
+
print(excel.head()) # 显示前5条
|
21 |
+
|
22 |
+
# 排序
|
23 |
+
excel = excel.sort_values(by='成交金额') # 默认升序
|
24 |
+
print(excel.head())
|
25 |
+
|
26 |
+
excel = excel.sort_values(by='成交金额', ascending=False) # 改成降序
|
27 |
+
print(excel.head())
|
28 |
+
|
29 |
+
# 根据多列进行排序,数量和成交金额 -- 先比较数量,再比较成交金额
|
30 |
+
excel = excel.sort_values(by=['数量','成交金额'], ascending=False)
|
31 |
+
print(excel)
|