{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "treated-gardening",
"metadata": {},
"outputs": [],
"source": [
"# DataFrame Series 两种数据结构\n",
"# 集成时间序列功能\n",
"# 提供丰富的数学运算和操作\n",
"# 灵活处理缺失数据\n",
"# pip install pandas 安装"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "million-jerusalem",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 2\n",
"1 3\n",
"2 4\n",
"3 5\n",
"dtype: int64"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"pd.Series([2,3,4,5])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "straight-norfolk",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 1 | \n",
" 6 | \n",
"
\n",
" \n",
" b | \n",
" 2 | \n",
" 7 | \n",
"
\n",
" \n",
" c | \n",
" 3 | \n",
" 9 | \n",
"
\n",
" \n",
" d | \n",
" 4 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 1 6\n",
"b 2 7\n",
"c 3 9\n",
"d 4 2"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({\"one\":[1,2,3,4],\"two\":[6,7,9,2]},index=['a','b','c','d'])\n",
"df "
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "strong-employment",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 6 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 7 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 9 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"0 1 6\n",
"1 2 7\n",
"2 3 9\n",
"3 4 2"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1 = pd.DataFrame({\"one\":[1,2,3,4],\"two\":[6,7,9,2]})\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "accepting-frank",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 1.0 | \n",
" 1 | \n",
"
\n",
" \n",
" b | \n",
" 2.0 | \n",
" 7 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 1.0 1\n",
"b 2.0 7\n",
"c 3.0 9\n",
"x NaN 8"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = pd.DataFrame({\"one\":pd.Series([1,2,3],index=['a','b','c']),\"two\":pd.Series([7,9,1,8],index=['b','c','a','x'])})\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "scientific-electricity",
"metadata": {},
"outputs": [],
"source": [
"# CSV round trip: save the DataFrame to a CSV file, then load it back.\n",
"# The write must come first: on a fresh run test.csv does not exist yet,\n",
"# so reading before writing raises FileNotFoundError.\n",
"df.to_csv('test.csv')  # save as a CSV file (the row labels are written as the first column)\n",
"pd.read_csv('test.csv', index_col=0)  # read it back, using the first column as the index again"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "worse-poster",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 1.0 | \n",
" 1 | \n",
"
\n",
" \n",
" b | \n",
" 2.0 | \n",
" 7 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 1.0 1\n",
"b 2.0 7\n",
"c 3.0 9\n",
"x NaN 8"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "starting-chinese",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['a', 'b', 'c', 'x'], dtype='object')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.index # 获取索引"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "wired-circular",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1., 1.],\n",
" [ 2., 7.],\n",
" [ 3., 9.],\n",
" [nan, 8.]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.values # 获取值 返回二维数组"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "basic-buyer",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['one', 'two'], dtype='object')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.columns # 获取 列索引"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "aware-retention",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" x | \n",
"
\n",
" \n",
" \n",
" \n",
" one | \n",
" 1.0 | \n",
" 2.0 | \n",
" 3.0 | \n",
" NaN | \n",
"
\n",
" \n",
" two | \n",
" 1.0 | \n",
" 7.0 | \n",
" 9.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c x\n",
"one 1.0 2.0 3.0 NaN\n",
"two 1.0 7.0 9.0 8.0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.T # 转置"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "afraid-moore",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 1 | \n",
" 6 | \n",
"
\n",
" \n",
" b | \n",
" 2 | \n",
" 7 | \n",
"
\n",
" \n",
" c | \n",
" 3 | \n",
" 9 | \n",
"
\n",
" \n",
" d | \n",
" 4 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 1 6\n",
"b 2 7\n",
"c 3 9\n",
"d 4 2"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df "
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ranking-oliver",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 4.000000 | \n",
" 4.00000 | \n",
"
\n",
" \n",
" mean | \n",
" 2.500000 | \n",
" 6.00000 | \n",
"
\n",
" \n",
" std | \n",
" 1.290994 | \n",
" 2.94392 | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" 2.00000 | \n",
"
\n",
" \n",
" 25% | \n",
" 1.750000 | \n",
" 5.00000 | \n",
"
\n",
" \n",
" 50% | \n",
" 2.500000 | \n",
" 6.50000 | \n",
"
\n",
" \n",
" 75% | \n",
" 3.250000 | \n",
" 7.50000 | \n",
"
\n",
" \n",
" max | \n",
" 4.000000 | \n",
" 9.00000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"count 4.000000 4.00000\n",
"mean 2.500000 6.00000\n",
"std 1.290994 2.94392\n",
"min 1.000000 2.00000\n",
"25% 1.750000 5.00000\n",
"50% 2.500000 6.50000\n",
"75% 3.250000 7.50000\n",
"max 4.000000 9.00000"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe() # 获取快速统计信息"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "vietnamese-vulnerability",
"metadata": {},
"outputs": [],
"source": [
"# DataFrame 小结\n",
"\n",
"# index 获取行索引\n",
"# columns 获取列索引\n",
"# values 值 二维数组\n",
"# T 转置\n",
"# describe() 一些统计信息"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "patent-completion",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 1.0 | \n",
" 1 | \n",
"
\n",
" \n",
" b | \n",
" 2.0 | \n",
" 7 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 1.0 1\n",
"b 2.0 7\n",
"c 3.0 9\n",
"x NaN 8"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "invalid-riding",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2['one']['a'] # Dataframe 切片 先选择 列 再选择 行"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "interested-holmes",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.loc['a','one'] # 推荐写法 先指定loc 根据标签选择 先选行 再选列"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "confused-benchmark",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 1.0\n",
"b 2.0\n",
"c 3.0\n",
"x NaN\n",
"Name: one, dtype: float64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2['one'] # 可以直接查看一列的数据"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "virtual-stocks",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"one 1.0\n",
"two 1.0\n",
"Name: a, dtype: float64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.loc['a',:] # 查看 一行的数据"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "compliant-franklin",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 1.0 | \n",
" 1 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 1.0 1\n",
"c 3.0 9"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.loc[['a','c'],:] # 花式索引 可以指定 行 和 列"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "particular-being",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 1\n",
"c 9\n",
"Name: two, dtype: int64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.loc[['a','c'],'two']"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "valid-operator",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2 | \n",
" 1 | \n",
"
\n",
" \n",
" b | \n",
" 4 | \n",
" 7 | \n",
"
\n",
" \n",
" c | \n",
" 3 | \n",
" 9 | \n",
"
\n",
" \n",
" x | \n",
" 1 | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2 1\n",
"b 4 7\n",
"c 3 9\n",
"x 1 8"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame对象 数据对齐 # # # # # # # \n",
"df1 = pd.DataFrame({\"one\":pd.Series([1,2,3,4],index=['x','a','c','b']),\"two\":pd.Series([7,9,1,8],index=['b','c','a','x'])})\n",
"df1"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "satellite-invalid",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 7 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9\n",
"b 1.0 8\n",
"c 3.0 7\n",
"x NaN 1"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame对象 数据对齐 # # # # # # # \n",
"df2 = pd.DataFrame({\"one\":pd.Series([1,2,3],index=['b','a','c']),\"two\":pd.Series([7,9,1,8],index=['c','a','x','b'])})\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "tired-signature",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 4.0 | \n",
" 10 | \n",
"
\n",
" \n",
" b | \n",
" 5.0 | \n",
" 15 | \n",
"
\n",
" \n",
" c | \n",
" 6.0 | \n",
" 16 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 4.0 10\n",
"b 5.0 15\n",
"c 6.0 16\n",
"x NaN 9"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1 + df2"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "catholic-advantage",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 7 | \n",
"
\n",
" \n",
" x | \n",
" 0.0 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9\n",
"b 1.0 8\n",
"c 3.0 7\n",
"x 0.0 1"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.fillna(0) # 填充数据"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "worthy-manor",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 7 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9\n",
"b 1.0 8\n",
"c 3.0 7"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.dropna() # dropna 会把数据直接丢失掉"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "colonial-climb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 7 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9\n",
"b 1.0 8\n",
"c 3.0 7\n",
"x NaN 1"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "turkish-institution",
"metadata": {},
"outputs": [],
"source": [
"# 演示 只把 全是nan的行 数据全部丢掉\n",
"df2.loc['c','two'] = np.nan\n",
"df2.loc['x','two'] = np.nan"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "arranged-configuration",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" NaN | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"c 3.0 NaN\n",
"x NaN NaN"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "generic-audit",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "transparent-digest",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"c 3.0 NaN"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.dropna(how='all') # 只有整行是 nan 的才删除掉 how=all 默认是 any 只要行包含nan都会被删掉"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "dependent-resistance",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"c 3.0 9.0\n",
"x NaN 5.0"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.loc['c','two'] = 9\n",
"df2.loc['x','two'] = 5\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "champion-president",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 9.0 | \n",
"
\n",
" \n",
" x | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" two\n",
"a 9.0\n",
"b 8.0\n",
"c 9.0\n",
"x 5.0"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 当有 nan 的时候 删除一列\n",
"df2.dropna(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "activated-specific",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" b | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" c | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" x | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a False False\n",
"b False False\n",
"c False False\n",
"x True False"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.isnull()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "sexual-matrix",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" x | \n",
" 2.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"c 3.0 9.0\n",
"x 2.0 5.0"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.fillna(2)  # return a copy with every NaN replaced by 2 (df2 itself is not modified)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "intense-given",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" b | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" c | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" x | \n",
" False | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a True True\n",
"b True True\n",
"c True True\n",
"x False True"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.notnull()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "classified-roulette",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"c 3.0 9.0\n",
"x NaN 5.0"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "structural-bumper",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"one 2.00\n",
"two 7.75\n",
"dtype: float64"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.mean() # 默认按列取 平均数"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "shaped-savage",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 5.5\n",
"b 4.5\n",
"c 6.0\n",
"x 5.0\n",
"dtype: float64"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.mean(axis=1) # 按照行取平均数"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "wooden-price",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"one 6.0\n",
"two 31.0\n",
"dtype: float64"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sum() # 按照列 求和"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "express-train",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 11.0\n",
"b 9.0\n",
"c 12.0\n",
"x 5.0\n",
"dtype: float64"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sum(axis=1) # 按照行 求和 "
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "flush-player",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"c 3.0 9.0\n",
"x NaN 5.0"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "choice-anchor",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"b 1.0 8.0\n",
"a 2.0 9.0\n",
"c 3.0 9.0\n",
"x NaN 5.0"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sort_values(by='one') # 按照列排序 正序 并且 nan都会被放在最后面"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "great-exposure",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"c 3.0 9.0\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"x NaN 5.0"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sort_values(by='one', ascending=False) # 按照列排序 倒序 并且 nan都会被放在最后面"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "included-variance",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" two | \n",
" one | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 9.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
" b | \n",
" 8.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" c | \n",
" 9.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" x | \n",
" 5.0 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" two one\n",
"a 9.0 2.0\n",
"b 8.0 1.0\n",
"c 9.0 3.0\n",
"x 5.0 NaN"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sort_values(by='c',ascending=False, axis=1) # axis=1 按行排序 很少用, 了解一下"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "quick-theater",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"c 3.0 9.0\n",
"x NaN 5.0"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "dated-major",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"c 3.0 9.0\n",
"x NaN 5.0"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sort_index() # 按照索引排序"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "incorporate-founder",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" x | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"x NaN 5.0\n",
"c 3.0 9.0\n",
"b 1.0 8.0\n",
"a 2.0 9.0"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sort_index(ascending=False) # 按照索引排序 倒顺序 "
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "industrial-privilege",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" one | \n",
" two | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 2.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" b | \n",
" 1.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" c | \n",
" 3.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" x | \n",
" NaN | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" one two\n",
"a 2.0 9.0\n",
"b 1.0 8.0\n",
"c 3.0 9.0\n",
"x NaN 5.0"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "closed-investor",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" two | \n",
" one | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 9.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
" b | \n",
" 8.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" c | \n",
" 9.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" x | \n",
" 5.0 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" two one\n",
"a 9.0 2.0\n",
"b 8.0 1.0\n",
"c 9.0 3.0\n",
"x 5.0 NaN"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sort_index(ascending=False,axis=1) # two\tone"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "warming-transmission",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"datetime.datetime(2020, 2, 20, 0, 0)"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Import the submodule explicitly: on older python-dateutil releases a bare\n",
"# `import dateutil` does not load `dateutil.parser`, so the attribute lookup only\n",
"# works if some other package happened to import it first.\n",
"import dateutil.parser\n",
"dateutil.parser.parse('2020-02-20')  # parse a date string into a datetime.datetime"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "representative-allergy",
"metadata": {},
"outputs": [],
"source": [
"# Uses the `pd` alias from the `import pandas as pd` cell at the top of the notebook;\n",
"# run that cell first (the NameError previously saved here came from a restarted kernel).\n",
"pd.to_datetime(['2020-12-20', '2021-02-01'])  # convert a list of date strings to a DatetimeIndex"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "overall-chain",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}