{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "accepted-fields", "metadata": {}, "outputs": [], "source": [ "# DataFrame Series 两种数据结构\n", "# 集成时间序列功能\n", "# 提供丰富的数学运算和操作\n", "# 灵活处理缺失数据\n", "# pip install pandas 安装" ] }, { "cell_type": "code", "execution_count": 2, "id": "steady-mason", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2\n", "1 3\n", "2 4\n", "3 5\n", "dtype: int64" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "pd.Series([2,3,4,5])" ] }, { "cell_type": "code", "execution_count": 3, "id": "broke-combination", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 0\n", "b 0\n", "c 0\n", "d 0\n", "dtype: int64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.Series(0,index=['a','b','c','d'])" ] }, { "cell_type": "code", "execution_count": 4, "id": "arbitrary-ideal", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "a 2\n", "b 3\n", "c 4\n", "d 5\n", "dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr = pd.Series([2,3,4,5],index=['a','b','c','d'])\n", "sr" ] }, { "cell_type": "code", "execution_count": 5, "id": "finnish-nigeria", "metadata": {}, "outputs": [], "source": [ "# Series 是一种类似于数组的对象,由一组数据和一组与之相关的数据标签(索引) 组成\n", "\n", "# sr = pd.Series([2,3,4,5],index=['a','b','c','d'])\n", "# Series 支持 array的特性(下标)\n", "# 标量运算\n", "# 两个 series 运算\n", "# 索引\n", "# 切片\n", "# 通用函数\n", "# 布尔值过滤 sr[sr>0]\n", "\n", "# Series支持字典的特性(标签)\n", "# 从字典创建 Series Series(dict)\n", "# in 运算 'a' in sr\n", "# 键索引 sr['a'] sr[['a','b','c']]" ] }, { "cell_type": "code", "execution_count": 6, "id": "interstate-child", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr[0] # 定义了标签 abcd 但是 仍然可以使用下标访问 (注意: pandas 2.1 起这种按位置取值的写法已弃用, 建议改用 sr.iloc[0])" ] }, { "cell_type":
"code", "execution_count": 7, "id": "ranging-hunger", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 4\n", "b 6\n", "c 8\n", "d 10\n", "dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr+sr" ] }, { "cell_type": "code", "execution_count": 8, "id": "cloudy-governor", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 2\n", "b 3\n", "dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr[0:2]" ] }, { "cell_type": "code", "execution_count": 9, "id": "accessory-dryer", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "a 1\n", "b 2\n", "dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr = pd.Series({'a':1,'b':2}) # 通过字典创建 Series\n", "sr" ] }, { "cell_type": "code", "execution_count": 10, "id": "buried-confusion", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'a' in sr # in 操作" ] }, { "cell_type": "code", "execution_count": 11, "id": "native-august", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "2\n" ] } ], "source": [ "for i in sr: # for循环 得到的是 values ---Series\n", " print(i)" ] }, { "cell_type": "code", "execution_count": 12, "id": "bridal-portland", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "a\n", "b\n" ] } ], "source": [ "data1 = {'a':1,'b':2} # for 循环得到的是 key --- 字典\n", "for i in data1: \n", " print(i)" ] }, { "cell_type": "code", "execution_count": 13, "id": "starting-nashville", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['a', 'b'], dtype='object')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr.index" ] }, { "cell_type": "code", "execution_count": 14, "id": "alternative-heather", 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 2])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr.values" ] }, { "cell_type": "code", "execution_count": 15, "id": "planned-binary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 2\n", "b 3\n", "c 4\n", "d 5\n", "dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr = pd.Series([2,3,4,5],index=['a','b','c','d'])\n", "sr" ] }, { "cell_type": "code", "execution_count": 16, "id": "appropriate-evening", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 2\n", "c 4\n", "dtype: int64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr[['a','c']] # 花式索引也支持" ] }, { "cell_type": "code", "execution_count": 17, "id": "finnish-wilderness", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 2\n", "b 3\n", "c 4\n", "dtype: int64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr['a':'c'] # 通过 标签切片" ] }, { "cell_type": "code", "execution_count": 18, "id": "postal-lemon", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "0 2\n", "1 3\n", "2 4\n", "3 5\n", "4 6\n", "5 7\n", "6 8\n", "7 9\n", "8 10\n", "9 11\n", "10 12\n", "11 13\n", "12 14\n", "13 15\n", "14 16\n", "15 17\n", "16 18\n", "17 19\n", "dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# series 整数索引 注意事项 使用整数索引的时候 一定要使用 loc iloc\n", "\n", "sr = pd.Series(np.arange(2,20))\n", "sr" ] }, { "cell_type": "code", "execution_count": 19, "id": "thrown-hypothesis", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10 12\n", "11 13\n", "12 14\n", "13 15\n", "14 16\n", "15 17\n", "16 18\n", "17 19\n", "dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr2 = sr[10:].copy()\n", "sr2" ] }, 
{ "cell_type": "code", "execution_count": 20, "id": "distributed-ghost", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr2[10] # 默认是标签 不使用 iloc 则 解释为 标签" ] }, { "cell_type": "code", "execution_count": 21, "id": "similar-complexity", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr2.iloc[0] # iloc 代表使用下标" ] }, { "cell_type": "code", "execution_count": 22, "id": "discrete-promotion", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "19" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr2.iloc[-1]" ] }, { "cell_type": "code", "execution_count": 23, "id": "novel-destination", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10 12\n", "11 13\n", "12 14\n", "dtype: int64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr2.iloc[:3]" ] }, { "cell_type": "code", "execution_count": 24, "id": "sharing-average", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "13" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr.loc[11] # loc 代表使用的是 标签" ] }, { "cell_type": "code", "execution_count": 25, "id": "strong-receipt", "metadata": {}, "outputs": [], "source": [ "##### Series 数据对齐 ######################\n", "# pandas 在对两个 Series对象作运算时,会按索引对数据进行对齐 然后计算" ] }, { "cell_type": "code", "execution_count": 26, "id": "renewable-optimization", "metadata": {}, "outputs": [], "source": [ "sr1 = pd.Series([11,34,53],index=['a','c','b'])\n", "sr2 = pd.Series([2,1,5,4],index=['c','a','b','x'])" ] }, { "cell_type": "code", "execution_count": 27, "id": "blocked-carbon", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 11\n", "c 34\n", "b 53\n", "dtype: int64" ] }, "execution_count": 27, "metadata": {},
"output_type": "execute_result" } ], "source": [ "sr1" ] }, { "cell_type": "code", "execution_count": 28, "id": "later-highland", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "c 2\n", "a 1\n", "b 5\n", "x 4\n", "dtype: int64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr2" ] }, { "cell_type": "code", "execution_count": 29, "id": "clear-berlin", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 12.0\n", "b 58.0\n", "c 36.0\n", "x NaN\n", "dtype: float64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr1+sr2 # 按索引标签对齐后相加; 只在一方出现的标签(如 x) 结果为 NaN" ] }, { "cell_type": "code", "execution_count": 30, "id": "legendary-blast", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 12.0\n", "b 58.0\n", "c 36.0\n", "x NaN\n", "dtype: float64" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr1.add(sr2) # 和 sr1+sr2效果一样" ] }, { "cell_type": "code", "execution_count": 31, "id": "sitting-extra", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 12.0\n", "b 58.0\n", "c 36.0\n", "x 4.0\n", "dtype: float64" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr1.add(sr2,fill_value=0) # fill_value=0: 某一方缺失的标签先按 0 填充再相加, 所以 x 的结果是 4.0" ] }, { "cell_type": "code", "execution_count": 32, "id": "acute-behalf", "metadata": {}, "outputs": [], "source": [ "sr = sr1+sr2 " ] }, { "cell_type": "code", "execution_count": 33, "id": "attended-madagascar", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 12.0\n", "b 58.0\n", "c 36.0\n", "x NaN\n", "dtype: float64" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr" ] }, { "cell_type": "code", "execution_count": 34, "id": "sweet-threat", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a False\n", "b False\n", "c False\n", "x True\n", "dtype: bool" ] }, "execution_count": 34, "metadata": {},
"output_type": "execute_result" } ], "source": [ "sr.isnull() " ] }, { "cell_type": "code", "execution_count": 35, "id": "potential-manual", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a True\n", "b True\n", "c True\n", "x False\n", "dtype: bool" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr.notnull() " ] }, { "cell_type": "code", "execution_count": 36, "id": "rocky-rings", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 12.0\n", "b 58.0\n", "c 36.0\n", "dtype: float64" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr.dropna() # 删掉缺失值" ] }, { "cell_type": "code", "execution_count": 37, "id": "seeing-dinner", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 12.0\n", "b 58.0\n", "c 36.0\n", "x 0.0\n", "dtype: float64" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr.fillna(0) # 所有nan的值赋值为 0" ] }, { "cell_type": "code", "execution_count": 38, "id": "oriental-general", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 12.000000\n", "b 58.000000\n", "c 36.000000\n", "x 35.333333\n", "dtype: float64" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr.fillna(sr.mean()) # 把缺失值 填充为平均值" ] }, { "cell_type": "code", "execution_count": 39, "id": "animal-sport", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "35.333333333333336" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sr.mean() # 算平均值 会把Nan自动过滤掉" ] }, { "cell_type": "code", "execution_count": null, "id": "round-america", "metadata": {}, "outputs": [], "source": [ "############ Series 小结\n", "\n", "# 数组 + 字典\n", "# 整数索引 注意事项 需要使用 loc iloc\n", "# 数据对齐 当运算的时候 是根据标签对齐的, 数据缺失的处理 fillna dropna\n" ] }, { "cell_type": "code", "execution_count": 40, "id": "gentle-finnish", "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"0.88605203734286" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "52580.1/59342" ] }, { "cell_type": "code", "execution_count": null, "id": "mechanical-citation", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 5 }