{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "accepted-fields",
   "metadata": {},
   "outputs": [],
   "source": [
    "# DataFrame Series  两种数据结构\n",
    "# 集成时间序列功能\n",
    "# 提供丰富的数学运算和操作\n",
    "# 灵活处理缺失数据\n",
    "# pip install pandas 安装"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "steady-mason",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    2\n",
       "1    3\n",
       "2    4\n",
       "3    5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "pd.Series([2,3,4,5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "broke-combination",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "b    0\n",
       "c    0\n",
       "d    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series(0,index=['a','b','c','d'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "arbitrary-ideal",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    3\n",
       "c    4\n",
       "d    5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr = pd.Series([2,3,4,5],index=['a','b','c','d'])\n",
    "sr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "finnish-nigeria",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Series 是一种类似于数组的对象,由一组数据和一组与之相关的数据标签(索引) 组成\n",
    "\n",
    "# sr = pd.Series([2,3,4,5],index=['a','b','c','d'])\n",
    "# Series 支持 array的特性(下标)\n",
    "# 标量运算\n",
    "# 两个 series 运算\n",
    "# 索引\n",
    "# 切片\n",
    "# 通用函数\n",
    "# 布尔值过滤 sr[sr>0]\n",
    "\n",
    "# Series支持字典的特性（标签）\n",
    "# 从字典创建 Series  Series(dict)\n",
    "# in 运算  'a' in sr\n",
    "# 键索引 sr['a'] sr[['a','b','c']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "interstate-child",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr.iloc[0]  # labels a-d are defined; positional access goes through .iloc (a bare sr[0] on a labelled Series is deprecated in newer pandas)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ranging-hunger",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a     4\n",
       "b     6\n",
       "c     8\n",
       "d    10\n",
       "dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr+sr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "cloudy-governor",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr[0:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "accessory-dryer",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    1\n",
       "b    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr = pd.Series({'a':1,'b':2})  # 通过字典创建 Series\n",
    "sr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "buried-confusion",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'a' in sr  # in 操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "native-august",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n"
     ]
    }
   ],
   "source": [
    "for i in sr: # for循环 得到的是 values  ---Series\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "bridal-portland",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a\n",
      "b\n"
     ]
    }
   ],
   "source": [
    "data1 = {'a':1,'b':2}  # for 循环得到的是 key --- 字典\n",
    "for i in data1: \n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "starting-nashville",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['a', 'b'], dtype='object')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "alternative-heather",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 2])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "planned-binary",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    3\n",
       "c    4\n",
       "d    5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr = pd.Series([2,3,4,5],index=['a','b','c','d'])\n",
    "sr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "appropriate-evening",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "c    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr[['a','c']] # 花式索引也支持"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "finnish-wilderness",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    3\n",
       "c    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr['a':'c'] # 通过 标签切片"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "postal-lemon",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      2\n",
       "1      3\n",
       "2      4\n",
       "3      5\n",
       "4      6\n",
       "5      7\n",
       "6      8\n",
       "7      9\n",
       "8     10\n",
       "9     11\n",
       "10    12\n",
       "11    13\n",
       "12    14\n",
       "13    15\n",
       "14    16\n",
       "15    17\n",
       "16    18\n",
       "17    19\n",
       "dtype: int64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# series 整数索引 注意事项 使用整数索引的时候 一定要使用 loc iloc\n",
    "\n",
    "sr = pd.Series(np.arange(2,20))\n",
    "sr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "thrown-hypothesis",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10    12\n",
       "11    13\n",
       "12    14\n",
       "13    15\n",
       "14    16\n",
       "15    17\n",
       "16    18\n",
       "17    19\n",
       "dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr2 = sr[10:].copy()\n",
    "sr2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "distributed-ghost",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr2[10] # 默认是标签 不使用 iloc 则 解释为 标签"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "similar-complexity",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr2.iloc[0]  # iloc 代表使用下标"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "discrete-promotion",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr2.iloc[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "novel-destination",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10    12\n",
       "11    13\n",
       "12    14\n",
       "dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr2.iloc[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "sharing-average",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr2.loc[11]  # .loc looks up by label: label 11 sits at position 1 of sr2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "strong-receipt",
   "metadata": {},
   "outputs": [],
   "source": [
    "##### Series 数据对齐 ######################\n",
    "# pandas 在对两个 Series对象作运算时,会按索引对数据进行对齐 然后计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "renewable-optimization",
   "metadata": {},
   "outputs": [],
   "source": [
    "sr1 = pd.Series([11,34,53],index=['a','c','b'])\n",
    "sr2 = pd.Series([2,1,5,4],index=['c','a','b','x'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "blocked-carbon",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    11\n",
       "c    34\n",
       "b    53\n",
       "dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "later-highland",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "c    2\n",
       "a    1\n",
       "b    5\n",
       "x    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "clear-berlin",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    12.0\n",
       "b    58.0\n",
       "c    36.0\n",
       "x     NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr1+sr2 # 按标签对齐后相加; 标签 x 只存在于 sr2 中, 对应结果为 NaN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "legendary-blast",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    12.0\n",
       "b    58.0\n",
       "c    36.0\n",
       "x     NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr1.add(sr2)  #  和 sr1+sr2效果一样"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "sitting-extra",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    12.0\n",
       "b    58.0\n",
       "c    36.0\n",
       "x     4.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr1.add(sr2,fill_value=0) # fill_value=0: 只在一方存在的标签, 缺失的一方按 0 参与运算, 所以 x = 0 + 4 = 4.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "acute-behalf",
   "metadata": {},
   "outputs": [],
   "source": [
    "sr = sr1+sr2 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "attended-madagascar",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    12.0\n",
       "b    58.0\n",
       "c    36.0\n",
       "x     NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "sweet-threat",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    False\n",
       "b    False\n",
       "c    False\n",
       "x     True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr.isnull() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "potential-manual",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a     True\n",
       "b     True\n",
       "c     True\n",
       "x    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr.notnull() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "rocky-rings",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    12.0\n",
       "b    58.0\n",
       "c    36.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr.dropna() # 删掉缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "seeing-dinner",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    12.0\n",
       "b    58.0\n",
       "c    36.0\n",
       "x     0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr.fillna(0) # 所有nan的值赋值为 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "oriental-general",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    12.000000\n",
       "b    58.000000\n",
       "c    36.000000\n",
       "x    35.333333\n",
       "dtype: float64"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr.fillna(sr.mean()) # 把缺失值 填充为平均值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "animal-sport",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "35.333333333333336"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sr.mean() # 算平均值       会把Nan自动过滤掉"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "round-america",
   "metadata": {},
   "outputs": [],
   "source": [
    "############  Series 小结\n",
    "\n",
    "# 数组 + 字典\n",
    "# 整数索引 注意事项 需要使用 loc iloc\n",
    "# 数据对齐 当运算的时候  是根据标签对齐的, 数据缺失的处理 fillna  dropna\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "gentle-finnish",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.88605203734286"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "52580.1/59342"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "mechanical-citation",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}