# pip install beautifulsoup4
# pip install requests
# (These two modules also need to be installed in PyCharm's interpreter settings.)
import requests  # send HTTP requests while posing as a browser
from bs4 import BeautifulSoup

'''
Parse the HTML string into an object, then search it with .find / .find_all
'''

response = requests.get("https://www.autohome.com.cn/news/")
response.encoding = "gbk"  # the page is GBK-encoded
# print(response.text)

soup = BeautifulSoup(response.text, 'html.parser')
div = soup.find(name="div", attrs={'id': 'auto-channel-lazyload-article'})
# print(div.text)

li_list = div.find_all(name='li')
for li in li_list:
    title = li.find(name='h3')
    if not title:  # skip <li> items that are not news entries
        continue
    p = li.find(name='p')
    a = li.find(name='a')
    print(title.text)
    # attrs is a dict of all attributes of the <a> tag; any other attribute
    # present on the tag can be read from it the same way
    print(a.attrs.get("href"))
    print(p.text)

    img = li.find(name='img')
    src = img.get("src")
    if not src.startswith("http"):  # src may be protocol-relative, e.g. //www3.autoimg.cn/...
        src = "https:" + src
    print(src)

    # Send another request to download the image
    ret = requests.get(src)
    # ret.text     is the decoded string
    # ret.content  is the raw, undecoded binary body
    file_name = src.rsplit("/", maxsplit=1)[1]
    with open(file_name, 'wb') as f:
        f.write(ret.content)