Web scraping: ChatGPT-generated code for scraping Baidu News


import requests
from bs4 import BeautifulSoup
import re

# Fetch the Baidu News front page
url = "https://news.baidu.com/"
response = requests.get(url)
html = response.content
soup = BeautifulSoup(html, 'html.parser')

# Collect links that look like news articles: URLs containing a run of 10+ digits and ending in .html
news_list = soup.find_all('a', href=re.compile(r'^http(s)?://(news\.baidu\.com)?/.*\d{10,}.*\.html$'))

# Loop over the news links and scrape each article's title, URL, time, and body
for news in news_list:
    try:
        news_url = news['href']
        news_response = requests.get(news_url)
        news_html = news_response.content
        news_soup = BeautifulSoup(news_html, 'html.parser')

        # Article title
        news_title = news_soup.find('title').text

        # Publication time
        news_time = news_soup.find('span', class_='date').text

        # Article body: concatenate the text of all <p> tags
        news_content = ''
        for p in news_soup.find_all('p'):
            news_content += p.text.strip()

        print(news_title)
        print(news_url)
        print(news_time)
        print(news_content)
        print('\n')

    except Exception as e:
        print(e)
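
Note that the generated script assumes Baidu will serve pages to a bare requests call and that every article page contains a span with class "date". In practice it helps to send a browser-like User-Agent and set a timeout. Below is a minimal sketch of such a fetch helper; the fetch name, the header string, and the 10-second timeout are assumptions for illustration, not part of the ChatGPT output.

import requests

def fetch(url):
    # Hypothetical helper (assumed values): send a browser-like User-Agent so the
    # request is less likely to be rejected, and use a timeout so it cannot hang.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    return response.content

Replacing the two requests.get(...) calls in the script with fetch(...) leaves the rest of the code unchanged.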