[Python]获取整个网页的文章标题和链接_源代码

日期：2023-12-11 栏目：暂未分类 浏览：2106次 评论：2条 侧边栏

# 引入必备模块
import requests
from bs4 import BeautifulSoup

# 创建函数
def get_article_title(url):
    """Fetch a web page and print the title and link of each linked heading.

    Every ``<h2>`` element in the page is inspected. For each one that
    contains an ``<a>`` tag with an ``href`` attribute, the link text and the
    href are printed on one line, separated by a tab.

    Args:
        url: Address of the page to download.

    Returns:
        None. Results are written to standard output.

    Raises:
        Exception: If the server answers with a status code other than 200.
            Errors raised by ``requests.get`` itself (connection failure,
            timeout) propagate unchanged.
    """
    # Without a timeout the call can block forever on an unresponsive server.
    r = requests.get(url, timeout=10)

    # Anything other than 200 is treated as a failure; include the details so
    # the caller can tell what went wrong.
    if r.status_code != 200:
        raise Exception(f"Request to {url} failed with status {r.status_code}")

    # Parse the response body with the standard-library HTML parser.
    soup = BeautifulSoup(r.text, "html.parser")

    # Collect all <h2> elements. Pass attributes to narrow the match if
    # needed, e.g. soup.find_all("h2", class_="entry-title").
    h2_nodes = soup.find_all("h2")

    for h2_node in h2_nodes:
        # First <a> inside the heading; None when the heading has no link.
        link = h2_node.find("a")
        if link is None:
            continue

        # An anchor without href carries no article link; skip it rather
        # than fail on the missing attribute.
        href = link.get("href")
        if href is None:
            continue

        # link.get_text() is the article title, href is the article link.
        print(link.get_text(), "\t", href)

# Example usage.
# get_article_title prints its results itself and has no return value, so the
# call is not bound to a name or printed (doing so would only emit "None").
url = 'https://www.vxia.net'
get_article_title(url)

赞赏

赞赏方式：