精华内容
下载资源
问答
  • pyquery

    2020-04-19 20:37:27
    代码: # -*- coding: utf-8 -*- # @Time : 2020/4/19 19:54 # @Author : Oneqq # @File : 21.pyquery的使用.py ...from pyquery import PyQuery as pq import requests from fake_useragent import ...

    代码:

    # -*- coding: utf-8 -*-
    # @Time : 2020/4/19 19:54
    # @Author : Oneqq
    # @File : 21.pyquery的使用.py
    # @Software: PyCharm
    
    from pyquery import PyQuery as pq
    import requests
    from fake_useragent import UserAgent
    
    # Fetch the page, select the rows of the element with id "ip_list", and
    # print each row as "ip : port : type".
    url = "https://www.xicidaili.com/"
    headers = {
        # Random User-Agent string produced by fake_useragent.
        "User-Agent": UserAgent().random
    }
    # Bound the request so a stalled server cannot hang the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    doc = pq(response.text)
    trs = doc("#ip_list tr")

    # Start at index 1 so the first row of the table is skipped.
    for num in range(1, len(trs)):
        # Select this row's cells once instead of re-querying for every column.
        tds = trs.eq(num).find("td")
        ip = tds.eq(1).text()
        port = tds.eq(2).text()
        types = tds.eq(5).text()
        # Rows that yield no IP text are not printed.
        if ip:
            print(ip, ":", port, ":", types)
    

    结果:

    展开全文
  • PyQuery

    2020-02-19 11:49:33
    PyQuery 强大又灵活的网页解析库。如果你觉得正则写起来太麻烦,如果你觉得BeautifulSoup语法太难记,如果你熟悉jQuery的语法,那么PyQuery就是你的最佳选择。 文章目录PyQuery字符串初始化URL初始化基本CSS选择器...

    PyQuery

    强大又灵活的网页解析库。如果你觉得正则写起来太麻烦,如果你觉得BeautifulSoup语法太难记,如果你熟悉jQuery的语法,那么PyQuery就是你的最佳选择。

    字符串初始化

    html = '''
    <div id="container">
        <ul class="list">
            <li class="item-0">first item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
        </ul>
    </div>
    '''
    from pyquery import PyQuery as pq
    doc = pq(html) # 声明pyquery对象
    print(doc('li'))   #同样使用CSS选择器,规则相似
    

    输出

    <li class="item-0">first item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
    

    URL初始化

    from pyquery import PyQuery as pq
    doc = pq(url="http://www.baidu.com")  #自动请求链接,并返回html
    #另外还可以通过向filename传递参数进行文件初始化
    print(doc('head'))
    

    输出

    <head><meta http-equiv="content-type" content="text/html;charset=utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=Edge"/><meta content="always" name="referrer"/><link rel="stylesheet" type="text/css" href="http://s1.bdstatic.com/r/www/cache/bdorz/baidu.min.css"/><title>ç™¾åº¦ä¸€ä¸‹ï¼Œä½ å°±çŸ¥é“</title></head>
    

    基本CSS选择器

    from pyquery import PyQuery as pq
    doc = pq(html)  # html见前例
    print(doc("#container .list li"))  
    #之间不一定非要有父子关系,只需要有层级关系
    
    <li class="item-0">first item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
    

    查找元素

    1.子元素

    from pyquery import PyQuery as pq
    doc = pq(html)
    items = doc(".list")
    print(type(items))
    print(items)
    lis = items.find("li")  # 所有结果
    print(type(lis))
    print(lis)
    

    输出

    <class 'pyquery.pyquery.PyQuery'>   
    #为pyquery对象说明可以调用与之相关的一切方法
    <ul class="list">
            <li class="item-0">first item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
        </ul>
    <class 'pyquery.pyquery.PyQuery'>
    <li class="item-0">first item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
    
    from pyquery import PyQuery as pq
    doc = pq(html)
    items = doc(".list")
    lis = items.children()  
    # 查找直接子元素,也可以向children中传递参数用以筛选
    print(items)
    print(lis)
    
    <ul class="list">
            <li class="item-0">first item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
        </ul>
    
    <li class="item-0">first item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
    

    2.父元素

    from pyquery import PyQuery as pq
    doc = pq(html)
    items = doc(".list")
    container = items.parent()  #有且仅有一个父节点
    print(type(container))
    print(container)
    

    输出

    <class 'pyquery.pyquery.PyQuery'>
    <div id="container">
        <ul class="list">
            <li class="item-0">first item</li>
            <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
        </ul>
    </div>
    
    html = '''
    <div class="wrap">
        <div id="container">
            <ul class="list">
                <li class="item-0">first item</li>
                <li class="item-1"><a href="link2.html">second item</a></li>
                <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
                <li class="item-1 active"><a href="link4.html">fourth item</a></li>
                <li class="item-0"><a href="link5.html">fifth item</a></li>
            </ul>
        </div>
    </div>
    '''
    from pyquery import PyQuery as pq
    doc = pq(html)
    items = doc(".list")
    parents = items.parents()
    #所有的祖先节点,每次输出一遍,可传入CSS选择器再进行筛选
    print(type(parents))
    print(parents)
    

    输出

    <class 'pyquery.pyquery.PyQuery'>
    <div class="wrap">
        <div id="container">
            <ul class="list">
                <li class="item-0">first item</li>
                <li class="item-1"><a href="link2.html">second item</a></li>
                <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
                <li class="item-1 active"><a href="link4.html">fourth item</a></li>
                <li class="item-0"><a href="link5.html">fifth item</a></li>
            </ul>
        </div>
    </div><div id="container">
            <ul class="list">
                <li class="item-0">first item</li>
                <li class="item-1"><a href="link2.html">second item</a></li>
                <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
                <li class="item-1 active"><a href="link4.html">fourth item</a></li>
                <li class="item-0"><a href="link5.html">fifth item</a></li>
            </ul>
        </div>
    

    3.兄弟元素

    from pyquery import PyQuery as pq
    doc = pq(html)
    li = doc(".list .item-0.active")   
    #前面class=list代表要在此标签中寻找,
    #后面两个class之间无空格代表要求class同时满足item-0与active
    print(li.siblings())  #此时会输出除筛选标签以外的所有兄弟元素,
    #还可以传入参数,例如”active”,则会在结果中筛选带有active的兄弟元素
    

    输出

    <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-0">first item</li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
    

    遍历

    from pyquery import PyQuery as pq
    doc = pq(html)
    li = doc("li").items()
    print(type(li))
    for i in li:
        print(i)  #每一个i又是一个pyquery元素,可以使用pyquery方法
    

    输出

    <class 'generator'>
    <li class="item-0">first item</li>
    <li class="item-1"><a href="link2.html">second item</a></li>
    <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
    <li class="item-1 active"><a href="link4.html">fourth item</a></li>
    <li class="item-0"><a href="link5.html">fifth item</a></li>
    

    获取信息

    获取属性

    from pyquery import PyQuery as pq
    doc = pq(html)
    a = doc('.item-0.active a')  #空格表示a位于前者之内
    print(a)
    print(a.attr('href'))   #获取属性
    print(a.attr.href)   #获取属性
    

    输出

    <a href="link3.html"><span class="bold">third item</span></a>
    link3.html
    link3.html
    

    获取文本

    from pyquery import PyQuery as pq
    doc = pq(html)
    a = doc('.item-0.active a')
    print(a)
    print(a.text())  # 选中文字
    

    输出

    <a href="link3.html"><span class="bold">third item</span></a>
    third item
    

    获取html

    from pyquery import PyQuery as pq
    doc = pq(html)
    a = doc('.item-0.active')
    print(a)
    print(a.html()) #除去li标签,剩下的内容
    

    输出

    <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
    <a href="link3.html"><span class="bold">third item</span></a>
    

    DOM操作

    addClass、removeClass

    from pyquery import PyQuery as pq
    doc = pq(html)
    li = doc('.item-0.active')
    print(li)
    li.remove_class('active')  # 移除
    print(li)
    li.add_class('active')  # 增添
    print(li)
    

    输出

    <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
    <li class="item-0"><a href="link3.html"><span class="bold">third item</span></a></li>
    <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
    

    attr、css

    from pyquery import PyQuery as pq
    doc = pq(html)
    li = doc('.item-0.active')
    print(li)
    li.attr('name','link')   #若name属性不存在,则会添加;
                             #若已存在,则改变
    print(li)
    li.css('font-size','14px')   #添加style属性
    print(li)
    
    <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
    <li class="item-0 active" name="link"><a href="link3.html"><span class="bold">third item</span></a></li>
    <li class="item-0 active" name="link" style="font-size: 14px"><a href="link3.html"><span class="bold">third item</span></a></li>
    

    remove等等其他DOM方法

    伪类选择器

    from pyquery import PyQuery as pq
    doc = pq(html)
    li1 = doc('li:first-child')  # <li> that is the first child of its parent
    li2 = doc('li:last-child')   # <li> that is the last child of its parent
    li3 = doc('li:nth-child(2)')   # <li> that is the 2nd child (nth-child counts from 1)
    li4 = doc('li:gt(2)')  # <li> elements whose index is greater than 2, counting from 0
    # The CSS pseudo-class is spelled with a hyphen: nth-child, not nth_child.
    li5 = doc('li:nth-child(2n)')   # <li> elements in even child positions
    li6 = doc('li:contains(second)')    # <li> elements whose text contains "second"
    print(li1,"\n",li2,"\n",li3,"\n",li4,"\n",li5,"\n",li6)
    

    输出

    <li class="item-0">first item</li>
             
     <li class="item-0"><a href="link5.html">fifth item</a></li>
         
     <li class="item-1"><a href="link2.html">second item</a></li>
             
     <li class="item-1 active"><a href="link4.html">fourth item</a></li>
            <li class="item-0"><a href="link5.html">fifth item</a></li>
         
     <li class="item-1"><a href="link2.html">second item</a></li>
            <li class="item-1 active"><a href="link4.html">fourth item</a></li>
             
     <li class="item-1"><a href="link2.html">second item</a></li>
    
    展开全文
  • pyQuery

    2019-05-16 19:50:30
    使用PyQuery去实现抓取 from pyquery import PyQuery import requests def req(url): response = requests.get(url) return response.content.decode('utf-8') # 使用这个openser可以自定义用requests模块还是...

    使用PyQuery去实现抓取

    from pyquery import PyQuery
    import requests
    
    def req(url, **kwargs):
        """Fetch *url* with requests and return the body decoded as UTF-8.

        Intended to be passed as the ``opener`` callable of ``PyQuery``.

        Args:
            url: Address to request.
            **kwargs: Extra keyword arguments forwarded to ``requests.get``.
                ``timeout`` defaults to 10 seconds when not supplied.

        Returns:
            The response body as a ``str``.
        """
        # Without a timeout, requests may wait indefinitely on a stalled server.
        kwargs.setdefault("timeout", 10)
        response = requests.get(url, **kwargs)
        # Decode the raw bytes explicitly as UTF-8.
        return response.content.decode('utf-8')
    
    # 使用这个opener可以自定义用requests模块还是urllib模块去请求网址
    doc = PyQuery(url='http://www.baidu.com',opener=req)
    
    print(doc)
    
    当然还可以自己设置一些请求头
    
    
    from pyquery import PyQuery
    # Request headers sent along with the page fetch.
    headers = {
                'User-Agent': 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87',
                'Referer': ''
            }
    # The keyword is spelled ``encoding``; the misspelled ``encodeing`` is not an
    # option pyquery documents, so the intended UTF-8 setting would not apply.
    doc = PyQuery('http://www.baidu.com',encoding='utf-8',headers=headers)
    
    展开全文
  • Pyquery

    2019-08-06 16:29:00
    介绍 pyquery库是jQuery的Python实现,可以用于解析HTML网页内容,能够以jQuery的语法来操作解析 ...安装: pip install pyquery 字符串的形式初始化 html = """ <html lang="en"> <head> ...

    介绍

    pyquery库是jQuery的Python实现,可以用于解析HTML网页内容,能够以jQuery的语法来操作解析 HTML 文档,易用性和解析速度都很好

    初始化

    安装: pip install pyquery

    字符串的形式初始化

    html = """
    <html lang="en">
        <head>
            简单好用的
            <title>PyQuery</title>
        </head>
        <body>
            <ul id="container">
                <li class="object-1">Python</li>
                <li class="object-2">大法</li>
                <li class="object-3">好</li>
            </ul>
        </body>
    </html>
    """
    from pyquery import PyQuery as pq
    doc = pq(html)
    print(doc("title"))
    
    <title>PyQuery</title>

    URL初始化

    # PyQuery对象首先会请求这个url,用得到的HTML内容完成初始化
    from pyquery import PyQuery as pq
    doc = pq(url="https://home.cnblogs.com/u/q240756200/")
    print(doc("title"))
    
    <title>__Invoker - 博客园</title>&#13;
    
    import requests  # this snippet calls requests directly, so it must be imported
    doc = pq(requests.get("https://home.cnblogs.com/u/q240756200/").text)

    print(doc("title"))
    <title>__Invoker - 博客园</title>&#13; # 两种方法相同

    文件初始化

    # 读取本地的html文件以字符串的形式传递给PyQuery类来初始化
    from pyquery import PyQuery as pq
    doc = pq(filename="demo.html")  # demo.html is a local file
    # The print call belongs on its own line; it had been folded into the comment.
    print(doc("title"))

    css选择器

    html = """
    <html lang="en">
        <head>
            简单好用的
            <title>PyQuery</title>
        </head>
        <body>
            <ul id="container">
                <li class="object-1">Python</li>
                <li class="object-2">大法</li>
                <li class="object-3">好</li>
            </ul>
        </body>
    </html>
    """
    # 先选取id为container的节点,再选取内部class属性为object-1的节点
    from pyquery import PyQuery as pq
    doc = pq(html)
    print(doc("#container .object-1"))
    print(type(doc("#container .object-1"))) # 输出类型还是PyQuery类型
    
    <li class="object-1">Python</li>
                
    <class 'pyquery.pyquery.PyQuery'>

    查找节点

    html = """
    <html lang="en">
        <head>
            简单好用的
            <title>PyQuery</title>
        </head>
        <body>
            <ul id="container">
                <li class="object-1">
                    Python
                    <span>你好</span>
                </li>
                <li class="object-2">大法</li>
                <li class="object-3">好</li>
            </ul>
        </body>
    </html>
    """

    子节点

    获取所有子孙节点

    # 获取所有子孙节点
    from pyquery import PyQuery as pq
    doc = pq(html)
    a = doc("#container")
    lis = a.find("li")   # 查询的范围是节点的所有子孙节点
    print(lis)
    
    <li class="object-1">
                    Python
                    <span>你好</span>
                </li>
                <li class="object-2">大法</li>
                <li class="object-3">好</li>
            

    获取所有子节点

    # 获取所有子节点
    from pyquery import PyQuery as pq
    doc = pq(html)
    a = doc("#container")
    li = a.children()
    print(li)

    通过css选择器选择子节点中的某个节点

    # 通过css选择器选择子节点中的某个节点  筛选出子节点中class属性为object-1的节点
    from pyquery import PyQuery as pq
    doc = pq(html)
    a = doc("#container")
    li = a.children(".object-1")
    print(li)
    
    <li class="object-1">
                    Python
                    <span>你好</span>
                </li>

    父节点

    直接父节点

    # 这里的父节点是该节点的直接父节点
    from pyquery import PyQuery as pq
    # Use `html`, the document string this section defines; `html1` is never defined.
    doc = pq(html)
    a = doc(".object-1")
    # parent() returns the direct parent of the selected node.
    li = a.parent()
    print(li)
    
    <ul id="container">
                <li class="object-1">
                    Python
                    <span>你好</span>
                </li>
                <li class="object-2">大法</li>
                <li class="object-3">好</li>
            </ul>

    祖先节点

    # 获取所有父节点,即祖先节点
    from pyquery import PyQuery as pq
    # Use `html`, the document string this section defines; `html1` is never defined.
    doc = pq(html)
    a = doc(".object-1")
    # parents() returns the ancestor nodes of the selected node.
    li = a.parents()
    print(li)
    # 结果会有两个,一个是父级节点一个是祖先节点

    通过css选择器选择父节点中的某个节点

    from pyquery import PyQuery as pq
    # Use `html`, the document string this section defines; `html1` is never defined.
    doc = pq(html)
    a = doc(".object-1")
    # Passing a CSS selector keeps only the ancestors that match it.
    li = a.parents("#container")
    print(li)
    
    <ul id="container">
                <li class="object-1">
                    Python
                    <span>你好</span>
                </li>
                <li class="object-2">大法</li>
                <li class="object-3">好</li>
            </ul>

    兄弟节点

    获取所有兄弟节点

    # 获取所有兄弟节点
    from pyquery import PyQuery as pq
    doc = pq(html)
    a = doc(".object-1")
    li = a.siblings()
    print(li)
    
    <li class="object-2">大法</li>
                <li class="object-3">好</li>

    通过css选择器选择兄弟节点中的某个节点

    # 通过css选择器选择兄弟节点中的某个节点
    from pyquery import PyQuery as pq
    doc = pq(html)
    a = doc(".object-1")
    li = a.siblings(".object-3")
    print(li)
    
    <li class="object-3">好</li>

    遍历

    - 上面选择节点的结果可能是多个节点,也可能是单个节点,类型都是pyquery类型

    单个节点可以直接用str转换成字符串直接打印

    doc = pq(html)
    a = doc(".object-1")
    li = a.siblings(".object-3")
    print(str(li))
    print(type(str(li)))
    
    <li class="object-3">好</li>
            
    <class 'str'>

    查询结果为多个节点需要遍历来获取

    # 查询结果为多个节点需要遍历来获取
    # 多个节点需要调用items方法
    doc = pq(html)
    a = doc("li").items()    # 调用items会得到一个生成器
    print(a)
    
    for i in a:    # 循环生成器取出每个节点,类型也是pyquery
        print(i)
    
    
    <generator object PyQuery.items at 0x00000254B449CCA8>
    <li class="object-1">
                    Python
                    <span>你好</span>
                </li>
                
    <li class="object-2">大法</li>
                
    <li class="object-3">好</li>

    获取信息

    html = """
    <html lang="en">
        <head>
            简单好用的
            <title>PyQuery</title>
        </head>
        <body>
            <ul id="container">
                <li class="object-1">
                    Python
                    <a href="www.taobao.com">world</a>
                    <a href="www.baidu.com">hello</a>
                    
                </li>
                <li class="object-2">
                    大法
                    <a href="www.taobao.com">world</a>
                </li>
                <li class="object-3">好</li>
            </ul>
        </body>
    </html>
    """

    获取属性

    # 找到某个节点后,就可以调用attr()方法来获取属性   
    a = doc(".object-1")
    # print(a.find("a").attr("href"))    
    # 当返回结果包含多个节点时,调用attr()方法只会得到第一个节点的属性
    
    # 如果想要获取所有a节点的属性,需要使用遍历
    for i in a.find("a").items():
        print(i.attr("href"))
    
    
    www.taobao.com
    www.baidu.com

    获取文本

    1
    2
    3
    4
    - 调用text()方法获取文本
    - 当我们得到的结果是多个节点时
        - text()  可以获取到匹配标签内的所有文本,返回的是所有文本内容组成的字符串
        - html()  返回的是匹配到的所有节点中的第一个节点内的html文本,如果想要获取所有节点中的html需要遍历

    获取纯文本

    # 获取纯文本
    doc = pq(html)
    li = doc("li")
    li = li.text()
    print(li)
    Python world hello 大法 world 好

    获取节点内的HTML

    # 获取节点内的HTML    带标签 只能获取匹配到的第一个节点内的HTML
    doc = pq(html)
    li = doc("li")
    print(li.html())
    
    Python
    <a href="www.taobao.com">world</a>
    <a href="www.baidu.com">hello</a>

    获取节点内的所有HTML

    # 遍历获取所有节点中的html
    doc = pq(html)
    li = doc("li")
    for i in li.items():
        print(i.html())
    
    Python
    <a href="www.taobao.com">world</a>
    <a href="www.baidu.com">hello</a>
    大法
    <a href="www.taobao.com">world</a>         
    好

    节点操作

    html = """
    <html lang="en">
        <head>
            简单好用的
            <title>PyQuery</title>
        </head>
        <body>
            <ul id="container">
                <li class="object-1">
                    Python
                    <a href="www.taobao.com">world</a>
                    <a href="www.baidu.com">hello</a>
                    
                </li>
                <li class="object-2">
                    大法
                    <a href="www.taobao.com">world</a>
                </li>
                <li class="object-3">好</li>
            </ul>
        </body>
    </html>
    """

    删除属性

    doc = pq(html)
    a = doc(".object-2")
    print(a)
    a.removeClass("object-2")   # 删除object-2这个class属性
    print(a)
    
    <li class="object-2">
                    大法
                    <a href="www.taobao.com">world</a>
                </li>
                
    <li class="">
                    大法
                    <a href="www.taobao.com">world</a>
                </li>

    添加属性

    doc = pq(html)
    a = doc(".object-2")
    print(a)
    a.removeClass("object-2")   # 删除object-2这个class属性
    print(a)
    a.addClass("item")     # 给该标签添加一个item的class属性
    print(a)
    
    <li class="object-2">
                    大法
                    <a href="www.taobao.com">world</a>
                </li>
                
    <li class="">
                    大法
                    <a href="www.taobao.com">world</a>
                </li>
                
    <li class="item">
                    大法
                    <a href="www.taobao.com">world</a>
                </li>

    attr

    # 属性操作  【一个参数是查找 两个参数是设置属性】
    # 修改属性
    doc = pq(html)
    a = doc(".object-1")
    a.attr("name","henry")  # 给li标签添加一个name属性,值为henry
    print(a)
    
    <li class="object-1" name="henry">
                    Python
                    <a href="www.taobao.com">world</a>
                    <a href="www.baidu.com">hello</a>
                    
                </li>

    text

    # 文本操作  【有参数是添加或修改文本内容 没有参数是查找所有文本内容】
    # 文本内容操作
    doc = pq(html)
    a = doc(".object-1")
    a.text("hello world")
    print(a)
    
    <li class="object-1">hello world</li>

    html

    # 标签操作  【有参数是添加或修改标签 没有参数是查找第一个标签,获取所有需要遍历】
    # 标签操作
    doc = pq(html)
    a = doc(".object-1")
    a.html("<span>span标签</span>")
    print(a)
    
    <li class="object-1"><span>span标签</span></li>

    伪类选择器

    html = """
        <div class="wrap">
            <div id="container">
                <ul class="list">
                    <li class="item-0">fist item</li>
                    <li class="item-1"><a href="link1.html">second</a></li>
                    <li class="item-0 active"><a href="link2.html"><span class="bold">third item</span></a></li>
                    <li class="item-1 active"><a href="link3.html">fourth item</a></li>
                    <li class="item-0"><a href="link4.html">fifth item</a></li>
                </ul>
            </div>
        </div>
    """

    选择第一个节点

    # 选择第一个节点
    doc = pq(html)
    a = doc("li:first-child")
    print(a)
    
    <li class="item-0">fist item</li>

    选择最后一个节点

    # 选择最后一个节点
    doc = pq(html)
    a = doc("li:last-child")
    print(a)
    
    <li class="item-0"><a href="link4.html">fifth item</a></li>

    选择指定节点

    # 选择第2个li节点
    doc = pq(html)
    a = doc("li:nth-child(2)")
    print(a)
    
    <li class="item-1"><a href="link1.html">second</a></li>

    选择指定节点之后的节点

    # 选择第2个节点之后的所有节点
    doc = pq(html)
    a = doc("li:gt(2)")
    print(a)
    
    <li class="item-1 active"><a href="link3.html">fourth item</a></li>
                    <li class="item-0"><a href="link4.html">fifth item</a></li>

    选择偶数节点

    # 选择偶数位置节点
    doc = pq(html)
    a = doc("li:nth-child(2n)")
    print(a)
    
    <li class="item-1"><a href="link1.html">second</a></li>
                    <li class="item-1 active"><a href="link3.html">fourth item</a></li>

    包含哪些文本的节点

    # 包含second文本的节点
    doc = pq(html)
    a = doc("li:contains(second)")
    print(a)
    
    <li class="item-1"><a href="link1.html">second</a></li>
    展开全文

空空如也

空空如也

1 2 3 4 5 ... 20
收藏数 3,648
精华内容 1,459
关键字:

pyquery