parsel使用

基础使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from parsel import Selector

html = '''
<div>
<ul>
<li class="item-0">first item</li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1 active"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
</ul>
</div>
'''
# Parse the HTML snippet into a Selector that can be queried with XPath.
selector = Selector(text=html)
# Extract text: every text node beneath <li> elements whose class
# attribute contains "item-0".
items2 = selector.xpath('//li[contains(@class, "item-0")]//text()')
# Extract attributes: the href of <a> elements directly under those <li>s.
items3 = selector.xpath('//li[contains(@class, "item-0")]/a/@href')
# Regex extraction: apply the pattern 'link.*' to the matched href values.
result = selector.xpath('//li[contains(@class, "item-0")]/a/@href').re('link.*')
# Print each extracted text node. Expected output, one per line:
#   first item
#   third item
#   fifth item
for item in items2:
    print(item.get())
赏个🍗吧
0%