# Demo: navigating a parsed HTML document with BeautifulSoup
# (tag access, attributes, children, parents and siblings).
from bs4 import BeautifulSoup

# Sample document. Built from adjacent string literals so the text stays
# exactly the same as the original single-line literal (leading/trailing
# space included) while keeping the source lines short.
html = (
    " <html><head><title>The Dormouse's title</title></head> "
    "<body> "
    '<p class="title" name="dromouse">'
    "<b>The Dormouse's story</b></p> "
    '<p class="story">Once upon a time there were three little sisters; '
    "and their names were "
    '<a href="http://example.com/elsie" class="sister" id="link1">'
    "<!-- Elsie --></a>, "
    '<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> '
    "and "
    '<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>'
    "; and they lived at the bottom of a well.</p> "
    '<p class="story">...</p> '
)

soup = BeautifulSoup(html, 'lxml')

# Expected: <title>The Dormouse's title</title>
print(soup.title)

# Expected: The Dormouse's title
print(soup.title.string)

# Expected: <head><title>The Dormouse's title</title></head>
print(soup.head)

# Expected: The Dormouse's story
# When several <p> nodes match, attribute-style access returns only the first.
print(soup.p.string)

# Expected: dromouse (printed twice)
# Two equivalent ways to read the "name" attribute of the first <p>.
print(soup.p.attrs['name'])
print(soup.p['name'])

# Direct children of <html>. `.children` does not recurse; use
# `.descendants` instead if every nested node is wanted.
for child in soup.html.children:
    print(child)

# Direct parent of the first <a>. Print the parent tag itself: iterating
# over a tag walks its children, which is not what this section is showing.
print(soup.a.parent)

# Every ancestor of the first <a>.
for ancestor in soup.a.parents:
    print(ancestor)

# Sibling nodes of the first <a>.
print('下一个兄弟节点', soup.a.next_sibling)
print('上一个兄弟节点', soup.a.previous_sibling)