# Demo script: walking a parsed HTML document with Beautiful Soup
# (direct tag access, tag names, attributes, text, nested selection,
# children, parents and siblings).
#
# NOTE(review): the markup in ``html_doc`` was reconstructed around the
# surviving text. The tag attributes (class/id/href) are borrowed from the
# "three sisters" sample in the Beautiful Soup documentation -- confirm them
# against the original lesson material. Without <p>, <b> and <a> tags the
# lookups below return None and the attribute accesses fail.

from bs4 import BeautifulSoup

html_doc = """<html><head><title>The Dormouse's story</title></head> <body><p class="title"><b>$37</b></p>
<p class="story">Once upon a time there were three little sisters; and their names were<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# Parse the document with the lxml parser (requires the lxml package).
soup = BeautifulSoup(html_doc, 'lxml')
# print(soup)
# print(type(soup))

# ---- Traversing the document tree ----

# 1. Direct access by tag name (important): yields the first matching tag.
print(soup.html)
print(type(soup.html))
print(soup.a)
print(soup.p)

# 2. Get the name of a tag.
print(soup.a.name)

# 3. Get the attributes of a tag (important).
print(soup.a.attrs)  # all attributes of the <a> tag
print(soup.a.attrs['href'])

# 4. Get the text content of a tag (important).
print(soup.p.text)  # $37

# 5. Nested selection.
print(soup.html.body.p)

# 6. Child nodes and descendants.
print(soup.p.children)  # returns an iterator object
print(list(soup.p.children))  # [<b>$37</b>]

# 7. Parent node and ancestors.
print(soup.b.parent)
print(soup.b.parents)
print(list(soup.b.parents))

# 8. Sibling nodes.
print(soup.a)
# The next sibling node.
print(soup.a.next_sibling)
# All following siblings; this returns a generator.
print(soup.a.next_siblings)
print(list(soup.a.next_siblings))
# The previous sibling node.
print(soup.a.previous_sibling)
# All preceding siblings; this returns a generator.
print(list(soup.a.previous_siblings))