# Form fields to submit in the request body.
payload = {'key1': 'value1', 'key2': 'value2'}
# Send the fields with an HTTP POST; passing a dict via `data=` submits it as
# form data.
# NOTE(review): the URL is empty in this copy of the snippet -- supply the
# real endpoint before running.
r = requests.post("", data=payload)
POST 请求多用于提交表单数据:用户填写若干输入框后,一并提交给服务器。
1 2 3 4
# Each remaining HTTP verb has its own helper function; every call below
# rebinds `r` to the newest response.
# NOTE(review): the URLs are empty in this copy of the snippet -- supply real
# endpoints before running.
r = requests.put("")
r = requests.delete("")
r = requests.head("")
r = requests.options("")
# Sample document used by the BeautifulSoup examples.
# NOTE(review): the href values are empty in this copy of the snippet.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="" class="sister" id="link1">Elsie</a>,
<a href="" class="sister" id="link2">Lacie</a> and
<a href="" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""

soup = BeautifulSoup(html_doc, 'lxml')  # build the BeautifulSoup object with the lxml parser
find = soup.find('p')  # find() returns the first <p> tag
print("find's return type is ", type(find))  # show the type of the returned object
print("find's content is", find)  # show the tag that find() returned
print("find's Tag Name is ", find.name)  # show the tag's name
print("find's Attribute(class) is ", find['class'])  # show the tag's class attribute value
Comment 对象
这个对象其实就是 HTML 和 XML 中的注释。
1 2 3 4 5
# A <b> tag whose only child is an HTML comment.
markup = "<b><!--Hey, buddy. Want to buy a used parser?--></b>"
soup = BeautifulSoup(markup, 'lxml')
# .string on the tag yields its single child, here the comment node.
comment = soup.b.string
type(comment)
# <class 'bs4.element.Comment'>
# find_all() examples against the sample document; the comment under each
# call shows the result given in the tutorial.

# Search by tag name.
soup.find_all("title")
# [<title>The Dormouse's story</title>]

# Search by tag name plus CSS class.
soup.find_all("p", "title")
# [<p class="title"><b>The Dormouse's story</b></p>]

soup.find_all("a")
# [<a class="sister" href="" id="link1">Elsie</a>,
#  <a class="sister" href="" id="link2">Lacie</a>,
#  <a class="sister" href="" id="link3">Tillie</a>]

# Search by attribute value via a keyword argument.
soup.find_all(id="link2")
# [<a class="sister" href="" id="link2">Lacie</a>]

# Search the document text with a compiled regular expression.
import re
soup.find(string=re.compile("sisters"))
# 'Once upon a time there were three little sisters; and their names were\n'
name 参数:查找所有名字为 name 的 tag。
attrs 参数:按 tag 的属性进行过滤。
string 参数:搜索文档中的字符串内容。
recursive 参数:调用 tag 的 find_all() 方法时,Beautiful Soup 会检索当前 tag 的所有子孙节点;如果只想搜索 tag 的直接子节点,可以使用参数 recursive=False。