记得我们第三关的时候爬取了豆瓣TOP250的电影名/评分/推荐语/链接,现在呢,我们要把它们存储下来,记得用今天课上学的csv和excel,分别存储下来哦~
URL
import csv

import openpyxl
import requests
from bs4 import BeautifulSoup

# Douban rejects the default python-requests User-Agent (HTTP 418),
# so a browser-like UA is required for the pages to return real HTML.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36'
}

# Column header shared by the CSV file and the Excel sheet.
COLUMNS = ['编号', '电影名', '评分', '推荐语', '链接']


def parse_page(html):
    """Parse one TOP250 listing page.

    Returns a list of [num, name, rate, inq, url] rows, one per movie.
    Movies without a recommendation line get '' for inq.
    """
    rows = []
    soup = BeautifulSoup(html, 'html.parser')
    for item in soup.find(class_='grid_view').find_all('li'):
        num = item.find('em').text
        name = item.find('span').text
        rate = item.find(class_='rating_num').text
        # Some entries have no recommendation blurb; test for the tag
        # explicitly instead of catching the AttributeError with a bare except.
        inq_tag = item.find(class_='inq')
        inq = inq_tag.text if inq_tag is not None else ''
        url = item.find('a')['href']
        rows.append([num, name, rate, inq, url])
    return rows


def fetch_top250():
    """Fetch all 10 listing pages once and return every movie row."""
    rows = []
    for start in range(0, 250, 25):
        res = requests.get(
            'https://movie.douban.com/top250?start={}&filter='.format(start),
            headers=HEADERS,
        )
        rows.extend(parse_page(res.text))
    return rows


def save_csv(rows, path='02.csv'):
    """Write the header plus all rows to a UTF-8 CSV file."""
    with open(path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(COLUMNS)
        writer.writerows(rows)


def save_xlsx(rows, path='02.xlsx'):
    """Write the header plus all rows to an .xlsx workbook (sheet 'TOP250')."""
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.title = 'TOP250'
    sheet.append(COLUMNS)
    for row in rows:
        sheet.append(row)
    wb.save(path)


if __name__ == '__main__':
    # Fetch once, save twice — the original scraped all 10 pages a second
    # time just to produce the Excel copy of the same data.
    rows = fetch_top250()
    save_csv(rows)
    save_xlsx(rows)