千家信息网

python爬取贴吧图片并下载

发表于:2025-12-02 作者:千家信息网编辑
千家信息网最后更新 2025年12月02日,# coding = utf-8 import urllib2 import urllib import re import random import time def get_url(url): Ag...
千家信息网最后更新 2025年12月02日:python爬取贴吧图片并下载
# coding=utf-8
"""Download the .jpg images referenced by a Tieba page.

The script prompts for a page URL, fetches the page through a randomly
chosen HTTP proxy with a randomly chosen User-Agent, and saves every
matching image into the current working directory.  Entering ``q`` at the
prompt stops the script.

The module runs on both Python 2 (``urllib2`` / ``raw_input``) and
Python 3 (``urllib.request`` / ``input``).
"""
import contextlib
import random
import re
import time

try:  # Python 2
    import urllib2 as _request
    from urllib import urlretrieve as _urlretrieve
    _read_line = raw_input  # noqa: F821 - only reached on Python 2
except ImportError:  # Python 3
    import urllib.request as _request
    from urllib.request import urlretrieve as _urlretrieve
    _read_line = input

# Browser identities to rotate through; one is picked per request.
USER_AGENTS = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36',
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
)

# HTTP proxies ("host:port") to rotate through; one is picked per request.
PROXIES = (
    '123.169.165.255:9999',
    '117.69.13.64:9999',
    '223.198.1.147:9999',
)

# NOTE(review): only plain-http sources ending in ".jpg" are matched, so
# https image URLs are skipped - confirm this is intended.
IMG_URL_RE = re.compile(r'src="(http:.+?\.jpg)')


def get_url(url):
    """Fetch *url* and return the raw response body.

    The request is sent with a random User-Agent from ``USER_AGENTS`` and
    routed through a random HTTP proxy from ``PROXIES``.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body exactly as read from the connection (a byte
        string).

    Raises:
        Whatever the underlying urllib opener raises for an invalid URL
        or a failed connection; nothing is caught here.
    """
    user_agent = random.choice(USER_AGENTS)
    proxy_handler = _request.ProxyHandler({'http': random.choice(PROXIES)})
    # Use a private opener instead of install_opener() so the proxy choice
    # does not leak into every other urlopen() call in the process.
    opener = _request.build_opener(proxy_handler)
    page = _request.Request(url)
    page.add_header('User-Agent', user_agent)
    # closing() releases the connection on both Python 2 and 3.
    with contextlib.closing(opener.open(page)) as response:
        return response.read()


def down_img(html):
    """Download every image URL found in *html* into the working directory.

    Each file is named after the last path segment of its URL, so two
    images with the same basename overwrite each other.

    Args:
        html: Page source as text, or as the undecoded bytes returned by
            ``get_url``.

    Returns:
        The list of filenames written, in page order (empty when the page
        contains no matching image).
    """
    # On Python 3 the fetched body is bytes and cannot be searched with a
    # text pattern; on Python 2 ``bytes is str`` so this branch is skipped.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    saved = []
    for img in IMG_URL_RE.findall(html):
        filename = img.split("/")[-1]
        _urlretrieve(img, filename, None)
        saved.append(filename)
    return saved


def main():
    """Prompt for page URLs until the user enters ``q``."""
    while True:
        url = _read_line("请输入下载图片的贴吧地址:")
        if url == 'q':
            print("运行终止!")
            break
        print('开始获取网页信息...')
        # Fetch once and reuse the result; a second request would double
        # the traffic and could go through a different proxy.
        html = get_url(url)
        print('获取网页信息成功!')
        print('开始下载图片...')
        down_img(html)
        print('图片下载完成!')
        # Short pause before prompting for the next page.
        time.sleep(3)


if __name__ == "__main__":
    main()
0