<output id="qn6qe"></output>

    1. <output id="qn6qe"><tt id="qn6qe"></tt></output>
    2. <strike id="qn6qe"></strike>

      亚洲 日本 欧洲 欧美 视频,日韩中文字幕有码av,一本一道av中文字幕无码,国产线播放免费人成视频播放,人妻少妇偷人无码视频,日夜啪啪一区二区三区,国产尤物精品自在拍视频首页,久热这里只有精品12

      Python爬取51job職位信息

      # -*- coding: utf-8 -*-
      # @Time    : 2018/3/1 16:38
      # @Author  : HT
      # @Email   : acer_yuhaitao@163.com
      # @File    : 51job.py
      # @Software: PyCharm
      import urllib
      import re
      import sqlite3
      import sys
      # Python 2 only: reload(sys) is the usual idiom for getting
      # sys.setdefaultencoding back so that it can be called below.
      reload(sys)
      sys.setdefaultencoding('utf8')  # make utf8 the default codec so Chinese text can be printed

      i = 0  # running count of scraped entries; incremented by print_items
      def url_input(url):
          """
          Fetch *url* and return the page source as text.

          The raw response body is decoded as GBK before it is returned.
          The response object is always closed, even when reading fails,
          so sockets are not leaked while many pages are crawled in a row.
          """
          get_html = urllib.urlopen(url)
          try:
              raw_html = get_html.read()
          finally:
              # Release the underlying connection regardless of the outcome.
              get_html.close()
          return raw_html.decode('gbk')
      
      def find_data(html):
          """
          Pull the job records out of one result page with a regular expression.

          Returns a list of 5-tuples, one per match: the two captured ``title``
          attributes (first link, then the link inside the ``t2`` span) followed
          by the text of the ``t3``, ``t4`` and ``t5`` spans.
          """
          pattern = re.compile(r'class="t1 ">.*?<a target="_blank" title="(.*?)".*?<span class="t2"><a target="_blank" title="(.*?)".*?<span class="t3">(.*?)</span>.*?<span class="t4">(.*?)</span>.*?<span class="t5">(.*?)</span>',re.S)
          return pattern.findall(html)
      
      def find_all_page(html):
          """
          Read the total number of result pages from the first result page.

          The count is taken from the ``<span class="td">`` label that sits
          directly in front of the "jump_page" input; every non-digit
          character of that label is stripped.

          Returns the digits as a string (callers convert it with int()).
          Raises IndexError when the label cannot be found in *html*.
          """
          reg = re.compile(r'<span class="td">(.*?)</span><input id="jump_page" class="mytxt" type="text" value="1"/>',re.S)
          page_all = reg.findall(html)
          if not page_all:
              # Same exception type as the former bare page_all[0] lookup,
              # but with a message that says what actually went wrong.
              raise IndexError("total page count not found in html")
          # Raw string: "\D" in a plain literal is an invalid escape sequence.
          return re.sub(r"\D", "", page_all[0])
      
      def data_to_sqlite(id,job,company,address,wages,date,jobname,
                         db_path=r"D:\Python-Test\WeiXin\db.sqlite3"):
          """
          Store one job record in the '51job' table of a SQLite database.

          id       -- accepted for call compatibility; not written to the table.
          job, company, address, wages, date, jobname -- column values.
          db_path  -- database file to write to; defaults to the path that
                      used to be hard-coded here.

          The values are bound as query parameters instead of being pasted
          into the SQL text, so quotes inside scraped strings can neither
          break the statement nor inject SQL.  Database errors are reported
          on stdout and otherwise ignored (best effort), and the connection
          is always closed.
          """
          db = sqlite3.connect(db_path)
          sql = 'insert into "51job"(job,company,address,wages,date,jobname) values (?,?,?,?,?,?);'
          try:
              db.execute(sql, (job, company, address, wages, date, jobname))
              db.commit()
          except sqlite3.Error as e:
              print("ERRO:",e)
          finally:
              # One connection is opened per record, so it must not be leaked.
              db.close()
      
      def data_to_txt(str,jobname):
          """
          Append one piece of text to the per-job text file.

          The file is named "51job<jobname>.txt" (relative to the current
          working directory) and is opened in 'a+' mode, so it is created on
          first use and extended afterwards.
          """
          filename = u"51job%s.txt"%(jobname)
          with open(filename,'a+') as out:
              out.write(str)
      
      def print_items(data_items,jobname):
          """
          Store and print every record of a regex match list.

          data_items -- sequence of (job, company, address, wages, date) tuples.
          jobname    -- label passed on to the text-file and database writers.

          Each record bumps the module-level counter ``i``, is formatted as
          "[<i>] job--company--address--wages--date", appended to the text
          file, inserted into the database and printed.
          """
          global i
          for data in data_items:
              job, company, address, wages, date = data[:5]
              i += 1
              str1 = "[" + str(i) + "] " + "--".join((job, company, address, wages, date)) + "\n"
              data_to_txt(str1, jobname)  # append to the text file
              # Pass the running number as the record id; the builtin function
              # `id` was being handed over here by accident.
              data_to_sqlite(i, job, company, address, wages, date, jobname)
              print(str1)
      
      
      def urlformat(urlstart):
          """
          Turn a first-page URL into a page template.

          Every literal "1.html" in *urlstart* is replaced by "{}.html", so
          the result can be completed with str.format(page_number).  The dot
          is escaped: unescaped it matched any character, which could rewrite
          unrelated parts of the URL such as "1-html".
          """
          return re.sub(r'1\.html', '{}.html', urlstart)
      
      def get_page_html(page_num,urlstart):
          """
          Build the URL of every result page.

          page_num -- total number of pages.
          urlstart -- URL of the first page (the one ending in "1.html").

          Returns a list with one URL per page, numbered 1..page_num.  Pages
          are 1-based (the first page is "1.html"), so the numbering must not
          start at 0; doing so requested a page 0 and dropped the last page.
          """
          template = urlformat(urlstart)  # the template is the same for every page
          return [template.format(page) for page in range(1, page_num + 1)]
      
      def all_job_get():
          """
          Crawl several job searches in one run.

          For every (job name, first-page URL) pair below: fetch the first
          page, read the total page count from it, then fetch each page and
          store/print its records.  The module-level counter ``i`` is reset
          after each job so that numbering restarts for the next one.
          """
          global i  # without this the reset below would only bind a local name
          searches = [
              (
                  "python",
                  'http://search.51job.com/list/010000,000000,0000,00,9,99,Python%25E5%25BC%2580%25E5%258F%2591%25E5%25B7%25A5%25E7%25A8%258B%25E5%25B8%2588,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=',
              ),
              (
                  # This entry used to carry a copy of the "python" search URL.
                  u"嵌入式",
                  'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E5%25B5%258C%25E5%2585%25A5%25E5%25BC%258F%25E5%25BC%2580%25E5%258F%2591,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=',
              ),
              (
                  u"云計算",
                  'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E4%25BA%2591%25E8%25AE%25A1%25E7%25AE%2597,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=',
              ),
              (
                  # This entry used to carry a copy of the "python" search URL.
                  u"機器學(xué)習(xí)",
                  'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E6%259C%25BA%25E5%2599%25A8%25E5%25AD%25A6%25E4%25B9%25A0,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=',
              ),
              (
                  # This entry used to carry the machine-learning search URL.
                  u"人工智能",
                  'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E4%25BA%25BA%25E5%25B7%25A5%25E6%2599%25BA%25E8%2583%25BD,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=',
              ),
              (
                  u"自動駕駛",
                  'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E8%2587%25AA%25E5%258A%25A8%25E9%25A9%25BE%25E9%25A9%25B6,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=',
              ),
              (
                  u"北上廣深python",
                  'http://search.51job.com/list/010000%252C040000%252C020000%252C030200,000000,0000,00,9,99,python,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=',
              ),
          ]
          for jobname, urlstart in searches:
              html = url_input(urlstart)  # first page
              all_page_num = int(find_all_page(html))  # total pages, read from the first page
              print("+++++++++++++++++%s++++++++++++++++++++" % (all_page_num))
              for url in get_page_html(all_page_num, urlstart):
                  html = url_input(url)
                  data_items = find_data(html)
                  print_items(data_items, jobname)
              i = 0  # restart the numbering for the next job
      def one_job_get():
          """
          Crawl a single job search.

          Fetches the first page, reads the total page count from it, then
          fetches every page and stores/prints its records.  The module-level
          counter ``i`` is reset to 0 when the crawl is finished.

          The active search is the last jobname/urlstart pair below; the
          commented-out pairs are alternative searches that can be swapped in.
          """
          global i  # without this the reset at the end would only bind a local name
          # jobname = "python"
          # urlstart = 'http://search.51job.com/list/010000,000000,0000,00,9,99,Python%25E5%25BC%2580%25E5%258F%2591%25E5%25B7%25A5%25E7%25A8%258B%25E5%25B8%2588,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
          # jobname = u"嵌入式"
          # urlstart = 'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E5%25B5%258C%25E5%2585%25A5%25E5%25BC%258F%25E5%25BC%2580%25E5%258F%2591,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
          # jobname =u"云計算"
          # urlstart ='http://search.51job.com/list/010000,000000,0000,00,9,99,%25E4%25BA%2591%25E8%25AE%25A1%25E7%25AE%2597,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
          # jobname =u"機器學(xué)習(xí)"
          # urlstart = 'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E6%259C%25BA%25E5%2599%25A8%25E5%25AD%25A6%25E4%25B9%25A0,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
          # jobname =u"人工智能"
          # urlstart = 'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E4%25BA%25BA%25E5%25B7%25A5%25E6%2599%25BA%25E8%2583%25BD,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
          # jobname =u"自動駕駛"
          # urlstart = 'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E8%2587%25AA%25E5%258A%25A8%25E9%25A9%25BE%25E9%25A9%25B6,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
          # jobname =u"北上廣深python"
          # urlstart = 'http://search.51job.com/list/010000%252C040000%252C020000%252C030200,000000,0000,00,9,99,python,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=1&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
          jobname = u"BJ技術(shù)支持"
          urlstart = 'http://search.51job.com/list/010000,000000,0000,00,9,99,%25E6%258A%2580%25E6%259C%25AF%25E6%2594%25AF%25E6%258C%2581,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='

          html = url_input(urlstart)  # fetch the first page
          all_page_num = int(find_all_page(html))  # total page count, read from the first page
          print("+++++++++++++++++%s++++++++++++++++++++" % (all_page_num))
          urllist = get_page_html(all_page_num, urlstart)  # one URL per page
          for url in urllist:
              html = url_input(url)  # fetch this page
              data_items = find_data(html)  # extract the job records
              print_items(data_items, jobname)  # write them to the text file and the database
          i = 0  # leave the counter clean for a following crawl
      
      if __name__ == '__main__':
          # Script entry point: only the single-job crawl is run; the batch
          # crawl over all configured searches is left disabled.
          #all_job_get()
          one_job_get()

       

      posted @ 2018-03-01 22:16  喻曉生  閱讀(2093)  評論(0)    收藏  舉報
      主站蜘蛛池模板: 亚洲熟妇自偷自拍另欧美| 金塔县| 无码熟妇人妻av在线电影| 真人在线射美女视频在线观看 | 一区二区三区激情都市| 成年女人片免费视频播放A| 国产成年码AV片在线观看| 国内自拍视频一区二区三区| 最近免费中文字幕大全| 国产精品爽爽va在线观看网站| 欧美人禽杂交狂配| 色悠悠国产精品免费在线| 国产精品国三级国产av| 四虎国产精品永久入口| 苏州市| 亚洲一区二区精品另类| 青青草无码免费一二三区| 国产精品免费第一区二区| 久久国产精品免费一区| 国产精品扒开腿做爽爽爽a片唱戏| 色爱综合激情五月激情| 小嫩模无套内谢第一次| 久久国产成人高清精品亚洲| 草草线在成年免费视频2| 亚洲精品综合网在线8050影院| 亚洲2区3区4区产品乱码2021| 九九热精品免费在线视频| 一区二区丝袜美腿视频| 欧美成人va免费大片视频| 中文无码高潮到痉挛在线视频| 一区二区三区AV波多野结衣| 狠狠综合久久av一区二| 亚洲综合网中文字幕在线| 中文字幕有码高清日韩| 我和亲妺妺乱的性视频| 无码专区 人妻系列 在线| 国产精品普通话国语对白露脸| 宣武区| 色爱av综合网国产精品| 亚洲av日韩av一区久久| 黑人玩弄人妻中文在线|