爬取 携程

主页面url
http://flights.ctrip.com/booking/BJS-KMG-day-1.html?DDate1=2017-09-22

获取数据url
http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=S&DDate1=2017-09-22&IsNearAirportRecommond=0&LogToken=c802fde883e64a3c8fba0e99e45d023c&rk=2.7090996922689414192821&CK=1D7795E4EB96B7578AA673D6EF346C33&r=0.2337708871934405872019

首次搜索
http://flights.ctrip.com/domestic/Search/FirstRoute/?ddate1=2017-09-22&ddate2=2017-09-22&dcity1=BJS&acity1=KMG
http://flights.ctrip.com/domestic/booking/BJS-KMG---D-adu-1/?dayoffset=1&ddate1=2017-09-22&ddate2=2017-09-22
重新搜索
http://flights.ctrip.com/booking/bjs-kmg---d-adu-1/?ddate1=2017-09-22&ddate2=2017-09-22 往返
http://flights.ctrip.com/booking/BJS-KMG-day-1.html?DDate1=2017-09-22 单程

往返
var url = "//flights.ctrip.com/domesticsearch/search/SearchRoundRecommend?DCity1=BJS&ACity1=KMG&SearchType=D&DDate1=2017-09-22&ACity2=BJS&DDate2=2017-09-22&IsNearAirportRecommond=0&LogToken=3c10647ccad04412a33567f25335875a&CK=97017DBB47A37AFE6BE4D99105F78AFE";
单程
var url = "//flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=S&DDate1=2017-09-22&IsNearAirportRecommond=0&LogToken=66c9c383c12f4dc685a9ad3e5834a7c4&CK=D6BF57FFFC0A4E89AF51CF7186A1944E";

根据直接获取json的url拿到数据解析后就能获得航班信息,这个url里有4个参数,需要从搜索主页的页面里获取
单程和往返的原理一样,就是往返多了一个返回日期,其他都一样

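LogToken不变;r不变(即主页面第34行 ajaxRequest 的第二个参数);CK有变化:请求里的CK是把主页面url中CK的一个字符挪了位置,本例是把下标6的字符移到下标0(D7795E14EB96… → 1D7795E4EB96…),移位下标由页面脚本里的 Z21/M21Z 算出,可能随页面不同;rk用 Math.random()*10 再拼上页面里写死的后缀(本例为 192821)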

LogToken
rk
CK
r 在主页面的第34行

LogToken和CK根据主页面24行的url可以得到,r值在主页面34行可以得到

获取的json数据的url,爬取数据用的url
单程
http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=S&DDate1=2017-09-22&IsNearAirportRecommond=0&LogToken=c96a69a7ca9142be9f0e3192f44f9f07&rk=1.445840834132689163412&CK=6A53EEBD27EBBE6785AF7951EC03EB1E&r=0.3795051054613884121115
往返
http://flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=D&DDate1=2017-09-22&ACity2=BJS&DDate2=2017-09-22&IsNearAirportRecommond=0&LogToken=584465ec999045db9a546e1ce6a04a3f&rk=4.057381591787214165342&CK=D76BF5FFFC0A4E89AF51CF7186A1944E&r=0.543402675163338674414

<!DOCTYPE html>
<html>
<head>
    <meta http-equiv="Cache-Control" content="no-transform " /> 
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<meta name="description" content="携程旅行网为您提供北京到昆明特价机票以及北京到昆明航班查询。携程旅行网于2003年在美国纳斯达克上市,拥有覆盖全国的服务网络。提供国内国际各大航空公司的航线航班,安全支付值得信赖,是您网上订购北京到昆明机票的首选。免费咨询800-820-6666。" />
<meta name="keywords" content="北京到昆明特价,北京到昆明机票预订,北京到昆明航班查询,携程机票" />
<title>北京到昆明机票预订 - 北京到昆明特价机票 - 北京到昆明航班查询预订 - 携程国内机票预订</title>

    <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />    
    <link rel="dns-prefetch" href="//webresource.c-ctrip.com" />
    <link rel="dns-prefetch" href="//pic.c-ctrip.com" />
    <link rel="dns-prefetch" href="//images3.c-ctrip.com" />
    <link rel="dns-prefetch" href="//crm.ws.ctrip.com" />
    <link rel="dns-prefetch" href="//s.c-ctrip.com" />
    <link rel="dns-prefetch" href="//www.google-analytics.com" />
    <link rel="canonical" href="//flights.ctrip.com/domestic/booking/BJS-KMG-day-1.html"/>
    <link href="//webresource.c-ctrip.com/ResFlightOnline/R2/Booking/css/fltdomestic111027/searchresult_v2.1.css?ReleaseNo=CR_2017_09_20_21_00_00" type="text/css" rel="stylesheet" />

</head>
<body class="gray_body">
    
        <script type="text/javascript">
    var url = "//flights.ctrip.com/domesticsearch/search/SearchFirstRouteFlights?DCity1=BJS&ACity1=KMG&SearchType=S&DDate1=2017-09-22&IsNearAirportRecommond=0&LogToken=c802fde883e64a3c8fba0e99e45d023c&CK=D7795E14EB96B7578AA673D6EF346C33";
    var _searchCount_c = 0;
    
    function ajaxRequest(n,t){var i=null,e,f,l,o,s,r,c,u,h;if(typeof XMLHttpRequest!="undefined")i=new XMLHttpRequest;else if(typeof ActiveXObject!="undefined"){if(typeof arguments.callee.aciveXString!="string")for(e=["MSXML2.XMLHttp.6.0","MSXML2.XMLHttp.3.0","MSXML2.XMLHttp"],f=0,l=e.length;f<l;f++)try{i=new ActiveXObject(e[f]);arguments.callee.activeXString=e[f];break}catch(a){}i==null&&(i=new ActiveXObject(arguments.callee.activeXString))}i.onreadystatechange=function(){try{if(i.readyState==4)if(i.status>=200&&i.status<300||i.status==304){var r=eval("("+i.responseText+")");if(_searchCount_c==0&&r&&r.Error&&(r.Error.Code==104||r.Error.Code==1004)&&(r.Error.Message==""||!r.Error.Message)){_searchCount_c++;setTimeout(function(){var i=n.split("&");i.pop();ajaxRequest(i.join("&")+"&rt="+Math.random()*1e3,t)},1e3);return}jsonCallback.done(r)}else i.status!=0&&jsonCallback.onError()}catch(u){jsonCallback.onError()}};window.location.hash&&(o=window.location.hash.match(/DDate1=\d{4}-\d{2}-\d{2}/),o&&o.length>0&&(n=n.replace(/DDate1=(\d{4}-\d{2}-\d{2})/ig,o[0])),s=window.location.hash.match(/DDate2=\d{4}-\d{2}-\d{2}/),s&&s.length>0&&(n=n.replace(/DDate2=(\d{4}-\d{2}-\d{2})/ig,s[0])));r=n.replace(/^[\s\xA0]+|[\s\xA0]+$/g,"");(r.indexOf("ClassType=CF")==-1||r.indexOf("ClassType=&")!=-1)&&(r+=getStorage("FD_SearchPage_onlyCf")=="CF"?"&ClassType=CF":"");_searchCount_c>0&&(c=t.split(".")[1],t="0."+c.substring(1,c.length-1));u=r.split("&");h=r.indexOf("rk=")>=0||r.indexOf("rt=")>=0?u.splice(u.length-2,1)[0]:u.pop();u.push("CK=");h=h.split("=")[1];var fn=(function(u,r,k,t){var Z21=1,M21Z=1;Z21=Z21+=parseInt(Math.cos(7) * 0xa);Z21=Z21-=parseInt(Math.tan(7) * 0xa);if(Z21<0)Z21=-Z21; while(Z21>30)Z21=Z21%10;M21Z=M21Z+=parseInt(Math.sin(6) * 0xa);M21Z=M21Z+=parseInt(Math.cos(6) * 0xa);M21Z=M21Z*=parseInt(Math.log(6) * 0xa);if(M21Z<0)M21Z=-M21Z; while(M21Z>30)M21Z=M21Z%10;(function(r,u,x,y,t,k){if(!window.location.href){return;}var l=r.split(''); var 
c=l.splice(y,1);l.splice(x,0,c);t.open('GET', u.join('&')+l.join('') + '&r=' + k, !0);t.send(null);})(r,u,Z21,M21Z,t,k)});fn(u,h,t,i)}var jsonCallback={isError:!1,isReady:!1,data:{},readyList:[],errorList:[],ready:function(n){this.isReady==!1?this.readyList.push(n):n(this.data)},done:function(n){this.isReady=!0;this.data=n;for(var t=0;this.readyList[t];)this.readyList[t](n),t++},error:function(n){this.isError==!1?this.errorList.push(n):n()},onError:function(){this.isError=!0;for(var n=0;this.errorList[n];)this.errorList[n](),n++}},getStorage=function(n){var i,r,t;try{if(i="{}",window.localStorage)i=localStorage.getItem("jStorage");else if(window.globalStorage)i=window.globalStorage[window.location.hostname];else{r=document.head||document.getElementsByTagName("head")[0];t=document.createElement("link");t.style.behavior="url(#default#userData)";r.appendChild(t);try{t.load("jStorage")}catch(u){t.setAttribute("jStorage","{}");t.save("jStorage");t.load("jStorage")}i=t.getAttribute("jStorage")||"{}";r.removeChild(t)}return!i||i=="{}"?"":eval("("+i+")")[n]}catch(f){return""}}


    
    var searchRouteIndex = "0";
    var isCivil = false;
    var roundTripCombinationSwitch = false;
    ajaxRequest(url + '&rk=' + Math.random()*10+'192821','0.2337708871934405872019');
</script>

<div>
  中间省略
</div>

</body>
</html>
/**
 * Builds the signed flight-search URL and sends it as an asynchronous GET.
 *
 * This is a readable version of the search page's inline `ajaxRequest`.
 * It relies on these globals defined elsewhere on the page:
 * `_searchCount_c`, `jsonCallback`, `getStorage` and `window`.
 *
 * What it does, in order:
 *   1. Creates an XMLHttpRequest (or an ActiveX fallback on old IE).
 *   2. Overrides DDate1/DDate2 in the URL with dates found in location.hash.
 *   3. Trims the URL and appends "&ClassType=CF" when local storage says so.
 *   4. Pulls the `CK=...` parameter out of the URL, moves one character of
 *      its value to a different position, and re-appends it as the last
 *      parameter before `&r=`.
 *   5. Opens `<url>&CK=<shuffled>&r=<t>` and sends it.
 *
 * The response is evaluated and handed to `jsonCallback.done`. A first
 * response carrying Error.Code 104/1004 with an empty message triggers one
 * retry after one second, with the last URL parameter replaced by a random
 * `rt=` value.
 *
 * Fixes relative to the captured original: the ActiveX ProgID cache used
 * `arguments.callee` (a TypeError in strict mode / ES modules) and read a
 * misspelled property (`aciveXString`), so the cache was never hit. The
 * function is now referenced by name and the property name is consistent.
 *
 * @param {string} n Search URL. `CK=` must be the last parameter, or the
 *                   second to last when an `rk=`/`rt=` parameter follows it.
 * @param {string} t Decimal string (e.g. "0.2337...") sent as the `r`
 *                   parameter. On a retry its first and last fractional
 *                   digits are dropped.
 */
function ajaxRequest(n, t) {
    var xhr = null;
    var progIds, idx, count;
    var dateMatch, query, fraction, parts, ck;
    var insertAt, removeAt, chars, moved;

    if (typeof XMLHttpRequest != "undefined") {
        xhr = new XMLHttpRequest();
    } else if (typeof ActiveXObject != "undefined") {
        // Probe the ProgIDs only once; the working one is cached on the function.
        if (typeof ajaxRequest.activeXString != "string") {
            progIds = ["MSXML2.XMLHttp.6.0", "MSXML2.XMLHttp.3.0", "MSXML2.XMLHttp"];
            for (idx = 0, count = progIds.length; idx < count; idx++) {
                try {
                    xhr = new ActiveXObject(progIds[idx]);
                    ajaxRequest.activeXString = progIds[idx];
                    break;
                } catch (ignored) {
                    // This ProgID is unavailable; try the next one.
                }
            }
        }
        if (xhr == null) {
            xhr = new ActiveXObject(ajaxRequest.activeXString);
        }
    }

    xhr.onreadystatechange = function () {
        var payload;
        try {
            if (xhr.readyState != 4) {
                return;
            }
            if (xhr.status >= 200 && xhr.status < 300 || xhr.status == 304) {
                // NOTE(review): the response body is eval'ed, not JSON.parse'd.
                // Kept as captured because the body may not be strict JSON.
                payload = eval("(" + xhr.responseText + ")");
                if (_searchCount_c == 0 && payload && payload.Error &&
                    (payload.Error.Code == 104 || payload.Error.Code == 1004) &&
                    (payload.Error.Message == "" || !payload.Error.Message)) {
                    // Retry once: drop the last parameter and add a random rt=.
                    _searchCount_c++;
                    setTimeout(function () {
                        var retryParts = n.split("&");
                        retryParts.pop();
                        ajaxRequest(retryParts.join("&") + "&rt=" + Math.random() * 1e3, t);
                    }, 1e3);
                    return;
                }
                jsonCallback.done(payload);
            } else if (xhr.status != 0) {
                jsonCallback.onError();
            }
        } catch (err) {
            jsonCallback.onError();
        }
    };

    // Dates in the URL hash take precedence over the dates baked into the URL.
    if (window.location.hash) {
        dateMatch = window.location.hash.match(/DDate1=\d{4}-\d{2}-\d{2}/);
        if (dateMatch && dateMatch.length > 0) {
            n = n.replace(/DDate1=(\d{4}-\d{2}-\d{2})/ig, dateMatch[0]);
        }
        dateMatch = window.location.hash.match(/DDate2=\d{4}-\d{2}-\d{2}/);
        if (dateMatch && dateMatch.length > 0) {
            n = n.replace(/DDate2=(\d{4}-\d{2}-\d{2})/ig, dateMatch[0]);
        }
    }

    query = n.replace(/^[\s\xA0]+|[\s\xA0]+$/g, "");
    if (query.indexOf("ClassType=CF") == -1 || query.indexOf("ClassType=&") != -1) {
        query += getStorage("FD_SearchPage_onlyCf") == "CF" ? "&ClassType=CF" : "";
    }

    // On a retry, strip the first and last fractional digits of the r value.
    if (_searchCount_c > 0) {
        fraction = t.split(".")[1];
        t = "0." + fraction.substring(1, fraction.length - 1);
    }

    // Take the CK parameter out: it is second to last when rk=/rt= follows it,
    // otherwise last. An empty "CK=" is pushed so the shuffled value ends up
    // directly after it once the parts are joined.
    parts = query.split("&");
    ck = query.indexOf("rk=") >= 0 || query.indexOf("rt=") >= 0
        ? parts.splice(parts.length - 2, 1)[0]
        : parts.pop();
    parts.push("CK=");
    ck = ck.split("=")[1];

    // Obfuscated constants from the page (Z21 / M21Z in the original).
    // With these exact expressions they evaluate to insertAt = 0, removeAt = 6.
    insertAt = 1;
    insertAt += parseInt(Math.cos(7) * 0xa, 10);
    insertAt -= parseInt(Math.tan(7) * 0xa, 10);
    if (insertAt < 0) {
        insertAt = -insertAt;
    }
    while (insertAt > 30) {
        insertAt = insertAt % 10;
    }

    removeAt = 1;
    removeAt += parseInt(Math.sin(6) * 0xa, 10);
    removeAt += parseInt(Math.cos(6) * 0xa, 10);
    removeAt *= parseInt(Math.log(6) * 0xa, 10);
    if (removeAt < 0) {
        removeAt = -removeAt;
    }
    while (removeAt > 30) {
        removeAt = removeAt % 10;
    }

    if (!window.location.href) {
        return;
    }

    // Move the character at removeAt to insertAt. `moved` is a one-element
    // array (or empty if removeAt is out of range); join('') flattens it.
    chars = ck.split('');
    moved = chars.splice(removeAt, 1);
    chars.splice(insertAt, 0, moved);

    xhr.open('GET', parts.join('&') + chars.join('') + '&r=' + t, true);
    xhr.send(null);
}

/**
 * Small deferred-result holder for the search response.
 *
 * Listeners registered with `ready` / `error` before the result is known are
 * queued; once `done` / `onError` has fired, later registrations are invoked
 * immediately instead of being queued.
 */
var jsonCallback = {
    isError: false,
    isReady: false,
    data: {},
    readyList: [],
    errorList: [],

    /** Registers a success listener, or calls it right away with the stored data. */
    ready: function (n) {
        if (this.isReady == false) {
            this.readyList.push(n);
        } else {
            n(this.data);
        }
    },

    /** Stores the result and notifies every queued success listener in order. */
    done: function (n) {
        var pos = 0;
        this.isReady = true;
        this.data = n;
        while (this.readyList[pos]) {
            this.readyList[pos](n);
            pos++;
        }
    },

    /** Registers a failure listener, or calls it right away if already failed. */
    error: function (n) {
        if (this.isError == false) {
            this.errorList.push(n);
        } else {
            n();
        }
    },

    /** Marks the request as failed and notifies every queued failure listener. */
    onError: function () {
        var pos = 0;
        this.isError = true;
        while (this.errorList[pos]) {
            this.errorList[pos]();
            pos++;
        }
    }
};

/**
 * Reads one key from the serialized object stored under "jStorage".
 *
 * The backing store is chosen in this order: `localStorage`, then
 * `window.globalStorage` keyed by hostname, then a temporary <link> element
 * using the "#default#userData" behavior. Any failure yields "".
 *
 * @param {string} n Key to look up in the stored object.
 * @returns {*} The stored value; "" when the store is empty or unreadable;
 *              undefined when the key is absent from a non-empty store.
 */
var getStorage = function (n) {
    var serialized = "{}";
    var headEl;
    var linkEl;

    try {
        if (window.localStorage) {
            serialized = localStorage.getItem("jStorage");
        } else if (window.globalStorage) {
            serialized = window.globalStorage[window.location.hostname];
        } else {
            headEl = document.head || document.getElementsByTagName("head")[0];
            linkEl = document.createElement("link");
            linkEl.style.behavior = "url(#default#userData)";
            headEl.appendChild(linkEl);
            try {
                linkEl.load("jStorage");
            } catch (loadErr) {
                // Nothing saved yet: seed an empty object, persist it, then reload.
                linkEl.setAttribute("jStorage", "{}");
                linkEl.save("jStorage");
                linkEl.load("jStorage");
            }
            serialized = linkEl.getAttribute("jStorage") || "{}";
            headEl.removeChild(linkEl);
        }

        if (!serialized || serialized == "{}") {
            return "";
        }
        // NOTE(review): stored text is eval'ed rather than JSON.parse'd; kept as captured.
        return eval("(" + serialized + ")")[n];
    } catch (err) {
        return "";
    }
};

数据解析

├─ als  航空公司  {MU: "东方航空", ZH: "深圳航空", CA: "中国国航"}
├─ apb  机场  {TYN12: "武宿国际机场T2", PEK2: "首都国际机场T2", YCU1169: "关公机场", PEK3: "首都国际机场T3", NKG987: "禄口国际机场T2"}
├─ fis  
    ├─ acc  到达城市代码  YCU
    ├─ acn  到达城市名  运城
	├─ alc  航空公司代码  CA
	├─ apbn 到达机场名  关公机场
	├─ apc  到达机场代码  YCU
	├─ asmsn 航站楼  
	├─ at   到达时间  2017-10-20 13:50:00
	├─ 
	├─ confort  里面有历史准点率
	      ├─  ArrivedBridge
		  ├─  BoardingWay
		  ├─  DepartBridge
		  ├─  HistoryPunctuality
		  ├─  HistoryPunctualityArr  
		  ├─  SubClassList
		  ├─
	├─
	├─ dcc  出发城市代码  BJS
	├─ dcn  出发城市名  北京
	├─ dpbn 出发机场名  首都国际机场T3
    ├─ dpc  出发机场代码  PEK
	├─ dsmsn 出发航站楼 
	├─ dt   出发时间  2017-10-20 11:55:00
	├─ fn   航班号  CA1237
	├─ lcfp  票价
	├─ lp 实际票价
	├─ pr 准点率  96.6100006103516
	├─ rt 折扣 
	├─ sdft 
	├─ tax  税 
├─ lps 半年内 每天的机票价格
├─ tf  中转组合
    ├─ Routes  
	├─ 0
	   ├─ fis
	       ├─ 
	   ├─ tcn
	├─ at
	├─ c
	├─ cn
	├─ desc
	├─ dt
	├─ fcrtp
	├─ fut
	├─ icp
	├─ ics
	├─ imre
	├─ ire
	├─ k
	├─ 

References

[1] 用Python抓取携程网机票信息 过程纪实(上篇)
[2] 用Python抓取携程网机票信息 过程纪实(下篇)
[3] get-ctrip-data
[4] post-803