| 
					
				 | 
			
			
				@@ -0,0 +1,606 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+/** 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * 重庆材料信息价爬虫 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * 由于headless chrome “puppeteer”占用资源比较大,且材料信息价的网站渲染的是静态内容,因此不需要使用puppeteer。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * 数据获取使用cheerio(解析html,可用类jquery语法操作生成的数据) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				// Public interface of this module: a single entry point, `crawlData`.
				module.exports = { crawlData: crawlData };
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+const cheerio = require('cheerio'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+const axios = require('axios'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+const querystring = require('querystring'); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Page types. Each value is the path of the page that is requested for
				 * that kind of price listing. Frozen so the shared constant cannot be
				 * modified by accident.
				 */
				const PageType = Object.freeze({
				    GENERAL: '/Index.aspx', // main material information prices
				    AREA: '/AreaIndex.aspx', // local material site prices per district/county
				    MIXED: '/ReadyMixedIndex.aspx', // ready-mixed mortar information prices
				});
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Builds the form data for the main material information price page.
				 *
				 * @param {Object} $ - Page content (cheerio-style selector function); the hidden
				 *     `__VIEWSTATE`, `__VIEWSTATEGENERATOR`, `txtfatherclass` and `btnLink`
				 *     values are read from it.
				 * @param {Object} [props] - Submit properties. A missing value is treated as `{}`.
				 * @param {String} [props.eventTarget] - Value for `__EVENTTARGET`. When it is
				 *     absent, the page's `btnLink` value is submitted instead.
				 * @param {String} [props.period] - Period identifier.
				 * @param {String} [props.materialClass] - Material class.
				 * @param {String|Number} [props.classID] - Class id.
				 * @param {String|Number} [props.page] - Value of the pager's `listPage` field,
				 *     defaults to '1'.
				 * @return {Object} Form fields in submission order.
				 */
				function getGeneralDataBody($, props) {
				    const options = props || {};
				    // Missing values must become empty text; interpolating them directly would
				    // put the literal word "undefined" into the submitted form fields.
				    const orEmpty = (value) => (value == null ? '' : value);
				    const classID = orEmpty(options.classID);
				    const period = orEmpty(options.period);
				    const materialClass = options.materialClass || '';

				    const body = {
				        __EVENTTARGET: options.eventTarget || '',
				        __EVENTARGUMENT: '',
				        __VIEWSTATE: $('#__VIEWSTATE').val(),
				        __VIEWSTATEGENERATOR: $('#__VIEWSTATEGENERATOR').val(),
				        ID_ucPrice$linkvv: options.period,
				        ID_ucPrice$linkcategory: materialClass,
				        ID_ucPrice$LinkValue: `${classID},${period},${materialClass}`,
				        ID_ucPrice$txtsonclass: `sonclass${classID}`,
				        ID_ucPrice$txtfatherclass: $('#ID_ucPrice_txtfatherclass').val(),
				        ID_ucPrice$txtClassId: options.classID || '',
				        ID_ucPrice$ddlSearchYear: '请选择',
				        ID_ucPrice$ddlSearchMonth: '请选择',
				        ID_ucPrice$txtSearchCailiao: '',
				        ID_ucPrice$UcPager1$listPage: (options.page && String(options.page)) || '1',
				    };
				    // Without an event target the request is submitted through the link button.
				    if (!options.eventTarget) {
				        body.ID_ucPrice$btnLink = $('#ID_ucPrice_btnLink').val();
				    }
				    return body;
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Builds the form data for the per-district local material site price page.
				 *
				 * @param {Object} $ - Page content (cheerio-style selector function) from which
				 *     the hidden `__VIEWSTATE` / `__VIEWSTATEGENERATOR` values are read.
				 * @param {Object} props - Submit properties (`eventTarget`, `period`, `page`).
				 * @return {Object} Form fields in submission order, or `{}` when `props` is
				 *     missing or has no keys.
				 */
				function getAreaDataBody($, props) {
				    const nothingToSubmit = !props || Object.keys(props).length === 0;
				    if (nothingToSubmit) {
				        return {};
				    }

				    const prefix = 'ID_ucAreaPrice$';
				    const hasEventTarget = Boolean(props.eventTarget);
				    const pageText = props.page ? String(props.page) : '';

				    const form = {};
				    form.__EVENTTARGET = hasEventTarget ? props.eventTarget : '';
				    form.__EVENTARGUMENT = '';
				    form.__VIEWSTATE = $('#__VIEWSTATE').val();
				    form.__VIEWSTATEGENERATOR = $('#__VIEWSTATEGENERATOR').val();
				    form[prefix + 'linkvv'] = props.period;
				    form[prefix + 'LinkValue'] = '';
				    form[prefix + 'dropArea'] = 'code';
				    form[prefix + 'txtSearchCailiao'] = '';
				    form[prefix + 'UcPager1$listPage'] = pageText || '1';
				    // Only a plain submit (no event target) carries the button field.
				    if (!hasEventTarget) {
				        form[prefix + 'btnAreaMaster'] = 'Button';
				    }
				    return form;
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Builds the form data for the ready-mixed mortar information price page.
				 *
				 * @param {Object} $ - Page content (cheerio-style selector function) from which
				 *     the hidden `__VIEWSTATE` / `__VIEWSTATEGENERATOR` values are read.
				 * @param {Object} props - Submit properties (`eventTarget`, `period`, `page`).
				 * @return {Object} Form fields in submission order, or `{}` when `props` is
				 *     missing or has no keys.
				 */
				function getMixedDataBody($, props) {
				    if (!(props && Object.keys(props).length)) {
				        return {};
				    }

				    const fieldName = (suffix) => 'ID_ucReadyMixedPrice$' + suffix;
				    const target = props.eventTarget;
				    const requestedPage = props.page ? String(props.page) : '';

				    const fields = [
				        ['__EVENTTARGET', target || ''],
				        ['__EVENTARGUMENT', ''],
				        ['__VIEWSTATE', $('#__VIEWSTATE').val()],
				        ['__VIEWSTATEGENERATOR', $('#__VIEWSTATEGENERATOR').val()],
				        [fieldName('linkvv'), props.period],
				        [fieldName('LinkValue'), ''],
				        [fieldName('dropArea'), 'code'],
				        [fieldName('txtSearchCailiao'), ''],
				        [fieldName('UcPager1$listPage'), requestedPage || '1'],
				    ];
				    // The button field is only sent when no event target drives the request.
				    if (!target) {
				        fields.push([fieldName('btnAreaMaster'), 'Button']);
				    }

				    const body = {};
				    for (const [key, value] of fields) {
				        body[key] = value;
				    }
				    return body;
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				// Request submission

				// Request timeout in milliseconds.
				const TIME_OUT = 10000;

				// Shared axios instance used for every request to the price site.
				//
				// No proxy is hard-coded here: a fixed 127.0.0.1:8888 (Fiddler) proxy makes
				// every request fail whenever Fiddler is not running. To capture traffic while
				// debugging, set the conventional `http_proxy` environment variable instead
				// (e.g. http_proxy=http://127.0.0.1:8888), which axios honours in Node.
				const axiosInstance = axios.create({
				    baseURL: 'http://www.cqsgczjxx.org/Jgxx/',
				    timeout: TIME_OUT,
				    headers: {
				        'Cache-Control': 'max-age=0',
				        'Content-Type': 'application/x-www-form-urlencoded',
				        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
				        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
				        'Accept-Encoding': 'gzip, deflate',
				        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6',
				    },
				    // The HTML is consumed as a string; 'document' is a browser-only response type.
				    responseType: 'text',
				});

				// Response interceptor: successful responses pass through untouched; timeouts
				// are turned into a readable message, every other error is passed on as is.
				axiosInstance.interceptors.response.use(
				    (response) => response,
				    (error) => {
				        // Guard against rejections that carry no string message.
				        const message = error && typeof error.message === 'string' ? error.message : '';
				        if (message.includes('timeout')) {
				            // NOTE(review): this rejects with a plain string rather than an Error;
				            // kept unchanged because callers may rely on the string value.
				            return Promise.reject(`目标网络超时,请稍后再试。(${TIME_OUT}ms)`);
				        }
				        return Promise.reject(error);
				    }
				);
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				// Requests have to carry a Cookie, otherwise some of them are answered with a
				// 500 error (presumably an anti-crawler measure of the site).
				let curCookie = '';

				/**
				 * Merges the `name=value` pairs of `Set-Cookie` headers into an existing
				 * `Cookie` header string. A cookie that is set again replaces its old value.
				 * @param {String} current - Current `Cookie` header value (may be empty)
				 * @param {Array<String>} setCookieHeaders - Raw `Set-Cookie` header values
				 * @return {String} Updated `Cookie` header value
				 */
				function mergeSetCookies(current, setCookieHeaders) {
				    const jar = new Map();
				    const remember = (pair) => {
				        const text = String(pair).trim();
				        if (!text) {
				            return;
				        }
				        const eq = text.indexOf('=');
				        jar.set(eq === -1 ? text : text.slice(0, eq), text);
				    };
				    String(current || '').split(';').forEach(remember);
				    // Only the leading `name=value` part of each header is kept; the attributes
				    // after the first ';' (path, expires, ...) are dropped.
				    setCookieHeaders.forEach((header) => remember(String(header).split(';')[0]));
				    return [...jar.values()].join('; ');
				}

				/**
				 * Loads a page and returns it in a form that can be queried jQuery-style.
				 * The request is always a POST; the cookies stored from earlier responses are
				 * sent along and updated from this response.
				 * @param {String} url - Request url, passed to `axiosInstance.post`
				 * @param {Object} [body] - Form data; sent url-encoded, or no body at all when omitted
				 * @return {DOM-LIKE} - DOM-like data obtained by parsing the html with cheerio
				 */
				async function loadPage(url, body) {
				    const config = curCookie ? { headers: { Cookie: curCookie } } : {};
				    const payload = body ? querystring.stringify(body) : null;
				    const rst = await axiosInstance.post(url, payload, config);
				    // Update the cookie: keep every cookie the server set, not just the first one.
				    const cookies = rst.headers['set-cookie'];
				    if (Array.isArray(cookies) && cookies.length) {
				        curCookie = mergeSetCookies(curCookie, cookies);
				    }
				    return cheerio.load(rst.data);
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				// Maps a month number ('1'..'12') to its two-digit, zero-padded text.
				const monthMap = {};
				for (let month = 1; month <= 12; month++) {
				    monthMap[String(month)] = month < 10 ? `0${month}` : String(month);
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Gets the period data for every month of a closed range.
				 *
				 * The range is walked month by month, so it may span a year boundary
				 * (e.g. 2019-11 .. 2020-02). Bounds are compared numerically, which also
				 * makes unpadded months such as "2020-5" work.
				 *
				 * @param {String} from - First period, eg: 2020-01
				 * @param {String} to - Last period, eg: 2020-05
				 * @param {Object} $index - Initial page content loaded by cheerio
				 * @return {Array[object] || Null} eg: [{period: '2020-05', uid: 'XCCXXXXX-XX'}];
				 *     null when a bound is malformed, `from` is later than `to`, or any
				 *     period of the range cannot be found on the page.
				 */
				function getPeriodData(from, to, $index) {
				    // Parses "YYYY-M(M)" into numbers; null when the text is not a valid period.
				    const parsePeriod = (text) => {
				        const matched = /(\d+)-(\d+)/.exec(String(text));
				        if (!matched) {
				            return null;
				        }
				        const year = Number(matched[1]);
				        const month = Number(matched[2]);
				        return month >= 1 && month <= 12 ? { year, month } : null;
				    };
				    // Number of months since year 0, so periods compare and step as integers.
				    const toOrdinal = ({ year, month }) => year * 12 + (month - 1);

				    const start = parsePeriod(from);
				    const end = parsePeriod(to);
				    if (!start || !end || toOrdinal(start) > toOrdinal(end)) {
				        return null;
				    }

				    const $period = $index('#PriceLMenu');
				    const list = [];
				    // Build the period list for the requested range.
				    for (let ordinal = toOrdinal(start); ordinal <= toOrdinal(end); ordinal++) {
				        const year = Math.floor(ordinal / 12);
				        const month = (ordinal % 12) + 1;
				        const uid = getPeriodUID(year, month);
				        // An invalid period exists: return null for the whole range.
				        if (!uid) {
				            return null;
				        }
				        list.push({
				            period: `${year}-${String(month).padStart(2, '0')}`,
				            uid,
				        });
				    }
				    return list;

				    // Looks up the uid of one period in the menu; null when it is not present.
				    function getPeriodUID(year, month) {
				        const $year = $period.find('.MenuOneTitle').filter(function () {
				            return $index(this).text() === `${year}年`;
				        });
				        if (!$year.length) {
				            return null;
				        }
				        const $month = $year.parent().next().find('a').filter(function () {
				            return $index(this).text() === `${month}月`;
				        });
				        if (!$month.length) {
				            return null;
				        }
				        // The period uid lives inside the onclick attribute and has to be extracted.
				        const onclickText = $month.attr('onclick');
				        if (!onclickText) {
				            return null;
				        }
				        const matched = String(onclickText).match(/Onlink\('([^']+)'/);
				        return matched ? matched[1] : null;
				    }
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Table types. Frozen so the shared constant cannot be modified by accident.
				 */
				const TableType = Object.freeze({
				    BUILDING: 1, // main materials: construction & installation materials, and "green"
				    GARDEN: 2, // main materials: landscaping / greening
				    ENERGY: 3, // main materials: energy-saving building materials
				    AREA: 4, // area related (materials per district/county)
				    MIXED: 5, // area related (ready-mixed mortar)
				});
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Crawls the table data of a page, picking the table parser by table type.
				 * @param {Object} $page - Page content
				 * @param {Number} type - Table type (one of the `TableType` values)
				 * @return {Array[object]} Parsed rows; an empty array for an unknown type
				 */
				function crawlTableData($page, type) {
				    // Building and energy tables share the same column layout.
				    if (type === TableType.BUILDING || type === TableType.ENERGY) {
				        return crawlNormalTable($page);
				    }
				    if (type === TableType.GARDEN) {
				        return crawlGardenTable($page);
				    }
				    // Area and ready-mixed tables differ only in the id of the grid element.
				    if (type === TableType.AREA) {
				        return crawlAreaTable($page, '#ID_ucAreaPrice_gridView');
				    }
				    if (type === TableType.MIXED) {
				        return crawlAreaTable($page, '#ID_ucReadyMixedPrice_gridView');
				    }
				    return [];
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Shared table reader: turns the flat list of `tr td span` cells of a grid
				 * into row objects.
				 *
				 * Every displayed row is expected to contribute `fields.length + 1` spans; the
				 * first one of each row (the serial number column) is dropped. A trailing row
				 * with fewer cells than `fields` is not emitted.
				 *
				 * @param {Object} $page - Page content
				 * @param {String} gridSelector - Selector of the grid element
				 * @param {Array<String>} fields - Property names for the remaining columns, in order
				 * @return {Array[object]} One object per complete row; values are the cell texts
				 */
				function collectGridRows($page, gridSelector, fields) {
				    const width = fields.length;
				    const rows = [];
				    let current;
				    $page(gridSelector)
				        .find('tr td span')
				        // Drops the serial number cell that starts every row.
				        // NOTE(review): header cells presumably do not match `td span` - confirm.
				        .filter((index) => index % (width + 1) !== 0)
				        .each((index, cell) => {
				            const col = index % width;
				            if (col === 0) {
				                current = {};
				            }
				            current[fields[col]] = $page(cell).text();
				            if (col === width - 1) {
				                rows.push(current);
				            }
				        });
				    return rows;
				}

				/**
				 * Crawls table data whose columns are:
				 * No. | material name | specs/model | unit | tax-inclusive price (yuan) | tax-exclusive price (yuan) | remark
				 * @param {Object} $page - Page content
				 * @return {Array[object]} Rows as {name, specs, unit, taxPrice, noTaxPrice, remark}
				 */
				function crawlNormalTable($page) {
				    return collectGridRows($page, '#ID_ucPrice_gridView', [
				        'name',
				        'specs',
				        'unit',
				        'taxPrice',
				        'noTaxPrice',
				        'remark',
				    ]);
				}

				/**
				 * Crawls table data whose columns are:
				 * No. | family/genus | product name | height (CM) | trunk diameter (CM) | crown diameter (CM) | branching height (CM) | unit | tax-inclusive price (yuan) | tax-exclusive price (yuan) | remark
				 * @param {Object} $page - Page content
				 * @return {Array[object]} Rows as {genera, name, height, branchDiameter,
				 *     crownDiameter, branchHeight, unit, taxPrice, noTaxPrice, remark}
				 */
				function crawlGardenTable($page) {
				    return collectGridRows($page, '#ID_ucPrice_gridView', [
				        'genera',
				        'name',
				        'height',
				        'branchDiameter',
				        'crownDiameter',
				        'branchHeight',
				        'unit',
				        'taxPrice',
				        'noTaxPrice',
				        'remark',
				    ]);
				}

				/**
				 * Crawls table data whose columns are:
				 * No. | district/county | material name | specs and model | unit of measure | tax-inclusive price (yuan) | tax-exclusive price (yuan)
				 * @param {Object} $page - Page content
				 * @param {String} viewSelector - Table selector (ID)
				 * @return {Array[object]} Rows as {area, name, specs, unit, taxPrice, noTaxPrice}
				 */
				function crawlAreaTable($page, viewSelector) {
				    return collectGridRows($page, viewSelector, [
				        'area',
				        'name',
				        'specs',
				        'unit',
				        'taxPrice',
				        'noTaxPrice',
				    ]);
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Event trigger types: the `__EVENTTARGET` values of the "next page" buttons.
				 *
				 * All names use `$` as separator, like the form field names posted for the
				 * same controls (e.g. `ID_ucReadyMixedPrice$UcPager1$listPage`). MIXED_NEXT
				 * previously used the `_`-separated element id form instead.
				 * Frozen so the shared constant cannot be modified by accident.
				 */
				const EventTarget = Object.freeze({
				    GENERAL_NEXT: 'ID_ucPrice$UcPager1$btnNext',
				    AREA_NEXT: 'ID_ucAreaPrice$UcPager1$btnNext',
				    MIXED_NEXT: 'ID_ucReadyMixedPrice$UcPager1$btnNext',
				});
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+/** 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * 爬取一页一页的表格数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * @param {Object} $index - 索引页面内容 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * @param {Object} props - 提交的表单内容 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * @param {String} pageType - 页面类型 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * @param {Number} tableType - 表格类型 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+async function crawlPagesData($index, props, pageType, tableType) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    let body; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    let pageStateSelector; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if (pageType === PageType.GENERAL) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        body = getGeneralDataBody($index, props); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        pageStateSelector = '#ID_ucPrice_UcPager1_lbPage'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } else if (pageType === PageType.AREA) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        body = getAreaDataBody($index, props); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        pageStateSelector = '#ID_ucAreaPrice_UcPager1_lbPage'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        body = getMixedDataBody($index, props); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        pageStateSelector = '#ID_ucReadyMixedPrice_UcPager1_lbPage'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    const $firstPage = await loadPage(pageType, body); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    const rst = []; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 获取第一页数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    rst.push(...crawlTableData($firstPage, tableType)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 获取除第一页的数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 获取页码 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    const pageState = $firstPage(pageStateSelector).text(); // eg: 1/10 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    const totalPage = +pageState.split('/')[1]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    const asyncCount = 6; // 最高批量次数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    let curCount = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    let task = []; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for (let page = 1; page < totalPage; page++) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        task.push(crawlPageData(page)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        curCount++; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if (curCount === asyncCount) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            const allData = await Promise.all(task); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            allData.forEach(data => rst.push(...data)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            curCount = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            task = []; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if (task.length) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        const allData = await Promise.all(task); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        allData.forEach(data => rst.push(...data)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return rst; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // 爬取页码数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    async function crawlPageData(page) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        const pageProps = { ...props, page }; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        let body; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if (pageType === PageType.GENERAL) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            pageProps.eventTarget = EventTarget.GENERAL_NEXT; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            body = getGeneralDataBody($firstPage, pageProps); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        } else if (pageType === PageType.AREA) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            pageProps.eventTarget = EventTarget.AREA_NEXT; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            body = getAreaDataBody($firstPage, pageProps); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        } else { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            pageProps.eventTarget = EventTarget.MIXED_NEXT; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            body = getMixedDataBody($firstPage, pageProps); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        const $page = await loadPage(pageType, body); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return crawlTableData($page, tableType); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Crawl one engineering class of the general price page: construction and
				 * installation materials, landscaping materials, or green / energy-saving
				 * building materials.
				 *
				 * For TableType.BUILDING the rows are crawled once per material category
				 * link found under `#ID_ucPrice_CategoryLabel`; for every other table type
				 * all rows are crawled in a single pass with an empty `materialClass`.
				 *
				 * @param {String} period - Period uid.
				 * @param {String} classID - Engineering class id.
				 * @param {Object} $index - Initial page content (cheerio instance).
				 * @param {Number} type - Table type (a TableType value).
				 * @return {Promise<Array<Object>>} For TableType.BUILDING:
				 *   [{ materialClass: '一、黑色及有色金属', items: [...] }]; otherwise the
				 *   flat list of items resolved by crawlPagesData.
				 * @throws {Error} When no material category can be found for TableType.BUILDING.
				 */
				async function crawlGeneralSubData(period, classID, $index, type) {
				    const body = getGeneralDataBody($index, { period, classID });
				    console.time('crawlGeneralSubData');
				    try {
				        const $engineeringClassPage = await loadPage(PageType.GENERAL, body);

				        if (type !== TableType.BUILDING) {
				            const items = await crawlPagesData(
				                $engineeringClassPage,
				                { period, classID, materialClass: '' },
				                PageType.GENERAL,
				                type
				            );
				            return items;
				        }

				        // NOTE(review): the category list is read from the initial page ($index),
				        // not from the freshly loaded class page - confirm this is intended.
				        const classList = crawlMaterialClassList($index('#ID_ucPrice_CategoryLabel'));
				        if (!classList.length) {
				            throw new Error('无法爬取到材料分类。');
				        }
				        console.log(classList);

				        const rst = [];
				        // Categories are crawled one after another; crawlPagesData receives the
				        // category name through `materialClass`.
				        for (const materialClass of classList) {
				            const items = await crawlPagesData(
				                $engineeringClassPage,
				                { period, classID, materialClass },
				                PageType.GENERAL,
				                type
				            );
				            rst.push({ materialClass, items });
				        }
				        return rst;
				    } finally {
				        // Always close the timer, including when the crawl fails.
				        console.timeEnd('crawlGeneralSubData');
				    }

				    // Collect the text of every <a> inside the material category container.
				    function crawlMaterialClassList($class) {
				        const list = [];
				        $class.find('a').each(function () {
				            // Wrap with the same cheerio instance that produced the selection.
				            list.push($index(this).text());
				        });
				        return list;
				    }
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Crawl the main material information prices (this part is used as the
				 * general library).
				 *
				 * The engineering class ids are parsed from the `onclick` attribute of the
				 * tab links under `#ID_ucPrice_tabNewBar`; each class that is present is then
				 * crawled, one after another, with crawlGeneralSubData.
				 *
				 * @param {String} period - Period uid.
				 * @param {Object} $index - Initial page content (cheerio instance).
				 * @return {Promise<Object>} Object holding `building`, `garden` and/or
				 *   `energy`, each set only when the matching tab exists.
				 * @throws {Error} When a tab link carries no parsable class id.
				 */
				async function crawlGeneralData(period, $index) {
				    const { building, garden, energy } = crawlClass($index('#ID_ucPrice_tabNewBar'));
				    const rst = {};
				    if (building) {
				        rst.building = await crawlGeneralSubData(period, building, $index, TableType.BUILDING);
				    }
				    if (garden) {
				        // For landscaping materials, the category of each row is its "科属" column.
				        rst.garden = await crawlGeneralSubData(period, garden, $index, TableType.GARDEN);
				    }
				    if (energy) {
				        // Every row under this tab belongs to the single category
				        // "绿色、节能建筑工程材料".
				        rst.energy = await crawlGeneralSubData(period, energy, $index, TableType.ENERGY);
				    }
				    return rst;

				    // Extract the engineering class ids from the tab links.
				    function crawlClass($class) {
				        let building; // 建安工程材料
				        let garden; // 园林绿化工程材料
				        let energy; // 绿色、节能建筑工程材料
				        const reg = /OnClassson\('([^']+)'/;
				        $class.find('a').each(function () {
				            const $anchor = $index(this);
				            const text = String($anchor.text()).trim();
				            // A link without an onclick attribute yields undefined from attr();
				            // treat it as "no id" so the domain error below is raised instead
				            // of a TypeError.
				            const onclickText = String($anchor.attr('onclick') || '');
				            const matched = onclickText.match(reg);
				            if (!matched) {
				                throw new Error('无法爬取到工程分类。');
				            }
				            if (text === '建安工程材料') {
				                building = matched[1];
				            } else if (text === '园林绿化工程材料') {
				                garden = matched[1];
				            } else if (text === '绿色、节能建筑工程材料') {
				                energy = matched[1];
				            }
				        });
				        return { building, garden, energy };
				    }
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Crawl the on-site prices of local materials for each district / county.
				 *
				 * @param {String} period - Period uid.
				 * @return {Promise<Array<Object>>} Whatever crawlPagesData resolves with for
				 *   the area page.
				 */
				async function crawlAreaData(period) {
				    // The area listing has its own entry page, so it is loaded fresh here.
				    const $areaIndex = await loadPage(PageType.AREA);
				    const props = { period };
				    // Walk every result page of the area table.
				    return crawlPagesData($areaIndex, props, PageType.AREA, TableType.AREA);
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Crawl the ready-mixed mortar information prices.
				 *
				 * @param {String} period - Period uid.
				 * @return {Promise<Array<Object>>} Whatever crawlPagesData resolves with for
				 *   the ready-mixed page.
				 */
				async function crawlMixedData(period) {
				    // The ready-mixed listing has its own entry page, so it is loaded fresh here.
				    const $mixedIndex = await loadPage(PageType.MIXED);
				    const props = { period };
				    // Walk every result page of the ready-mixed table.
				    return crawlPagesData($mixedIndex, props, PageType.MIXED, TableType.MIXED);
				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Placeholder for transforming and saving the data crawled for one period.
				 * The body is currently empty, so calling it does nothing and returns undefined.
				 * NOTE(review): the name is spelled `transfrom`; kept as-is because other code
				 * may reference it.
				 *
				 * @param {String} period - Period, eg: '2020-05'
				 * @param {Object} generalData - Main materials { building, garden, energy }
				 * @param {Array[object]} areaData - Materials of each area
				 * @param {Array[object]} mixedData - Ready-mixed mortar of each area
				 */
				function transfromAndSave(period, generalData, areaData, mixedData) {

				}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				/**
				 * Crawl the data period by period.
				 *
				 * For every period between `from` and `to` the general, area and ready-mixed
				 * data are crawled and handed to transfromAndSave. Any error is caught and
				 * logged together with the range of periods that completed; it is not
				 * rethrown.
				 *
				 * @param {String} from - First period to crawl, eg: 2020-01
				 * @param {String} to - Last period to crawl, eg: 2020-05
				 * @return {Promise<undefined>} Resolves once crawling finished or failed.
				 */
				async function crawlData(from, to) {
				    // Last period that was crawled completely; used for the error hint below.
				    let curPeriod;
				    try {
				        const $index = await loadPage(PageType.GENERAL);
				        const periodData = getPeriodData(from, to, $index);
				        if (!periodData) {
				            throw new Error('无效的期数区间。');
				        }
				        console.log(periodData);
				        // Crawl one period at a time.
				        for (const periodItem of periodData) {
				            // The initial page already is the main material price page.
				            const generalData = await crawlGeneralData(periodItem.uid, $index);
				            // On-site prices of local materials for each district / county.
				            const areaData = await crawlAreaData(periodItem.uid);
				            // Ready-mixed mortar information prices.
				            const mixedData = await crawlMixedData(periodItem.uid);
				            // Hand the results over instead of discarding them. `await` is a
				            // no-op for a synchronous implementation and keeps this correct
				            // if saving becomes asynchronous.
				            await transfromAndSave(periodItem.period, generalData, areaData, mixedData);
				            curPeriod = periodItem.period;
				        }
				    } catch (err) {
				        // On failure, report which periods were crawled successfully.
				        const errTip = curPeriod ? `\n成功爬取期数为:${from}到${curPeriod}` : '';
				        console.log(`err`);
				        console.log(String(err) + errTip);
				    }
				}
			 |