|
@@ -0,0 +1,956 @@
|
|
|
+/**
|
|
|
+ * @author vian
|
|
|
+ * 重庆材料信息价爬虫
|
|
|
+ * 由于headless chrome “puppeteer”占用资源比较大,且材料信息价的数据是ssr的静态内容,因此不需要使用puppeteer。
|
|
|
+ * 数据获取使用cheerio(解析html,可用类jquery语法操作生成的数据)
|
|
|
+ */
|
|
|
+
|
|
|
+module.exports = {
|
|
|
+ crawlData,
|
|
|
+};
|
|
|
+
|
|
|
+const cheerio = require('cheerio');
|
|
|
+const axios = require('axios');
|
|
|
+const querystring = require('querystring');
|
|
|
+const uuidV1 = require('uuid/v1');
|
|
|
+const mongoose = require('mongoose');
|
|
|
+const { isDef } = require('../../../public/common_util');
|
|
|
+const { SSL_OP_SSLEAY_080_CLIENT_DH_BUG } = require('constants');
|
|
|
+
|
|
|
+const compilationModel = mongoose.model('compilation');
|
|
|
+const priceInfoLibModel = mongoose.model('std_price_info_lib');
|
|
|
+const priceInfoClassModel = mongoose.model('std_price_info_class');
|
|
|
+const priceInfoItemModel = mongoose.model('std_price_info_items');
|
|
|
+const priceInfoAreaModel = mongoose.model('std_price_info_areas');
|
|
|
+
|
|
|
// Master switch for the diagnostic output written by debugConsole.
const isDebug = true;

/**
 * Passes a value to the console, but only while debugging is switched on.
 * @param {*} str - value to print (or the timer label for 'time' / 'timeEnd')
 * @param {String} [type='log'] - name of the console method to invoke
 */
function debugConsole(str, type = 'log') {
    if (!isDebug) {
        return;
    }
    const print = console[type];
    print.call(console, str);
}
|
|
|
+
|
|
|
// Page types. Each value is the page path that is passed as the request url.
// Frozen so the shared constant cannot be modified by accident.
const PageType = Object.freeze({
    GENERAL: '/Index.aspx', // main material prices
    AREA: '/AreaIndex.aspx', // per-district local material prices
    MIXED: '/ReadyMixedIndex.aspx', // ready-mixed mortar prices
});
|
|
|
+
|
|
|
/**
 * Builds the form fields posted to the main material price page.
 * The hidden __VIEWSTATE fields are read from the page passed in as `$`.
 * @param {Function} $ - cheerio handle of the page the hidden fields are read from
 * @param {Object} props - submit options
 * @param {String} props.period - period uid
 * @param {String} [props.classID] - engineering class id
 * @param {String} [props.materialClass] - material class name
 * @param {Number|String} [props.page] - value for the pager list, '1' when omitted
 * @param {String} [props.eventTarget] - control that raises the post-back
 * @return {Object} form fields in submission order
 */
function getGeneralDataBody($, props) {
    const field = name => `ID_ucPrice$${name}`;
    const category = props.materialClass || '';
    const pageNo = props.page ? String(props.page) || '1' : '1';

    const body = {
        __EVENTTARGET: props.eventTarget || '',
        __EVENTARGUMENT: '',
        __VIEWSTATE: $('#__VIEWSTATE').val(),
        __VIEWSTATEGENERATOR: $('#__VIEWSTATEGENERATOR').val(),
    };
    body[field('linkvv')] = props.period;
    body[field('linkcategory')] = category;
    body[field('LinkValue')] = `${props.classID},${props.period},${category}`;
    body[field('txtsonclass')] = `sonclass${props.classID}`;
    body[field('txtfatherclass')] = $('#ID_ucPrice_txtfatherclass').val();
    body[field('txtClassId')] = props.classID || '';
    body[field('ddlSearchYear')] = '请选择';
    body[field('ddlSearchMonth')] = '请选择';
    body[field('txtSearchCailiao')] = '';
    body[field('UcPager1$listPage')] = pageNo;

    // Without an explicit event target the request is sent as a click on the link button.
    if (!props.eventTarget) {
        body[field('btnLink')] = $('#ID_ucPrice_btnLink').val();
    }
    return body;
}
|
|
|
+
|
|
|
/**
 * Builds the form fields posted to the per-district local material price page.
 * @param {Function} $ - cheerio handle of the page the hidden fields are read from
 * @param {Object} props - submit options ({ period, page, eventTarget })
 * @return {Object} form fields in submission order; an empty object when no options are given
 */
function getAreaDataBody($, props) {
    const hasProps = Boolean(props) && Object.keys(props).length > 0;
    if (!hasProps) {
        return {};
    }
    const prefix = 'ID_ucAreaPrice$';
    const entries = [
        ['__EVENTTARGET', props.eventTarget || ''],
        ['__EVENTARGUMENT', ''],
        ['__VIEWSTATE', $('#__VIEWSTATE').val()],
        ['__VIEWSTATEGENERATOR', $('#__VIEWSTATEGENERATOR').val()],
        [`${prefix}linkvv`, props.period],
        [`${prefix}LinkValue`, ''],
        [`${prefix}dropArea`, 'code'],
        [`${prefix}txtSearchCailiao`, ''],
        [`${prefix}UcPager1$listPage`, props.page ? String(props.page) || '1' : '1'],
    ];
    // Without an explicit event target the request is sent as a press of the master button.
    if (!props.eventTarget) {
        entries.push([`${prefix}btnAreaMaster`, 'Button']);
    }
    return entries.reduce((body, [key, value]) => {
        body[key] = value;
        return body;
    }, {});
}
|
|
|
+
|
|
|
/**
 * Builds the form fields posted to the ready-mixed mortar price page.
 * @param {Function} $ - cheerio handle of the page the hidden fields are read from
 * @param {Object} props - submit options ({ period, page, eventTarget })
 * @return {Object} form fields in submission order; an empty object when no options are given
 */
function getMixedDataBody($, props) {
    const isEmpty = !props || Object.keys(props).length === 0;
    if (isEmpty) {
        return {};
    }
    const { eventTarget, period, page } = props;
    // Without an explicit event target the request is sent as a press of the master button.
    const trigger = eventTarget ? {} : { ID_ucReadyMixedPrice$btnAreaMaster: 'Button' };
    return {
        __EVENTTARGET: eventTarget || '',
        __EVENTARGUMENT: '',
        __VIEWSTATE: $('#__VIEWSTATE').val(),
        __VIEWSTATEGENERATOR: $('#__VIEWSTATEGENERATOR').val(),
        ID_ucReadyMixedPrice$linkvv: period,
        ID_ucReadyMixedPrice$LinkValue: '',
        ID_ucReadyMixedPrice$dropArea: 'code',
        ID_ucReadyMixedPrice$txtSearchCailiao: '',
        ID_ucReadyMixedPrice$UcPager1$listPage: page ? String(page) || '1' : '1',
        ...trigger,
    };
}
|
|
|
+
|
|
|
// HTTP client set-up for the price-information site.

// Request timeout in milliseconds; also quoted in the timeout error message below.
const TIME_OUT = 60000;

// Shared axios instance used for every request of this crawler.
// To inspect the traffic with Fiddler, add `proxy: { host: '127.0.0.1', port: '8888' }`
// to the options (Fiddler must be running, otherwise requests fail with a connect error).
const axiosInstance = axios.create({
    baseURL: 'http://www.cqsgczjxx.org/Jgxx/',
    timeout: TIME_OUT,
    headers: {
        'Cache-Control': 'max-age=0',
        'Content-Type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6',
    },
    responseType: 'document'
});

// Response interceptor: successful responses pass through untouched; a timeout is
// replaced by a readable message, every other failure is re-rejected as it is.
axiosInstance.interceptors.response.use(function (response) {
    return response;
}, function (error) {
    // Guard the lookup so that a rejection value without a string message is passed
    // on unchanged instead of raising a TypeError inside this handler.
    const message = error && error.message;
    if (typeof message === 'string' && message.includes('timeout')) {
        return Promise.reject(`目标网络超时,请稍后再试。(${TIME_OUT}ms)`);
    }
    return Promise.reject(error);
});
|
|
|
+
|
|
|
// Requests have to carry the cookies handed out by the site, otherwise some of them
// are answered with HTTP 500 (apparently an anti-crawling measure of the site).
let curCookie = '';

/**
 * Folds the name=value pairs of Set-Cookie headers into an existing Cookie header value.
 * Cookie attributes (path, HttpOnly, ...) are dropped; a cookie that is set again
 * replaces the earlier value of the same name.
 * @param {String} current - current Cookie header value, may be empty
 * @param {Array<String>} setCookies - Set-Cookie header values of a response
 * @return {String} merged Cookie header value
 */
function mergeCookies(current, setCookies) {
    const jar = new Map();
    const remember = pair => {
        const text = pair.trim();
        const eq = text.indexOf('=');
        if (eq > 0) {
            jar.set(text.slice(0, eq), text.slice(eq + 1));
        }
    };
    current.split(';').forEach(remember);
    setCookies.forEach(header => remember(String(header).split(';')[0]));
    return [...jar].map(([name, value]) => `${name}=${value}`).join('; ');
}

/**
 * Posts to a page and parses the returned html with cheerio.
 * The cookies remembered from earlier responses are sent along, and the ones set by
 * this response are remembered for the following requests.
 * @param {String} url - page path, resolved against the axios baseURL
 * @param {Object} [body] - form fields; sent url-encoded, nothing is sent when omitted
 * @return {Promise<Function>} cheerio handle of the loaded page (jQuery-like API)
 */
async function loadPage(url, body) {
    const config = curCookie ? { headers: { Cookie: curCookie } } : {};
    const payload = body ? querystring.stringify(body) : null;
    const rst = await axiosInstance.post(url, payload, config);
    // Keep every cookie of the response (not only the first one) and skip the update
    // when the header is absent or empty.
    const cookies = rst.headers['set-cookie'];
    if (Array.isArray(cookies) && cookies.length) {
        curCookie = mergeCookies(curCookie, cookies);
    }
    return cheerio.load(rst.data);
}
|
|
|
+
|
|
|
// Month number -> zero-padded month label used in the stored period name.
const monthMap = {
    '1': '01月',
    '2': '02月',
    '3': '03月',
    '4': '04月',
    '5': '05月',
    '6': '06月',
    '7': '07月',
    '8': '08月',
    '9': '09月',
    '10': '10月',
    '11': '11月',
    '12': '12月',
};

/**
 * Resolves the site uid of every period from `from` to `to`, both inclusive.
 * @param {String} from - first period, eg: 2020-01
 * @param {String} to - last period, eg: 2020-05
 * @param {Function} $index - cheerio handle of the initial page (holds the #PriceLMenu period menu)
 * @return {Array<object>|null} eg: [{ period: '2020年-05月', uid: 'XCCXXXXX-XX' }];
 *   null when a bound is malformed, the range is reversed, or a period of the range
 *   cannot be found in the menu
 */
function getPeriodData(from, to, $index) {
    const reg = /(\d+)-(\d+)/;
    const fromMatch = String(from).match(reg);
    const toMatch = String(to).match(reg);
    if (!fromMatch || !toMatch) {
        return null;
    }
    const fromYear = +fromMatch[1];
    const fromMonth = +fromMatch[2];
    const toYear = +toMatch[1];
    const toMonth = +toMatch[2];
    if (!monthMap[fromMonth] || !monthMap[toMonth]) {
        return null;
    }
    // Compare year and month as numbers, so that inputs without zero padding work too.
    if (fromYear > toYear || (fromYear === toYear && fromMonth > toMonth)) {
        return null;
    }
    const $period = $index('#PriceLMenu');
    const list = [];
    let curYear = fromYear;
    let curMonth = fromMonth;
    // The month bound only applies in the last year; otherwise a range that crosses a
    // year boundary (eg: 2019-11 to 2020-02) would be cut short.
    while (curYear < toYear || (curYear === toYear && curMonth <= toMonth)) {
        const uid = getPeriodUID(curYear, curMonth, $period);
        // One missing period invalidates the whole range.
        if (!uid) {
            return null;
        }
        list.push({
            period: `${curYear}年-${monthMap[curMonth]}`,
            uid
        });
        if (curMonth === 12) {
            curYear++;
            curMonth = 1;
        } else {
            curMonth++;
        }
    }
    return list;

    // Looks up the uid of one period in the menu; null when it is not listed.
    function getPeriodUID(year, month, $period) {
        const $year = $period.find('.MenuOneTitle').filter(function () {
            return $index(this).text() === `${year}年`;
        });
        if (!$year.length) {
            return null;
        }
        const $month = $year.parent().next().find('a').filter(function () {
            return $index(this).text() === `${month}月`;
        });
        if (!$month.length) {
            return null;
        }
        // The uid is the first argument of the Onlink(...) call in the onclick attribute.
        const onclickText = $month.attr('onclick');
        if (!onclickText) {
            return null;
        }
        const matched = String(onclickText).match(/Onlink\('([^']+)'/);
        if (!matched || !matched[1]) {
            return null;
        }
        return matched[1];
    }
}
|
|
|
+
|
|
|
// Table layouts found on the price pages; selects the parser in crawlTableData.
// Frozen so the shared constant cannot be modified by accident.
const TableType = Object.freeze({
    BUILDING: 1, // main materials: construction and installation materials
    GARDEN: 2, // main materials: landscaping and greening
    ENERGY: 3, // main materials: green / energy-saving building materials
    AREA: 4, // district related (local materials of each district)
    MIXED: 5, // district related (ready-mixed mortar)
});
|
|
|
+
|
|
|
/**
 * Extracts the rows of the price table on a page, using the parser that fits the
 * table type.
 * @param {Function} $page - cheerio handle of the page
 * @param {Number} type - table type (a TableType value)
 * @return {Array<object>} parsed rows; empty for an unknown table type
 */
function crawlTableData($page, type) {
    if (type === TableType.BUILDING || type === TableType.ENERGY) {
        return crawlNormalTable($page);
    }
    if (type === TableType.GARDEN) {
        return crawlGardenTable($page);
    }
    if (type === TableType.AREA) {
        return crawlAreaTable($page, '#ID_ucAreaPrice_gridView');
    }
    if (type === TableType.MIXED) {
        return crawlAreaTable($page, '#ID_ucReadyMixedPrice_gridView');
    }
    return [];
}
|
|
|
+
|
|
|
/**
 * Parses a price table whose columns are:
 * serial no. | material name | specs | unit | price incl. tax | price excl. tax | remark
 * The serial-number cell of every row is skipped.
 * @param {Function} $page - cheerio handle of the page
 * @return {Array<object>} one object per complete table row
 */
function crawlNormalTable($page) {
    const fields = ['name', 'specs', 'unit', 'taxPrice', 'noTaxPrice', 'remark'];
    const cellsPerRow = fields.length + 1; // the leading serial-number cell plus the fields
    const data = [];
    let row;
    $page('#ID_ucPrice_gridView').find('tr td span').each((position, element) => {
        const cell = position % cellsPerRow;
        if (cell === 0) {
            // Serial-number cell: carries no data.
            return;
        }
        if (cell === 1) {
            row = {};
        }
        row[fields[cell - 1]] = $page(element).text();
        if (cell === fields.length) {
            data.push(row);
        }
    });
    debugConsole(data);
    return data;
}
|
|
|
/**
 * Parses a price table whose columns are:
 * serial no. | genus | name | height (CM) | trunk diameter (CM) | crown diameter (CM) |
 * branching height (CM) | unit | price incl. tax | price excl. tax | remark
 * The serial-number cell of every row is skipped.
 * @param {Function} $page - cheerio handle of the page
 * @return {Array<object>} one object per complete table row
 */
function crawlGardenTable($page) {
    const fields = [
        'genera',
        'name',
        'height',
        'branchDiameter',
        'crownDiameter',
        'branchHeight',
        'unit',
        'taxPrice',
        'noTaxPrice',
        'remark',
    ];
    // Collect the text of every data cell first (each 11th cell is a serial number).
    const texts = [];
    $page('#ID_ucPrice_gridView').find('tr td span').each((position, element) => {
        if (position % 11 !== 0) {
            texts.push($page(element).text());
        }
    });
    // Then cut the flat list into rows; an incomplete trailing row is dropped.
    const data = [];
    for (let start = 0; start + fields.length <= texts.length; start += fields.length) {
        const row = {};
        fields.forEach((field, offset) => {
            row[field] = texts[start + offset];
        });
        data.push(row);
    }
    debugConsole(data);
    return data;
}
|
|
|
/**
 * Parses a price table whose columns are:
 * serial no. | district | material name | specs and model | unit | price incl. tax | price excl. tax
 * The serial-number cell of every row is skipped.
 * @param {Function} $page - cheerio handle of the page
 * @param {String} viewSelector - selector (id) of the table
 * @return {Array<object>} one object per complete table row
 */
function crawlAreaTable($page, viewSelector) {
    const columns = ['area', 'name', 'specs', 'unit', 'taxPrice', 'noTaxPrice'];
    const toRow = values => values.reduce((row, value, position) => {
        row[columns[position]] = value;
        return row;
    }, {});
    const data = [];
    let pending = [];
    const $cells = $page(viewSelector).find('tr td span').filter(position => position % 7 !== 0);
    $cells.each((position, element) => {
        pending.push($page(element).text());
        // A row is emitted as soon as all of its columns have been read.
        if (pending.length === columns.length) {
            data.push(toRow(pending));
            pending = [];
        }
    });
    debugConsole(data);
    return data;
}
|
|
|
+
|
|
|
// Event targets: the value posted as __EVENTTARGET to request the next page of a table.
// Frozen so the shared constant cannot be modified by accident.
const EventTarget = Object.freeze({
    GENERAL_NEXT: 'ID_ucPrice$UcPager1$btnNext',
    AREA_NEXT: 'ID_ucAreaPrice$UcPager1$btnNext',
    // NOTE(review): this id is separated with '_' while the two above use '$' - confirm
    // against the page that 'ID_ucReadyMixedPrice$UcPager1$btnNext' is not the intended value.
    MIXED_NEXT: 'ID_ucReadyMixedPrice_UcPager1_btnNext',
});
|
|
|
+
|
|
|
/**
 * Crawls a paged table: loads the first page, reads the page count from the pager
 * label and then requests the remaining pages in batches.
 * @param {Function} $index - cheerio handle of the page the first request is built from
 * @param {Object} props - form options of the request
 * @param {String} pageType - page type (a PageType value)
 * @param {Number} tableType - table type (a TableType value)
 * @return {Promise<Array<object>>} rows of all pages, in page order
 */
async function crawlPagesData($index, props, pageType, tableType) {
    // Anything that is neither the general nor the area page is handled as the mixed page.
    let buildBody = getMixedDataBody;
    let pagerLabel = '#ID_ucReadyMixedPrice_UcPager1_lbPage';
    let nextTarget = EventTarget.MIXED_NEXT;
    if (pageType === PageType.GENERAL) {
        buildBody = getGeneralDataBody;
        pagerLabel = '#ID_ucPrice_UcPager1_lbPage';
        nextTarget = EventTarget.GENERAL_NEXT;
    } else if (pageType === PageType.AREA) {
        buildBody = getAreaDataBody;
        pagerLabel = '#ID_ucAreaPrice_UcPager1_lbPage';
        nextTarget = EventTarget.AREA_NEXT;
    }

    const $firstPage = await loadPage(pageType, buildBody($index, props));
    const rows = [...crawlTableData($firstPage, tableType)];
    if (rows.length === 0) {
        // Nothing on the first page, so there is nothing to page through.
        return rows;
    }

    // The pager label reads "current/total", eg: 1/10.
    const totalPage = Number($firstPage(pagerLabel).text().split('/')[1]);

    // Requests the page that follows `page`: the form is posted with `page` selected in
    // the pager and the "next" control as event target.
    const crawlFollowingPage = async page => {
        const body = buildBody($firstPage, { ...props, page, eventTarget: nextTarget });
        const $page = await loadPage(pageType, body);
        return crawlTableData($page, tableType);
    };

    const batchSize = 6; // upper bound of requests that are in flight together
    for (let start = 1; start < totalPage; start += batchSize) {
        const end = Math.min(start + batchSize, totalPage);
        const pending = [];
        for (let page = start; page < end; page++) {
            pending.push(crawlFollowingPage(page));
        }
        const chunks = await Promise.all(pending);
        chunks.forEach(chunk => rows.push(...chunk));
    }
    return rows;
}
|
|
|
+
|
|
|
/**
 * Crawls one engineering class of the main material prices (construction and
 * installation, landscaping, or green / energy-saving building materials).
 * @param {String} period - period uid
 * @param {String} classID - engineering class id
 * @param {Function} $index - cheerio handle of the initial page
 * @param {Number} type - table type (a TableType value)
 * @return {Promise<Array<object>>} for TableType.BUILDING one entry per material class,
 *   eg: [{ materialClass: '黑色及有色金属', items: [...] }]; for the other types the plain rows
 */
async function crawlGeneralSubData(period, classID, $index, type) {
    const $engineeringClassPage = await loadPage(
        PageType.GENERAL,
        getGeneralDataBody($index, { period, classID })
    );
    const crawlClassPages = materialClass =>
        crawlPagesData($engineeringClassPage, { period, classID, materialClass }, PageType.GENERAL, type);

    if (type !== TableType.BUILDING) {
        const items = await crawlClassPages('');
        return [...items];
    }

    // Material class names are the link texts of the category label on the initial page.
    const classList = [];
    $index('#ID_ucPrice_CategoryLabel').find('a').each((position, anchor) => {
        classList.push($engineeringClassPage(anchor).text());
    });
    if (classList.length === 0) {
        throw '无法爬取到材料分类。';
    }

    const serialReg = /[一二三四五六七八九十]+、/; // leading serial number of a class name
    const rst = [];
    for (const materialClass of classList) {
        const items = await crawlClassPages(materialClass);
        rst.push({ materialClass: materialClass.replace(serialReg, ''), items });
    }
    return rst;
}
|
|
|
+
|
|
|
+
|
|
|
/**
 * Crawls the main material prices (stored as the general library).
 * Engineering classes that are not offered on the page are left out of the result.
 * @param {String} period - period uid
 * @param {Function} $index - cheerio handle of the initial page
 * @return {Promise<Object>} { building, garden, energy }, each present only when its class was found
 */
async function crawlGeneralData(period, $index) {
    const classIDs = crawlClass($index('#ID_ucPrice_tabNewBar'));
    // - garden: the class of a row is taken from its "genus" column later on
    // - energy: all rows belong to the single class "绿色、节能建筑工程材料"
    const plan = [
        ['building', TableType.BUILDING],
        ['garden', TableType.GARDEN],
        ['energy', TableType.ENERGY],
    ];
    const rst = {};
    for (const [key, tableType] of plan) {
        const classID = classIDs[key];
        if (classID) {
            rst[key] = await crawlGeneralSubData(period, classID, $index, tableType);
        }
    }
    return rst;

    // Reads the engineering class ids from the onclick handlers of the tab links.
    function crawlClass($class) {
        const keyByTitle = new Map([
            ['建安工程材料', 'building'],
            ['园林绿化工程材料', 'garden'],
            ['绿色、节能建筑工程材料', 'energy'],
        ]);
        const reg = /OnClassson\('([^']+)'/;
        const found = {};
        $class.find('a').each((position, anchor) => {
            const $anchor = $index(anchor);
            const text = $anchor.text();
            const matched = $anchor.attr('onclick').toString().match(reg);
            if (!matched || !matched[1]) {
                throw '无法爬取到工程分类。';
            }
            const key = keyByTitle.get(text);
            if (key) {
                found[key] = matched[1];
            }
        });
        return found;
    }
}
|
|
|
+
|
|
|
/**
 * Crawls the local material prices of each district for one period.
 * @param {String} period - period uid
 * @return {Promise<Array<object>>} rows of all pages
 */
async function crawlAreaData(period) {
    // The district page has to be loaded first; the paged requests are built from it.
    const $areaIndex = await loadPage(PageType.AREA);
    const rows = await crawlPagesData($areaIndex, { period }, PageType.AREA, TableType.AREA);
    return rows;
}
|
|
|
+
|
|
|
/**
 * Crawls the ready-mixed mortar prices for one period.
 * @param {String} period - period uid
 * @return {Promise<Array<object>>} rows of all pages
 */
async function crawlMixedData(period) {
    // The mortar page has to be loaded first; the paged requests are built from it.
    const $mixedIndex = await loadPage(PageType.MIXED);
    const rows = await crawlPagesData($mixedIndex, { period }, PageType.MIXED, TableType.MIXED);
    return rows;
}
|
|
|
+
|
|
|
/**
 * Converts crawled rows into price records. One source row may yield several records:
 *  - garden tables: the specs are composed from the dimension columns; a price range
 *    "430-780" yields a "<name>-最低价" record and a "<name>-最高价" record
 *  - other tables: prices separated by "/" yield one record per "/"-separated specs
 *    part, eg specs "Q390/Q420 δ=20-30" => "Q390 δ=20-30" and "Q420 δ=20-30"
 * A price that is just "-" is stored as an empty string. The source rows are not modified.
 * @param {String} libID - library id
 * @param {String} classID - id of the owning class
 * @param {String} period - period name
 * @param {String} areaID - area id
 * @param {String} compilationID - compilation id
 * @param {Array<object>} items - crawled source rows
 * @param {Number} tableType - table type (a TableType value)
 * @return {Array<object>} price records
 */
function transformPriceItems(libID, classID, period, areaID, compilationID, items, tableType) {
    const toRecord = item => transfromPriceItem(libID, classID, period, areaID, compilationID, item);
    const cleanPrice = price => (price === '-' ? '' : price);
    const splitPrice = (price, separator) => String(price == null ? '' : price).split(separator);
    const rst = [];

    if (tableType === TableType.GARDEN) {
        const unit = 'CM';
        const rangeReg = /-/;
        const dimensions = [
            ['高度', 'height'],
            ['干径', 'branchDiameter'],
            ['冠径', 'crownDiameter'],
            ['分枝高', 'branchHeight'],
        ];
        items.forEach(item => {
            // Compose the specs from the dimension columns that have a value.
            const specs = dimensions
                .filter(([, key]) => item[key])
                .map(([label, key]) => `${label}${item[key]}${unit}`)
                .join(' ');
            // Work on a copy that carries the composed specs, so that rows without a
            // price range keep their specs as well.
            const source = {
                ...item,
                specs,
                taxPrice: cleanPrice(item.taxPrice),
                noTaxPrice: cleanPrice(item.noTaxPrice),
            };
            const isRange = rangeReg.test(source.taxPrice) || rangeReg.test(source.noTaxPrice);
            if (!isRange) {
                rst.push(toRecord(source));
                return;
            }
            const taxPriceList = splitPrice(source.taxPrice, '-');
            const noTaxPriceList = splitPrice(source.noTaxPrice, '-');
            rst.push(toRecord({
                ...source,
                name: `${item.name}-最低价`,
                taxPrice: taxPriceList[0],
                noTaxPrice: noTaxPriceList[0],
            }));
            rst.push(toRecord({
                ...source,
                name: `${item.name}-最高价`,
                taxPrice: taxPriceList[1] || '',
                noTaxPrice: noTaxPriceList[1] || '',
            }));
        });
        return rst;
    }

    const splitReg = /\//;
    // Trailing specs part that is shared by all "/"-separated parts.
    const commonReg = /\s+([^/]*)$/;
    items.forEach(item => {
        const source = {
            ...item,
            taxPrice: cleanPrice(item.taxPrice),
            noTaxPrice: cleanPrice(item.noTaxPrice),
        };
        // A split price is what marks a row that has to be split.
        const isSplit = splitReg.test(source.taxPrice) || splitReg.test(source.noTaxPrice);
        if (!isSplit) {
            rst.push(toRecord(source));
            return;
        }
        const rawSpecs = source.specs || '';
        const commonMatched = rawSpecs.match(commonReg);
        const commonSpecs = commonMatched && commonMatched[1] ? ' ' + commonMatched[1] : '';
        const specsList = rawSpecs.replace(commonReg, '').split('/');
        const taxPriceList = splitPrice(source.taxPrice, '/');
        const noTaxPriceList = splitPrice(source.noTaxPrice, '/');
        specsList.forEach((specs, index) => {
            rst.push(toRecord({
                ...source,
                specs: `${specs}${commonSpecs}`,
                // Fall back to the first price when there are fewer prices than specs.
                taxPrice: taxPriceList[index] || taxPriceList[0],
                noTaxPrice: noTaxPriceList[index] || noTaxPriceList[0],
            }));
        });
    });
    return rst;
}
|
|
|
+
|
|
|
/**
 * Converts a single source row into a price record. The source row is left untouched.
 * @param {String} libID - library id
 * @param {String} classID - id of the owning class
 * @param {String} period - period name
 * @param {String} areaID - area id
 * @param {String} compilationID - compilation id
 * @param {Object} item - source row ({ name, specs, unit, taxPrice, noTaxPrice, remark })
 * @return {Object} price record with a newly generated ID
 */
function transfromPriceItem(libID, classID, period, areaID, compilationID, item) {
    // The source specs contain runs of meaningless whitespace; collapse each run to one space.
    const specs = item.specs ? item.specs.replace(/\s{2,}/g, ' ') : '';
    return {
        ID: uuidV1(),
        libID,
        classID,
        code: '',
        name: item.name,
        specs,
        unit: item.unit,
        taxPrice: item.taxPrice,
        noTaxPrice: item.noTaxPrice,
        remark: item.remark || '',
        // Redundant fields, kept to simplify the price-information features of the front end.
        period,
        areaID,
        compilationID,
    };
}
|
|
|
+
|
|
|
/**
 * Converts the main material data into library, class and price records of the
 * general area ("通用").
 * @param {String} period - period name
 * @param {String} compilationID - compilation id
 * @param {Object} generalData - main materials { building, garden, energy }; `building`
 *   and `garden` may be missing when the class was not crawled
 * @return {Promise<Object>} { libData, classData, priceData, compilationAreas }
 */
async function transfromGeneralData(period, compilationID, generalData) {
    const area = '通用';
    // Areas are matched by name first: an area the compilation already has is reused.
    const matchedArea = await priceInfoAreaModel.findOne({ compilationID, name: area }).lean();
    const areaID = matchedArea && matchedArea.ID || uuidV1();
    const compilationAreas = [];
    const libData = {
        ID: uuidV1(),
        name: `信息价(${period})`,
        period,
        areas: [],
        compilationID,
        createDate: Date.now(),
    };
    const classData = [];
    const priceData = [];
    const { building, garden, energy } = generalData;

    handleClassAndItems(building, TableType.BUILDING);

    // Garden classes form a two-level tree: root "苗木" -> genus (genera) of the rows.
    // Skipped when no garden data was crawled.
    if (Array.isArray(garden)) {
        const gardenRoot = { materialClass: '苗木', treeData: { ID: uuidV1(), ParentID: '-1' } };
        const gardenData = [gardenRoot];
        garden.forEach(item => {
            const pre = gardenData[gardenData.length - 1];
            // Consecutive rows of one genus share a class. The root is never used as a
            // genus class, even if a genus carries the same name as the root.
            if (pre !== gardenRoot && pre.materialClass === item.genera) {
                pre.items.push(item);
            } else {
                gardenData.push({
                    materialClass: item.genera,
                    treeData: { ParentID: gardenRoot.treeData.ID },
                    items: [item],
                });
            }
        });
        handleClassAndItems(gardenData, TableType.GARDEN);
    }

    // All green / energy-saving rows belong to one class.
    const energyData = [{ materialClass: '绿色、节能建筑工程材料', items: energy }];
    handleClassAndItems(energyData, TableType.ENERGY);

    // The area is only registered when there is data and the compilation lacks it.
    if ((classData.length || priceData.length) && !matchedArea) {
        compilationAreas.push({ compilationID, ID: areaID, name: area });
    }
    return { libData, classData, priceData, compilationAreas };

    // Appends one class record per entry (linked to its previous sibling) and the
    // price records of its rows.
    function handleClassAndItems(sourceData, tableType) {
        if (!sourceData) {
            return;
        }
        sourceData.forEach(({ materialClass, treeData, items }) => {
            const classItem = {
                ID: treeData && treeData.ID || uuidV1(),
                ParentID: treeData && treeData.ParentID || '-1',
                NextSiblingID: treeData && treeData.NextSiblingID || '-1',
                name: materialClass,
                libID: libData.ID,
                areaID,
            };
            // The previous sibling is the closest earlier class with the same parent.
            for (let index = classData.length - 1; index >= 0; index--) {
                if (classData[index].ParentID === classItem.ParentID) {
                    classData[index].NextSiblingID = classItem.ID;
                    break;
                }
            }
            classData.push(classItem);
            if (items && items.length) {
                const newItems = transformPriceItems(libData.ID, classItem.ID, period, areaID, compilationID, items, tableType);
                newItems.forEach(item => priceData.push(item));
            }
        });
    }
}
|
|
|
+
|
|
|
/**
 * Converts the district related data into class and price records.
 * Every district becomes an area of the period library, with up to two classes:
 * "地方材料信息价" (local materials) and "预拌商品砂浆" (ready-mixed mortar).
 * @param {String} period - period name
 * @param {String} compilationID - compilation id
 * @param {Object} libData - library record of the current period
 * @param {Array<object>} areaData - local material rows of each district
 * @param {Array<object>} mixedData - ready-mixed mortar rows
 * @return {Promise<Object>} { classData, priceData, compilationAreas }
 */
async function transformAreaData(period, compilationID, libData, areaData, mixedData) {
    const areaClass = '地方材料信息价';
    const mixedClass = '预拌商品砂浆';
    // Rows grouped by district. A Map keeps the districts in crawl order and is safe
    // for any scraped name (a plain object would clash with names such as "constructor").
    const groups = new Map();
    const collect = (sourceData, slot, className) => {
        (sourceData || []).forEach(item => {
            let group = groups.get(item.area);
            if (!group) {
                group = { area: item.area, subData: [] };
                groups.set(item.area, group);
            }
            // Slot 0 holds the local materials, slot 1 the mortar; a class is only
            // created once a district has rows for it.
            if (!group.subData[slot]) {
                group.subData[slot] = { className, items: [] };
            }
            group.subData[slot].items.push(item);
        });
    };
    collect(areaData, 0, areaClass);
    collect(mixedData, 1, mixedClass);

    const compilationAreas = [];
    const classData = [];
    const priceData = [];
    for (const { area, subData } of groups.values()) {
        // Areas are matched by name first: an area the compilation already has is reused.
        const matchedArea = await priceInfoAreaModel.findOne({ compilationID, name: area }).lean();
        const areaID = matchedArea && matchedArea.ID || uuidV1();
        if (!matchedArea) {
            compilationAreas.push({ compilationID, ID: areaID, name: area });
        }
        let preClass;
        for (const subItem of subData) {
            if (!subItem) {
                // The district has no rows for this class.
                continue;
            }
            const { className, items } = subItem;
            const classItem = {
                ID: uuidV1(),
                ParentID: '-1',
                NextSiblingID: '-1',
                name: className,
                libID: libData.ID,
                areaID,
            };
            classData.push(classItem);
            if (preClass) {
                preClass.NextSiblingID = classItem.ID;
            }
            preClass = classItem;
            const newItems = transformPriceItems(libData.ID, classItem.ID, period, areaID, compilationID, items, TableType.AREA);
            newItems.forEach(item => priceData.push(item));
        }
    }
    return { classData, priceData, compilationAreas };
}
|
|
|
+
|
|
|
/**
 * Converts the crawled data of one period and stores it: one library with the general
 * area and one area per district. Data stored earlier for the same period of this
 * compilation is replaced.
 * @param {String} period - period name
 * @param {Object} generalData - main materials { building, garden, energy }
 * @param {Array<object>} areaData - local material rows of each district
 * @param {Array<object>} mixedData - ready-mixed mortar rows of each district
 * @throws {String} when no compilation is configured with the expected overWriteUrl
 */
async function save(period, generalData, areaData, mixedData) {
    const overWriteUrl = '/web/over_write/js/chongqing_2018.js';
    const compilation = await compilationModel.findOne({ overWriteUrl }, '_id').lean();
    if (!compilation) {
        throw '没有找到正确配置overWriteUrl的费用定额。';
    }
    const compilationID = compilation._id;

    // Convert
    const generalSaveData = await transfromGeneralData(period, compilationID, generalData);
    const libData = generalSaveData.libData;
    const areaSaveData = await transformAreaData(period, compilationID, libData, areaData, mixedData);
    const classData = [...generalSaveData.classData, ...areaSaveData.classData];
    const priceData = [...generalSaveData.priceData, ...areaSaveData.priceData];
    const compilationAreas = [...generalSaveData.compilationAreas, ...areaSaveData.compilationAreas];

    // Remove what was stored before for this period. The lookup is limited to this
    // compilation and everything is deleted through the ids of the libraries found, so
    // data of other compilations with the same period name is left alone.
    const originalLibs = await priceInfoLibModel.find({ period, compilationID }, '-_id ID').lean();
    const originalLibIDList = originalLibs.map(lib => lib.ID);
    if (originalLibIDList.length) {
        const ownedByOldLibs = { libID: { $in: originalLibIDList } };
        await priceInfoItemModel.deleteMany(ownedByOldLibs);
        await priceInfoClassModel.deleteMany(ownedByOldLibs);
        await priceInfoLibModel.deleteMany({ ID: { $in: originalLibIDList } });
    }

    // Insert (empty collections are skipped)
    if (priceData.length) {
        await priceInfoItemModel.insertMany(priceData);
    }
    if (classData.length) {
        await priceInfoClassModel.insertMany(classData);
    }
    if (libData) {
        await priceInfoLibModel.insertMany([libData]);
    }
    if (compilationAreas.length) {
        await priceInfoAreaModel.insertMany(compilationAreas);
    }
}
|
|
|
+
|
|
|
/**
 * Crawls and stores every period from `from` to `to`, one period after the other.
 * @param {String} from - first period, eg: 2020-01
 * @param {String} to - last period, eg: 2020-05
 * @return {Promise<void>}
 * @throws {String} description of the failure; names the periods stored before the
 *   failure when there are any
 */
async function crawlData(from, to) {
    let curPeriod;
    let allTimerStarted = false;
    try {
        const $index = await loadPage(PageType.GENERAL);
        const periodData = getPeriodData(from, to, $index);
        if (!periodData) {
            throw '无效的期数区间。';
        }
        debugConsole('allTime', 'time');
        allTimerStarted = true;
        for (const periodItem of periodData) {
            debugConsole('peroidTime', 'time');
            try {
                // Main material prices (the initial page already is their page)
                const generalData = await crawlGeneralData(periodItem.uid, $index);
                // Local material prices of each district
                const areaData = await crawlAreaData(periodItem.uid);
                // Ready-mixed mortar prices
                const mixedData = await crawlMixedData(periodItem.uid);
                // Convert and store
                await save(periodItem.period, generalData, areaData, mixedData);
                curPeriod = periodItem.period;
            } finally {
                // Close the timer on failure as well, so the label is free for the next run.
                debugConsole('peroidTime', 'timeEnd');
            }
        }
    } catch (err) {
        console.log(err);
        // On failure, tell which periods have been stored successfully.
        let errTip = '';
        if (curPeriod) {
            errTip += `\n成功爬取期数为:${from}到${curPeriod}`;
        }
        throw String(err) + errTip;
    } finally {
        if (allTimerStarted) {
            debugConsole('allTime', 'timeEnd');
        }
    }
}
|