/* * @Descripttion: 广东材料信息价格获取(通过造价通接口) * @Author: vian * @Date: 2020-09-29 11:22:59 */ module.exports = { crawlData, }; const uuidV1 = require('uuid/v1'); const mongoose = require('mongoose'); const axios = require('axios'); const querystring = require('querystring'); const priceInfoLibModel = mongoose.model('std_price_info_lib'); const priceInfoClassModel = mongoose.model('std_price_info_class'); const priceInfoAreaModel = mongoose.model('std_price_info_areas'); const priceInfoItemModel = mongoose.model('std_price_info_items'); const priceInfoSourceModel = mongoose.model('std_price_info_source'); const gljLibModel = mongoose.model('std_glj_lib_map'); const gljClassModel = mongoose.model('std_glj_lib_gljClass'); // 造价通网页上整理的地区https://gd.zjtcn.com/gov/c_cs_d_t_p1.html const areas = [ { city: '广州市', county: '广州市' }, { city: '广州市', county: '花都区' }, { city: '广州市', county: '增城市' }, { city: '广州市', county: '从化市' }, { city: '韶关市', county: '韶关市' }, { city: '韶关市', county: '乐昌市' }, { city: '深圳市', county: '深圳市' }, { city: '珠海市', county: '珠海市' }, { city: '汕头市', county: '汕头市' }, { city: '汕头市', county: '濠江区' }, { city: '汕头市', county: '潮阳区' }, { city: '汕头市', county: '潮南区' }, { city: '汕头市', county: '澄海区' }, { city: '汕头市', county: '南澳县' }, { city: '佛山市', county: '佛山市' }, { city: '佛山市', county: '南海区' }, { city: '佛山市', county: '顺德区' }, { city: '江门市', county: '江门市' }, { city: '江门市', county: '新会区' }, { city: '江门市', county: '台山市' }, { city: '江门市', county: '开平市' }, { city: '江门市', county: '鹤山市' }, { city: '江门市', county: '恩平市' }, { city: '湛江市', county: '湛江市' }, { city: '湛江市', county: '遂溪县' }, { city: '湛江市', county: '徐闻县' }, { city: '湛江市', county: '廉江市' }, { city: '湛江市', county: '雷州市' }, { city: '湛江市', county: '吴川市' }, { city: '茂名市', county: '茂名市' }, { city: '茂名市', county: '电白市' }, { city: '茂名市', county: '高州市' }, { city: '茂名市', county: '化州市' }, { city: '茂名市', county: '信宜市' }, { city: '肇庆市', county: '肇庆市' }, { city: '肇庆市', county: '鼎湖区' }, { city: '肇庆市', county: '广宁县' }, { city: '肇庆市', county: '怀集县' }, { city: '肇庆市', county: '封开县' }, { city: '肇庆市', county: '德庆县' }, { city: '肇庆市', county: '高要市' }, { city: '肇庆市', county: '四会市' }, { city: '惠州市', county: '惠州市' }, { city: '惠州市', county: '惠阳区' }, { city: '惠州市', county: '大亚湾开发区' }, { city: '惠州市', county: '博罗县' }, { city: '惠州市', county: '惠东县' }, { city: '惠州市', county: '龙门县' }, { city: '梅州市', county: '梅州市' }, { city: '梅州市', county: '梅县' }, { city: '梅州市', county: '大埔县' }, { city: '梅州市', county: '丰顺县' }, { city: '梅州市', county: '平远县' }, { city: '汕尾市', county: '汕尾市' }, { city: '汕尾市', county: '海丰县' }, { city: '汕尾市', county: '陆河县' }, { city: '河源市', county: '河源市' }, { city: '河源市', county: '紫金县' }, { city: '河源市', county: '龙川县' }, { city: '河源市', county: '连平县' }, { city: '河源市', county: '和平县' }, { city: '河源市', county: '东源县' }, { city: '阳江市', county: '阳江市' }, { city: '阳江市', county: '海陵岛区' }, { city: '阳江市', county: '阳西县' }, { city: '阳江市', county: '阳春市' }, { city: '清远市', county: '清远市' }, { city: '清远市', county: '佛冈县' }, { city: '清远市', county: '阳山县' }, { city: '清远市', county: '连山县' }, { city: '清远市', county: '连南县' }, { city: '清远市', county: '英德市' }, { city: '清远市', county: '连州市' }, { city: '东莞市', county: '东莞市' }, { city: '中山市', county: '中山市' }, { city: '潮州市', county: '潮州市' }, { city: '潮州市', county: '潮安县' }, { city: '潮州市', county: '饶平县' }, { city: '揭阳市', county: '揭阳市' }, { city: '揭阳市', county: '揭西县' }, { city: '揭阳市', county: '惠来县' }, { city: '揭阳市', county: '普宁市' }, { city: '云浮市', county: '云浮市' }, { city: '云浮市', county: '新兴县' }, { city: '云浮市', county: '郁南县' }, { city: '云浮市', county: '罗定市' }, ]; const TIME_OUT = 120000; // 创建axios实例 const axiosInstance = axios.create({ baseURL: 'http://api.zjtcn.com/user', timeout: TIME_OUT, proxy: { host: "127.0.0.1", port: "8888" // Fiddler抓包,需要打开Fiddler否则会报connect error }, headers: { 'Cache-Control': 'max-age=0', 'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6', }, //responseType: 'document' }); // 响应拦截器 axiosInstance.interceptors.response.use(function (response) { return response.data; }, function (error) { // 对响应错误做点什么 if (error.message.includes('timeout')) { return Promise.reject(`目标网络超时,请稍后再试。(${TIME_OUT}ms)`); } else { return Promise.reject(error); } }); async function post(url, data) { return await axiosInstance.post(url, querystring.stringify(data)); } // 有效期一年,通过购买造价通服务获得 const SERVICE_ID = '2020090003'; const SERVICE_KEY = '97F2A441633F10DFEB5BFC29B3862847'; // 获取后续获取信息价期刊必要的token async function getToken() { // 获取动态码 const dynData = await post('/dyn_code', { service_id: SERVICE_ID }); if (!dynData.response_code) { throw `错误代号${dynData.retCode} 获取动态码失败。`; } // 获取加密字符串 const serviceSecret = await post('/aes', { service_id: SERVICE_ID, service_key: SERVICE_KEY, service_code: dynData.response_code }); if (typeof serviceSecret !== 'string') { throw `错误代号${serviceSecret.retCode} 获取加密字符串错误。`; } const tokenData = await post('/authentication', { service_id: SERVICE_ID, service_secret: serviceSecret }); if (!tokenData.token) { throw `错误代号${tokenData.retCode} 获取token失败。` } return tokenData.token; } const monthMap = { '1': '01月', '2': '02月', '3': '03月', '4': '04月', '5': '05月', '6': '06月', '7': '07月', '8': '08月', '9': '09月', '10': '10月', '11': '11月', '12': '12月', }; // 根据期数范围,获取期数数据 function getPeriodData(from, to) { if (from > to) { return null; } const reg = /(\d+)-(\d+)/; const fromMatch = from.match(reg); const fromYear = +fromMatch[1]; const fromMonth = +fromMatch[2]; const toMatch = to.match(reg); const toYear = +toMatch[1]; const toMonth = +toMatch[2]; let curYear = fromYear; let curMonth = fromMonth; const periods = []; while (curYear <= toYear && curMonth <= toMonth) { periods.push(`${curYear}年-${monthMap[curMonth]}`); if (curMonth === 12) { curYear++; curMonth = 1; } else { curMonth++; } } return periods; } // 获取信息价期刊 async function getPriceInfoSource(token, period, city, county) { const province = '广东'; const area = `${province}-${city}-${county}`; const industry = 1; const existData = await priceInfoSourceModel.find({ period, area, industry }).lean(); if (existData.length) { return existData; } const body = { token, province, city, county, industry, date: `${period}-05` // 天数05表示请求月度数据 }; const sourceData = await post('/gov/get', body); if (!sourceData.results) { // 不抛出错误,不同地区更新信息价期刊的时间不同,如果导入数据时,有地区没发布数据,直接跳过并提示 return `retCode: ${sourceData.retCode} ${sourceData.msg} (${period} ${city} ${county})`; } // 因为造价通接口请求有次数限制,一个地区只能请求一次,为保险起见,将造价通源数据入库 const insertData = sourceData.results.map(item => ({ period, area, industry, subcid: item.subcid, code: item.code, name: item.name, unit: item.unit, price: item.price, taxPrice: item.tax_price, noTaxPrice: item.no_tax_price, specs: item.spec, remark: item.notes, })); if (insertData.length) { await priceInfoSourceModel.insertMany(insertData); } return insertData; } // 获取数据subcid与分类名称的映射表 async function getClassNameMap(compilationID) { const gljLib = await gljLibModel.findOne({ compilationId: compilationID }).lean(); if (!gljLib) { return null; } const classData = await gljClassModel.find({ repositoryId: gljLib.ID }).lean(); const map = {}; const reg = /^\d{4}/; classData.forEach(item => { const name = item.Name || ''; const matched = name.match(reg); if (matched) { map[matched[0]] = name; } }); return map; } /** * 将信息价源数据转换入库 * @param {String} compilationID - 费用定额ID * @param {String} period - 期数 eg: 2020年-09月 * @param {String} area - 地区 eg: 广州市-广州市 * @param {Array} sourceData - 造价通源数据 * @param {Object} classNameMap - 从标准人材机分类树获取的编号-名称映射表 * @return {Void} */ async function saveData(compilationID, period, area, sourceData, classNameMap) { let areaItem = await priceInfoAreaModel.findOne({ compilationID, name: area }).lean(); if (!areaItem) { areaItem = { compilationID, ID: uuidV1(), name: area }; await priceInfoAreaModel.insertMany([areaItem]); } let lib = await priceInfoLibModel.findOne({ compilationID, period }).lean(); if (!lib) { lib = { compilationID, period, ID: uuidV1(), name: `信息价(${period})`, createDate: Date.now() }; await priceInfoLibModel.insertMany([lib]); } const areaID = areaItem.ID; const libID = lib.ID; // 如果该期数该地区下存在数据,则不处理,防止重复插入数据 // 造价通地区数据更新不同步,可能需要多次导入数据补全一期数据,如果已经有数据,说明该地区已经导入成功过了,直接跳过 const existCount = await priceInfoItemModel.count({ compilationID, period, areaID }); if (existCount) { return; } // 分类数据应为空才对,如果有就清空 const existClassCount = await priceInfoClassModel.count({ libID: lib.ID, areaID }); if (existClassCount) { await priceInfoClassModel.remove({ libID: lib.ID, areaID }); } // 导入分类数据及价格信息数据 const otherClassName = '其他'; const curClassMap = { [otherClassName]: { libID, areaID, ID: uuidV1(), ParentID: '-1', name: otherClassName } }; const classData = []; const priceData = []; const splitReg = /([0-9.]+)-([0-9.]+)/; let needOtherClass = false; sourceData.forEach(item => { const className = classNameMap[item.subcid] || otherClassName; if (className === otherClassName) { needOtherClass = true; } if (!curClassMap[className]) { const classItem = { libID, areaID, ID: uuidV1(), ParentID: '-1', NextSiblingID: '-1', name: className }; curClassMap[className] = classItem; const preClassData = classData[classData.length - 1]; if (preClassData) { preClassData.NextSiblingID = classItem.ID; } classData.push(classItem); } const classID = curClassMap[className].ID; const price = item.price || ''; const matchSplitPrice = price.match(splitReg); if (matchSplitPrice) { // 价格字段是区间,需要分割成最低价、最高价两条数据 const minPrice = matchSplitPrice[1]; const maxPrice = matchSplitPrice[2]; priceData.push(transfromSourceItemToPriceItem(item, classID, `${item.name}-最低价`, minPrice)); priceData.push(transfromSourceItemToPriceItem(item, classID, `${item.name}-最高价`, maxPrice)); } else { priceData.push(transfromSourceItemToPriceItem(item, classID, item.name, item.noTaxPrice)); } }); if (needOtherClass) { const otherClassItem = curClassMap[otherClassName]; otherClassItem.NextSiblingID = classData[0].ID; classData.push(otherClassItem); } const task = [ priceInfoClassModel.insertMany(classData), priceInfoItemModel.insertMany(priceData) ]; await Promise.all(task); // 将信息价源数据转换成信息价数据 function transfromSourceItemToPriceItem(sourceItem, classID, name, noTaxPrice) { return { compilationID, period, name, noTaxPrice, classID, libID, areaID, ID: uuidV1(), code: sourceItem.subcid, unit: sourceItem.unit, specs: sourceItem.specs, taxPrice: sourceItem.taxPrice, remark: sourceItem.remark, }; } } /** * 获取数据 * @param {String} from - 从哪一期开始 eg: 2020-01 * @param {String} to - 从哪一期结束 eg: 2020-05 * @param {String} compilationID - 费用定额ID * @return {Object} */ async function crawlData(from, to, compilationID) { const token = await getToken(); const periods = getPeriodData(from, to); if (!periods) { throw '无效的期数区间。'; } const classNameMap = await getClassNameMap(compilationID); if (!classNameMap) { throw '无有效的分类数据。'; } const hintInfos = []; for (const period of periods) { const sourcePeriod = period.replace(/年|月/g, ''); for (const { city, county } of areas) { const sourceData = await getPriceInfoSource(token, sourcePeriod, city, county); if (typeof sourceData === 'string') { hintInfos.push(sourceData); continue; } if (!sourceData.length) { continue; } await saveData(compilationID, period, `${city}-${county}`, sourceData, classNameMap); } } if (hintInfos.length) { throw hintInfos.join('\n'); } }