|
@@ -0,0 +1,416 @@
|
|
|
+/*
|
|
|
+ * @Description: 广东材料信息价格获取(通过造价通接口)
|
|
|
+ * @Author: vian
|
|
|
+ * @Date: 2020-09-29 11:22:59
|
|
|
+ */
|
|
|
+
|
|
|
// Public API of this module. `crawlData` is declared with a function
// declaration further down the file, so it is hoisted and already defined here.
module.exports = {
    crawlData,
};
|
|
|
+
|
|
|
+const uuidV1 = require('uuid/v1');
|
|
|
+const mongoose = require('mongoose');
|
|
|
+const axios = require('axios');
|
|
|
+const querystring = require('querystring');
|
|
|
+
|
|
|
// Mongoose models used below, looked up by their registered names.
// NOTE(review): presumably the schemas are registered elsewhere before this file is loaded — confirm.
// Price-info libraries; queried by compilationID + period in saveData.
const priceInfoLibModel = mongoose.model('std_price_info_lib');
// Price-info classes; queried by libID + areaID in saveData.
const priceInfoClassModel = mongoose.model('std_price_info_class');
// Price-info areas; queried by compilationID + name in saveData.
const priceInfoAreaModel = mongoose.model('std_price_info_areas');
// Converted price items; written by saveData.
const priceInfoItemModel = mongoose.model('std_price_info_items');
// Raw rows returned by the zjtcn API; read and written by getPriceInfoSource.
const priceInfoSourceModel = mongoose.model('std_price_info_source');
// Looked up by compilationId in getClassNameMap.
const gljLibModel = mongoose.model('std_glj_lib_map');
// Looked up by repositoryId in getClassNameMap; its `Name` field feeds the class-name map.
const gljClassModel = mongoose.model('std_glj_lib_gljClass');
|
|
|
+
|
|
|
// Areas as listed on the zjtcn (造价通) website: https://gd.zjtcn.com/gov/c_cs_d_t_p1.html
// Kept as [city, counties] groups for readability and expanded into the flat
// list of { city, county } pairs (same order) that the crawler iterates over.
const areas = [
    ['广州市', ['广州市', '花都区', '增城市', '从化市']],
    ['韶关市', ['韶关市', '乐昌市']],
    ['深圳市', ['深圳市']],
    ['珠海市', ['珠海市']],
    ['汕头市', ['汕头市', '濠江区', '潮阳区', '潮南区', '澄海区', '南澳县']],
    ['佛山市', ['佛山市', '南海区', '顺德区']],
    ['江门市', ['江门市', '新会区', '台山市', '开平市', '鹤山市', '恩平市']],
    ['湛江市', ['湛江市', '遂溪县', '徐闻县', '廉江市', '雷州市', '吴川市']],
    ['茂名市', ['茂名市', '电白市', '高州市', '化州市', '信宜市']],
    ['肇庆市', ['肇庆市', '鼎湖区', '广宁县', '怀集县', '封开县', '德庆县', '高要市', '四会市']],
    ['惠州市', ['惠州市', '惠阳区', '大亚湾开发区', '博罗县', '惠东县', '龙门县']],
    ['梅州市', ['梅州市', '梅县', '大埔县', '丰顺县', '平远县']],
    ['汕尾市', ['汕尾市', '海丰县', '陆河县']],
    ['河源市', ['河源市', '紫金县', '龙川县', '连平县', '和平县', '东源县']],
    ['阳江市', ['阳江市', '海陵岛区', '阳西县', '阳春市']],
    ['清远市', ['清远市', '佛冈县', '阳山县', '连山县', '连南县', '英德市', '连州市']],
    ['东莞市', ['东莞市']],
    ['中山市', ['中山市']],
    ['潮州市', ['潮州市', '潮安县', '饶平县']],
    ['揭阳市', ['揭阳市', '揭西县', '惠来县', '普宁市']],
    ['云浮市', ['云浮市', '新兴县', '郁南县', '罗定市']],
].reduce(
    (flat, [city, counties]) => flat.concat(counties.map(county => ({ city, county }))),
    []
);
|
|
|
+
|
|
|
// Request timeout in milliseconds; also quoted in the timeout error message.
const TIME_OUT = 120000;

// Local Fiddler capture proxy, used for debugging only. It is opt-in (set the
// environment variable ZJT_DEBUG_PROXY=1) because routing every request through
// 127.0.0.1:8888 makes all calls fail with a connect error when Fiddler is not running.
const DEBUG_PROXY = process.env.ZJT_DEBUG_PROXY === '1'
    ? { host: '127.0.0.1', port: 8888 }
    : null;

// Shared configuration for all zjtcn API requests.
const axiosConfig = {
    baseURL: 'http://api.zjtcn.com/user',
    timeout: TIME_OUT,
    headers: {
        'Cache-Control': 'max-age=0',
        'Content-Type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6',
    },
};
if (DEBUG_PROXY) {
    axiosConfig.proxy = DEBUG_PROXY;
}

// Create the axios instance.
const axiosInstance = axios.create(axiosConfig);
|
|
|
+
|
|
|
// Response interceptor.
// Success: unwrap the axios response so callers receive the payload directly.
// Failure: a timeout is turned into a readable message (rejected as a string);
// any other error is passed through unchanged.
axiosInstance.interceptors.response.use(
    response => response.data,
    error => {
        const timedOut = error.message.includes('timeout');
        return Promise.reject(timedOut ? `目标网络超时,请稍后再试。(${TIME_OUT}ms)` : error);
    }
);
|
|
|
+
|
|
|
/**
 * Send a form-urlencoded POST request through the shared axios instance.
 * @param {String} url - path passed to axiosInstance.post
 * @param {Object} data - key/value pairs, serialized with querystring.stringify
 * @return {Promise<*>} whatever axiosInstance.post resolves to
 */
async function post(url, data) {
    const formBody = querystring.stringify(data);
    return axiosInstance.post(url, formBody);
}
|
|
|
+
|
|
|
// Credentials for the zjtcn API. Per the original note they are valid for one
// year and are obtained by purchasing the zjtcn service.
// They can be overridden with the ZJT_SERVICE_ID / ZJT_SERVICE_KEY environment
// variables so a renewal does not require a code change; the literals below
// remain as the fallback.
// NOTE(review): the key is still committed in source — consider moving it to configuration.
const SERVICE_ID = process.env.ZJT_SERVICE_ID || '2020090003';
const SERVICE_KEY = process.env.ZJT_SERVICE_KEY || '97F2A441633F10DFEB5BFC29B3862847';
|
|
|
+
|
|
|
/**
 * Obtain the token required by the later price-info requests.
 * Three calls are made in sequence: /dyn_code (dynamic code), /aes (encrypted
 * secret built from the id, key and dynamic code) and /authentication (token).
 * @return {Promise<String>} the token from the /authentication reply
 * @throws {String} a message containing the reply's retCode when a step fails
 */
async function getToken() {
    // Step 1: dynamic code.
    const dynReply = await post('/dyn_code', { service_id: SERVICE_ID });
    const dynCode = dynReply.response_code;
    if (!dynCode) {
        throw `错误代号${dynReply.retCode} 获取动态码失败。`;
    }

    // Step 2: encrypted string; a successful reply is a plain string.
    const aesPayload = { service_id: SERVICE_ID, service_key: SERVICE_KEY, service_code: dynCode };
    const secret = await post('/aes', aesPayload);
    if (typeof secret !== 'string') {
        throw `错误代号${secret.retCode} 获取加密字符串错误。`;
    }

    // Step 3: exchange the secret for a token.
    const authReply = await post('/authentication', { service_id: SERVICE_ID, service_secret: secret });
    if (authReply.token) {
        return authReply.token;
    }
    throw `错误代号${authReply.retCode} 获取token失败。`;
}
|
|
|
+
|
|
|
// Maps a month number (1-12) to the zero-padded label used in period names.
const monthMap = {
    '1': '01月',
    '2': '02月',
    '3': '03月',
    '4': '04月',
    '5': '05月',
    '6': '06月',
    '7': '07月',
    '8': '08月',
    '9': '09月',
    '10': '10月',
    '11': '11月',
    '12': '12月',
};

/**
 * Build the list of period names covered by an inclusive month range.
 * @param {String} from - first period, "year-month", eg: 2020-01
 * @param {String} to - last period, "year-month", eg: 2020-05
 * @return {Array<String>|null} period names such as "2020年-01月" in ascending
 *   order, or null when either bound cannot be parsed, a month is outside
 *   1-12, or `from` is later than `to`
 */
function getPeriodData(from, to) {
    const reg = /(\d+)-(\d+)/;
    const fromMatch = typeof from === 'string' ? from.match(reg) : null;
    const toMatch = typeof to === 'string' ? to.match(reg) : null;
    if (!fromMatch || !toMatch) {
        return null;
    }
    const fromYear = +fromMatch[1];
    const fromMonth = +fromMatch[2];
    const toYear = +toMatch[1];
    const toMonth = +toMatch[2];
    const isValidMonth = month => month >= 1 && month <= 12;
    if (!isValidMonth(fromMonth) || !isValidMonth(toMonth)) {
        return null;
    }
    // Work with a single running month index so that ranges crossing a year
    // boundary are handled; comparing year and month independently stops too
    // early (eg: 2019-11 .. 2020-02 would produce nothing).
    const fromIndex = fromYear * 12 + (fromMonth - 1);
    const toIndex = toYear * 12 + (toMonth - 1);
    if (fromIndex > toIndex) {
        return null;
    }
    const periods = [];
    for (let index = fromIndex; index <= toIndex; index++) {
        const year = Math.floor(index / 12);
        const month = index % 12 + 1;
        periods.push(`${year}年-${monthMap[month]}`);
    }
    return periods;
}
|
|
|
+
|
|
|
/**
 * Fetch the price-info journal rows for one period and area.
 * Rows already stored for the same period/area/industry are returned as-is;
 * otherwise the zjtcn API is queried and its rows are stored before returning.
 * @param {String} token - token obtained from getToken
 * @param {String} period - period in "year-month" form, eg: 2020-09
 * @param {String} city - city name
 * @param {String} county - county name
 * @return {Promise<Array|String>} the source rows, or a hint string when the
 *   API reply carries no `results`
 */
async function getPriceInfoSource(token, period, city, county) {
    const province = '广东';
    const industry = 1;
    const area = `${province}-${city}-${county}`;

    const storedRows = await priceInfoSourceModel.find({ period, area, industry }).lean();
    if (storedRows.length) {
        return storedRows;
    }

    // Day "05" requests the monthly data.
    const date = `${period}-05`;
    const sourceData = await post('/gov/get', { token, province, city, county, industry, date });
    if (!sourceData.results) {
        // Not thrown on purpose: areas publish their journals at different
        // times, so an area without data is skipped and reported as a hint.
        return `retCode: ${sourceData.retCode} ${sourceData.msg} (${period} ${city} ${county})`;
    }

    // The API limits the number of requests (one per area), so the raw rows
    // are persisted to be safe.
    const insertData = sourceData.results.map(raw => {
        const { subcid, code, name, unit, price } = raw;
        return {
            period,
            area,
            industry,
            subcid,
            code,
            name,
            unit,
            price,
            taxPrice: raw.tax_price,
            noTaxPrice: raw.no_tax_price,
            specs: raw.spec,
            remark: raw.notes,
        };
    });
    if (insertData.length) {
        await priceInfoSourceModel.insertMany(insertData);
    }
    return insertData;
}
|
|
|
+
|
|
|
/**
 * Build the lookup table from a source row's subcid to a class name.
 * The key is the four leading digits of each class `Name`; names that do not
 * start with four digits are left out.
 * @param {String} compilationID - used to locate the glj library (compilationId)
 * @return {Promise<Object|null>} code -> full class name, or null when no
 *   library exists for the compilation
 */
async function getClassNameMap(compilationID) {
    const library = await gljLibModel.findOne({ compilationId: compilationID }).lean();
    if (!library) {
        return null;
    }
    const classes = await gljClassModel.find({ repositoryId: library.ID }).lean();
    const leadingCode = /^\d{4}/;
    const codeToName = {};
    for (const entry of classes) {
        const name = entry.Name || '';
        const hit = name.match(leadingCode);
        if (!hit) {
            continue;
        }
        codeToName[hit[0]] = name;
    }
    return codeToName;
}
|
|
|
+
|
|
|
/**
 * Convert price-info source rows and store them.
 * Creates the area and library records when missing, then inserts one class
 * per distinct class name plus the converted price items. Nothing is written
 * when price items already exist for this compilation, period and area.
 * @param {String} compilationID - compilation (fee quota) ID
 * @param {String} period - period, eg: 2020年-09月
 * @param {String} area - area, eg: 广州市-广州市
 * @param {Array} sourceData - source rows as returned by getPriceInfoSource
 * @param {Object} classNameMap - code -> class name map built by getClassNameMap
 * @return {Promise<void>}
 */
async function saveData(compilationID, period, area, sourceData, classNameMap) {
    // Find or create the area record.
    let areaItem = await priceInfoAreaModel.findOne({ compilationID, name: area }).lean();
    if (!areaItem) {
        areaItem = { compilationID, ID: uuidV1(), name: area };
        await priceInfoAreaModel.insertMany([areaItem]);
    }
    // Find or create the library record of this period.
    let lib = await priceInfoLibModel.findOne({ compilationID, period }).lean();
    if (!lib) {
        lib = {
            compilationID,
            period,
            ID: uuidV1(),
            name: `信息价(${period})`,
            createDate: Date.now()
        };
        await priceInfoLibModel.insertMany([lib]);
    }
    const areaID = areaItem.ID;
    const libID = lib.ID;
    // Skip when this period/area already has data, to avoid duplicate inserts.
    // Areas are not updated in sync on zjtcn, so completing one period may take
    // several imports; existing data means this area was imported before.
    const existCount = await priceInfoItemModel.count({ compilationID, period, areaID });
    if (existCount) {
        return;
    }
    // Class data is expected to be empty here; clear any leftovers.
    const existClassCount = await priceInfoClassModel.count({ libID, areaID });
    if (existClassCount) {
        await priceInfoClassModel.remove({ libID, areaID });
    }
    // Build the class data and the price items.
    const otherClassName = '其他';
    const otherClassItem = { libID, areaID, ID: uuidV1(), ParentID: '-1', name: otherClassName };
    const curClassMap = {
        [otherClassName]: otherClassItem
    };
    const classData = [];
    const priceData = [];
    const splitReg = /([0-9.]+)-([0-9.]+)/;
    let needOtherClass = false;

    // Convert one source row into a price item.
    const toPriceItem = (sourceItem, classID, name, noTaxPrice) => ({
        compilationID,
        period,
        name,
        noTaxPrice,
        classID,
        libID,
        areaID,
        ID: uuidV1(),
        code: sourceItem.subcid,
        unit: sourceItem.unit,
        specs: sourceItem.specs,
        taxPrice: sourceItem.taxPrice,
        remark: sourceItem.remark,
    });

    sourceData.forEach(item => {
        const className = classNameMap[item.subcid] || otherClassName;
        if (className === otherClassName) {
            needOtherClass = true;
        }
        if (!curClassMap[className]) {
            const classItem = { libID, areaID, ID: uuidV1(), ParentID: '-1', NextSiblingID: '-1', name: className };
            curClassMap[className] = classItem;
            // Chain the previously created class to this one.
            const preClassData = classData[classData.length - 1];
            if (preClassData) {
                preClassData.NextSiblingID = classItem.ID;
            }
            classData.push(classItem);
        }
        const classID = curClassMap[className].ID;
        // Coerce to string so a numeric price does not break the range match.
        const price = String(item.price || '');
        const matchSplitPrice = price.match(splitReg);
        if (matchSplitPrice) {
            // The price is a range: split it into a lowest-price and a highest-price item.
            const minPrice = matchSplitPrice[1];
            const maxPrice = matchSplitPrice[2];
            priceData.push(toPriceItem(item, classID, `${item.name}-最低价`, minPrice));
            priceData.push(toPriceItem(item, classID, `${item.name}-最高价`, maxPrice));
        } else {
            priceData.push(toPriceItem(item, classID, item.name, item.noTaxPrice));
        }
    });
    if (needOtherClass) {
        // The "other" class links to the first named class. When every row fell
        // into "other" there is no named class, so it is the only sibling.
        otherClassItem.NextSiblingID = classData.length ? classData[0].ID : '-1';
        classData.push(otherClassItem);
    }
    await Promise.all([
        priceInfoClassModel.insertMany(classData),
        priceInfoItemModel.insertMany(priceData)
    ]);
}
|
|
|
+
|
|
|
/**
 * Fetch price-info data for every configured area over a range of periods
 * and store it.
 * @param {String} from - first period, eg: 2020-01
 * @param {String} to - last period, eg: 2020-05
 * @param {String} compilationID - compilation (fee quota) ID
 * @return {Promise<void>}
 * @throws {String} when the period range is invalid, when no class data is
 *   available, or — after all areas were processed — the newline-joined hints
 *   for the areas that returned no data
 */
async function crawlData(from, to, compilationID) {
    // Validate the input before requesting a token, so that an invalid call
    // does not spend requests against the remote API.
    const periods = getPeriodData(from, to);
    if (!periods) {
        throw '无效的期数区间。';
    }
    const classNameMap = await getClassNameMap(compilationID);
    if (!classNameMap) {
        throw '无有效的分类数据。';
    }
    const token = await getToken();
    const hintInfos = [];
    for (const period of periods) {
        // "2020年-09月" -> "2020-09", the form used for the source data.
        const sourcePeriod = period.replace(/年|月/g, '');
        for (const { city, county } of areas) {
            const sourceData = await getPriceInfoSource(token, sourcePeriod, city, county);
            if (typeof sourceData === 'string') {
                // A string is a hint that this area has no data; collect it and move on.
                hintInfos.push(sourceData);
                continue;
            }
            if (!sourceData.length) {
                continue;
            }
            await saveData(compilationID, period, `${city}-${county}`, sourceData, classNameMap);
        }
    }
    // Hints are reported only after every area has been processed.
    if (hintInfos.length) {
        throw hintInfos.join('\n');
    }
}
|