Commit 01d0575a authored by 姜登's avatar 姜登

master

parents
,cid,city,order_id,login,ID,t_ID,name,t_name,company,t_company,status,t_status,customer_id,t_customer_id,card,t_card,company_id,t_company_id,phone,t_phone,record_date,t_record_date,balance,t_balance,deposit_amount,t_deposit_amount,deposit_base,t_deposit_base,person_rate,t_person_rate,company_rate,t_company_rate,deposit_rate,t_deposit_rate,birthday,t_birthday,init_date,t_init_date,start_date,t_start_date,rate,t_rate,缺少字段,t_缺少字段,字段缺失或冗余,t_字段缺失或冗余,gjj_details,t_gjj_details,newest_account,t_newest_account,detail_amount,t_detail_amount,detail_record_date,t_detail_record_date,detail_record_month,t_detail_record_month,detail_balance,t_detail_balance,detail_gjj_type,t_detail_gjj_type,analyzed,t_analyzed
0,,湘潭206,000aca4f-3ecc-474e-b188-b2032ec35145,"{'step1': {'ID': '360731198410035312', 'gjjAccount': '887356', 'password': '841018', 'code': 'myfw'}, 'cityurl': 'http://zfgjj.xiangtan.gov.cn/'}",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"{'cont_last_times': '2', 'back_cont_last_times': '2', 'cont_max_times': '2', 'back_cont_max_times': '40', 'sum_times': '12', 'back_sum_times': '40', 'newest_account': '1'}",analyzed配置为None: 王鲁宁的分析数据有误
1,,荆州68,019623a3-c4e1-43c6-adef-fa2b9d03325c,"{'step1': {'ID': '422403198010200012', 'password': 'zy801020', 'code': '359f'}, 'cityurl': 'http://58.54.135.133/wt-web/login'}",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"{'cont_last_times': '129', 'back_cont_last_times': '129', 'cont_max_times': '129', 'back_cont_max_times': '129', 'sum_times': '129', 'back_sum_times': '144', 'newest_account': '1'}",analyzed配置为None: 王鲁宁的分析数据有误
2,,宜宾96,037b7a3d-c0a2-42a2-9b2c-bd3ad9b38a2d,"{'step1': {'ID': '51253419760125539X', 'password': '3270821'}, 'cityurl': 'http://gjjcx.yibin.gov.cn/search.asp'}",,"ID配置为完整: 不是由数字,""x""组成或者长度异常",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,岳阳43,001f66a8-25ab-4d07-b3f1-7fe3931f0324,"{'step1': {'ID': '430624197107120033', 'password': '000000'}, 'cityurl': 'http://www.yygjj.gov.cn/'}",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"{'cont_last_times': '0', 'back_cont_last_times': '27', 'cont_max_times': '0', 'back_cont_max_times': '28', 'sum_times': '0', 'back_sum_times': '58', 'newest_account': '1'}",analyzed配置为None: 王鲁宁的分析数据有误
This diff is collapsed.
# -*- coding: utf8 -*-
import configs_default
def merge(defaults, override):
    """Overlay ``override`` onto ``defaults`` and return a new dict.

    Only keys present in ``defaults`` appear in the result; keys that exist
    solely in ``override`` are ignored. When both sides hold a dict under the
    same key the two are merged recursively. In every other case the override
    value replaces the default, including when a dict default is overridden
    by a non-dict value such as ``None``.

    Neither input is modified. Values that are not overridden are reused by
    reference, not copied.
    """
    merged = {}
    for key, default in defaults.items():
        if key not in override:
            merged[key] = default
            continue
        value = override[key]
        # Recurse only when both sides are dicts; recursing into a scalar
        # override would fail on the membership test above.
        if isinstance(default, dict) and isinstance(value, dict):
            merged[key] = merge(default, value)
        else:
            merged[key] = value
    return merged
def toDict(d):
    """Return a recursive copy of ``d`` built from plain ``dict`` objects.

    Every value that is itself a dict is rebuilt the same way; all other
    values are carried over by reference.
    """
    return {
        key: (toDict(value) if isinstance(value, dict) else value)
        for key, value in d.items()
    }
# Build the effective configuration: the defaults, overlaid with the optional
# ``configs_override`` module when it can be imported.
try:
    import configs_override
except ImportError:
    # No override module available: the defaults are used unchanged.
    configs = configs_default.configs
else:
    configs = merge(configs_default.configs, configs_override.configs)
# Rebuild the result as plain nested dicts.
configs = toDict(configs)
# Default configuration.
#
# 'rules' maps a rule id to {'_re': regex source, 'desc': message, 'm': flag}.
# 'use' maps a field name to {mode: [rule ids]}.
# NOTE(review): the modes look like '1' = complete value, '0' = partially
# masked value, '-1' = field expected to be absent -- confirm against the
# checker that consumes this table.
# NOTE(review): database credentials are committed here in plain text;
# consider loading them from the environment or an untracked override file.
configs = {
    'debug': True,
    'limit': 5,
    'mysql': {
        'host': 'rm-bp1272001633qc0x9o.mysql.rds.aliyuncs.com',
        'user': 'query',
        'pwd': '5gqR2EQK',
    },
    'mongo': {
        'query': 'mongodb://root:CQ72J2qKY47edG7@112.124.105.123:3717/Gjj'
    },
    'log': {
        'info': './'
    },
    'rules': {
        # A value is reported as wrong when "the pattern matched" equals the rule's 'm' flag.
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        '0': {'_re': r'^[\s\S]+$', 'desc': '但是有值', 'm': True},
        '1': {'_re': '', 'desc': '字段缺失', 'm': False},
        # ID
        '2': {'_re': r'^\d{18}$|^\d{17}X$', 'desc': '不是由数字,"x"组成或者长度异常', 'm': False},
        '3': {'_re': r'[\d*x]{4,18}', 'desc': '不是由数字,"x", "*"组成或者长度不在[4,18]之内', 'm': False},
        # name
        '5': {'_re': r'[^\u4e00-\u9fa5\u8d5f\ue863·.﹒.]', 'desc': '存在非(中文, ".")字符', 'm': True},
        '6': {'_re': r'[^\u4e00-\u9fa5\u8d5f\ue863·.﹒.**]', 'desc': '存在非(中文, ".", "*", "*")字符', 'm': True},
        # company
        # (disabled) earlier rule '8' checked that the length was within [1, 30]:
        #     r'^[\s\S]{0,1}$|^[\s\S]{30,}$', 'm': True
        '8': {'_re': r'[*#?]+|^\d+$', 'desc': '存在("*","#","?")字符或仅有数字组成', 'm': True},
        '9': {'_re': r'[#?]+|^\d+$', 'desc': '存在("#","?")字符或仅有数字组成', 'm': True},
        # status
        'a': {'_re': r'^[\s\S]{0,1}$|^[\s\S]{7,}$', 'desc': '长度不在[1, 7]之内', 'm': True},
        'b': {'_re': r'[^\u4e00-\u9fa5\u3400-\u4DB5\ue863()()]', 'desc': '存在非(中文, "(", ")")字符', 'm': True},
        # company_id customer_id
        'c': {'_re': r'^[\da-zA-Z-]+$', 'desc': '不是由数字, 字母, "-"组成', 'm': False},
        'd': {'_re': r'^[\d*a-zA-Z-]+$', 'desc': '不是由数字, 字母, "*", "-"组成', 'm': False},
        'e': {'_re': r'^[\s\S]{11}$|^[\s\S]{6,9}$|^\d{3,4}-\d{6,8}$', 'desc': '长度不是11位,[6,8]位, 或者不是xxxx-xxxxxxxx格式',
              'm': False},
        # phone
        'f': {'_re': r'^[\d-]+$', 'desc': '不是由数字, "-"组成', 'm': False},
        'g': {'_re': r'^[\d*-]+$', 'desc': '不是由数字, ,"-", "*"组成', 'm': False},
        # card
        'h': {'_re': r'^[\w]+$|^\d{3,4}-\d{6,8}$|^\d{3,4}-\d{14,15}$', 'desc': '不是由数字,组成', 'm': False},
        'i': {'_re': r'^[\d*]+$', 'desc': '不是由数字, "*"组成', 'm': False},
        # date format
        'j': {'_re': r'^\d{4}-\d{2}-\d{2}$', 'desc': '不是yyyy-mm-dd格式', 'm': False},
        # Rules with an empty pattern carry only a message.
        # NOTE(review): presumably evaluated by code rather than by regex -- confirm.
        'k': {'_re': r'', 'desc': '不在1990-01-01和当前日期之间', 'm': False},
        'l': {'_re': r'', 'desc': '与明细最新记录日期不符', 'm': False},
        'm': {'_re': r'', 'desc': '存在更近的日期', 'm': False},
        'n': {'_re': r'', 'desc': '没有从明细里取', 'm': False},
        'o': {'_re': r'', 'desc': '非正常账号', 'm': False},
        # amount
        'p': {'_re': r'^[\d\.]+$', 'desc': '存在非(数字, ".")字符', 'm': False},
        'q': {'_re': r'^\.', 'desc': '第一位是"."', 'm': True},
        # ratio
        'r': {'_re': r'^0\.\d{1,5}$|^0$', 'desc': '不是0.xxxxx的格式', 'm': False},
        # lower / upper bounds
        's': {'_re': r'', 'desc': '小于下限', 'm': False},
        't': {'_re': r'', 'desc': '大于上限', 'm': False},
        'u': {'_re': r'', 'desc': '个人缴存比例+公司缴存比例不等于缴存比例', 'm': False},
        'v': {'_re': r'', 'desc': '大于开始缴存时间或record_date', 'm': False},
        'w': {'_re': r'', 'desc': 'start_date大于record_date', 'm': False},
        'x': {'_re': r'', 'desc': '大于明细里第一条记录的日期', 'm': False},
        'y': {'_re': r'', 'desc': '', 'm': False},
        'z': {'_re': r'', 'desc': '与校验有出入', 'm': False},
        'a1': {'_re': r'', 'desc': '明细数据为空', 'm': False},
        'a2': {'_re': r'', 'desc': '月缴有误', 'm': False},
        'a3': {'_re': r'', 'desc': '王鲁宁的分析数据有误', 'm': False},
        'a4': {'_re': r'', 'desc': '明细中缴存类型为提取/偿还/支取/还贷, 但是金额不为负', 'm': False},
        'a5': {'_re': r'', 'desc': '明细中gjj_type值不是0或1', 'm': False},
        'a6': {'_re': r'', 'desc': '明细中金额格式存在非(数字, ".", "-")字符', 'm': False},
        'a7': {'_re': r'', 'desc': '明细中日期格式不是yyyy-mm或yyyy-mm-dd', 'm': False},
        'a8': {'_re': r'', 'desc': '明细中有record_month但是没有record_date', 'm': False},
        # whether it equals the company found in the details
    },
    'use': {
        'ID': {
            '1': ['2'],
            '0': ['3'],
            '-1': [],
        },
        'name': {
            '1': ['5'],
            '0': ['6'],
            '-1': [],
        },
        'company': {
            '1': ['8'],
            '0': ['9'],
            '-1': ['0'],
        },
        'status': {
            '1': ['a', 'b'],
            '-1': ['0'],
        },
        'customer_id': {
            '1': ['c'],
            '0': ['d'],
            '-1': [],
        },
        'card': {
            '1': ['h'],
            '0': ['i'],
            '-1': [],
        },
        'company_id': {
            '1': ['c'],
            '0': ['d'],
            '-1': [],
        },
        'phone': {
            '1': ['f', 'e'],
            '0': ['g', 'e'],
            '-1': [],
        },
        'record_date': {
            '1': ['j'],
            '-1': []
        },
        'balance': {
            '1': ['p', 'q'],
            '-1': []
        },
        'deposit_amount': {
            '1': ['p'],
            '-1': []
        },
        'deposit_base': {
            '1': ['p'],
            '-1': []
        },
        'person_rate': {
            '1': ['r'],
            '-1': [],
        },
        'company_rate': {
            '1': ['r'],
            '-1': [],
        },
        'deposit_rate': {
            '1': ['r'],
            '-1': [],
        },
        'birthday': {
            '1': ['j'],
            '-1': []
        },
        'init_date': {
            '1': ['j'],
            '-1': []
        },
        'start_date': {
            '1': ['j'],
            '-1': []
        },
        'rate': {'': ''},
        '缺少字段': {'': ''},
        '字段缺失或冗余': {'': ''},
        'gjj_details': {'': ''},
        # Wang Luning's analysis data
        'newest_account': {'': ''},
        'detail_amount': {'': ''},
        'detail_record_date': {'': ''},
        'detail_record_month': {'': ''},
        'detail_balance': {'': ''},
        'detail_gjj_type': {'': ''},
        'analyzed': {'': ''},
    }
}
# Cities: Zaozhuang, Lincang, Dingzhou, Shuangyashan, Pingdingshan, Suzhou,
# Chifeng, Zhumadian, Baoding, Siping
# NOTE(review): the purpose of this city list is not evident from this file.
configs = dict(
    mysql=dict(host='rds-jb-08.mysql.rds.aliyuncs.com'),
    debug=False,
    limit=500,
    log=dict(info='/root/MicroserviceTest/gjj_support/logs'),
)
# coding: utf-8
# author: hw
from thread import Threading
import json
class Manager:
    """Registry that maps task names to a status string."""

    def __init__(self):
        # task name -> status string
        self.tasks = dict()

    def create_task(self, name):
        """Record ``name`` with the status ``'wait'``.

        Registering a name that already exists resets its status to
        ``'wait'``.
        """
        self.tasks[name] = 'wait'
This diff is collapsed.
import logging
import os
import sys
import traceback
import pandas as pd
from config import configs
def logger_config(filename):
    """Attach a UTF-8 file handler to the root logger and return that logger.

    The root logger's level is set to INFO and records are written as
    ``"<asctime> - <message>"``. The file is opened in write mode, so any
    existing content is truncated when the handler is created.

    Calling this again for a file that already has a handler on the root
    logger returns the logger without adding a second handler; otherwise
    every call would duplicate each record in the file.
    """
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)

    # FileHandler stores the absolute path in ``baseFilename``.
    target = os.path.abspath(os.fspath(filename))
    for existing in logger.handlers:
        if isinstance(existing, logging.FileHandler) and existing.baseFilename == target:
            return logger

    handler = logging.FileHandler(filename, 'w', encoding='utf8')
    handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
    logger.addHandler(handler)
    return logger
def test(x):
return x
if __name__ == '__main__':
    # Scratch run: build a small frame, group it by (city, t_name) and print
    # at most two rows of every group, then dump the loaded configuration.
    data = [
        {'city': '杭州', 'ID': '33018219940121216', 't_ID': '1', 'name': 'anan', 't_name': '1'},
        {'city': '上海', 'ID': '33018219940165466', 't_ID': '1', 'name': 'lolo', 't_name': '1'},
        {'city': '上海', 'ID': '2326****0322', 't_ID': '1', 'name': 'qwe', 't_name': '2'},
        {'city': '上海', 'ID': '2326****0322', 't_ID': '1', 'name': 'qwe', 't_name': '2'},
        {'city': '上海', 'ID': '2326****12365', 't_ID': '1',},
        {'city': '上海', 'ID': '34234****23123', 't_ID': '1', 'name': 'aaa', 't_name': '4'},
        {'city': '北京', 'ID': '3435****2763', 't_ID': '1', 'name': 'ccc', 't_name': '4'},
        {'city': '北京', 'ID': '2326****67675', 't_ID': '1', 'name': 'ddd', 't_name': '3'},
        {'city': '雅安', 'name': 'zzz', 't_name': '4'},
    ]
    df = pd.DataFrame(data)
    # fillna returns a new frame, so the result must be rebound. With the
    # blanks filled, rows lacking 't_name' are grouped under '' instead of
    # being dropped by groupby as NaN keys.
    df = df.fillna('')
    # DataFrame.ix was removed in pandas 1.0; .loc is the label-based lookup.
    print(df.loc[0, 'ID'])
    grouped = df.groupby([df['city'], df['t_name']])
    for (cc, k), g in grouped:
        print(cc, k)
        d = g[:2]
        print(d.T.to_dict().values())
    print(configs)
# -*- coding: utf-8 -*-
import datetime
from dateutil.relativedelta import relativedelta
def away_today(date):
    """Calculate how far ``date`` lies from today.

    ``date`` is split on ``'-'`` and its first three parts are read as year,
    month and day. Returns the ``(years, months, days)`` fields of
    ``relativedelta(today, date)``, or ``None`` when ``date`` is empty or
    not a ``str``.
    """
    if not date or not isinstance(date, str):
        return None
    parts = date.split('-')
    then = datetime.date(year=int(parts[0]), month=int(parts[1]), day=int(parts[2]))
    gap = relativedelta(datetime.date.today(), then)
    return gap.years, gap.months, gap.days
def format_date(raw):
    """Return the first day of the month encoded in ``raw``.

    The first four characters of ``raw`` are read as the year and everything
    after them as the month.
    """
    return datetime.date(int(raw[:4]), int(raw[4:]), 1)
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
# -*- coding: utf8 -*-
import configs_default
def merge(defaults, override):
    """Overlay ``override`` onto ``defaults`` and return a new dict.

    Only keys present in ``defaults`` appear in the result; keys that exist
    solely in ``override`` are ignored. When both sides hold a dict under the
    same key the two are merged recursively. In every other case the override
    value replaces the default, including when a dict default is overridden
    by a non-dict value such as ``None``.

    Neither input is modified. Values that are not overridden are reused by
    reference, not copied.
    """
    merged = {}
    for key, default in defaults.items():
        if key not in override:
            merged[key] = default
            continue
        value = override[key]
        # Recurse only when both sides are dicts; recursing into a scalar
        # override would fail on the membership test above.
        if isinstance(default, dict) and isinstance(value, dict):
            merged[key] = merge(default, value)
        else:
            merged[key] = value
    return merged
def toDict(d):
    """Return a recursive copy of ``d`` built from plain ``dict`` objects.

    Every value that is itself a dict is rebuilt the same way; all other
    values are carried over by reference.
    """
    return {
        key: (toDict(value) if isinstance(value, dict) else value)
        for key, value in d.items()
    }
# Build the effective configuration: the defaults, overlaid with the optional
# ``configs_override`` module when it can be imported.
try:
    import configs_override
except ImportError:
    # No override module available: the defaults are used unchanged.
    configs = configs_default.configs
else:
    configs = merge(configs_default.configs, configs_override.configs)
# Rebuild the result as plain nested dicts.
configs = toDict(configs)
# Default configuration.
#
# 'rules' maps a rule id to {'_re': regex source, 'desc': message, 'm': flag}.
# 'use' maps a field name to {mode: [rule ids]}.
# NOTE(review): the modes look like '1' = complete value, '0' = partially
# masked value, '-1' = field expected to be absent -- confirm against the
# checker that consumes this table.
# NOTE(review): database credentials are committed here in plain text;
# consider loading them from the environment or an untracked override file.
configs = {
    'debug': True,
    'limit': 500,
    'mysql': {
        'host': 'rm-bp1272001633qc0x9o.mysql.rds.aliyuncs.com',
        'user': 'query',
        'pwd': '5gqR2EQK',
    },
    'mongo': {
        'query': 'mongodb://root:CQ72J2qKY47edG7@112.124.105.123:3717/Gjj'
    },
    'log': {
        'info': './'
    },
    'rules': {
        # A value is reported as wrong when "the pattern matched" equals the rule's 'm' flag.
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        '0': {'_re': r'^[\s\S]+$', 'desc': '但是有值', 'm': True},
        '1': {'_re': '', 'desc': '字段缺失', 'm': False},
        # ID
        '2': {'_re': r'^\d{18}$|^\d{17}X$', 'desc': '不是由数字,"x"组成或者长度异常', 'm': False},
        '3': {'_re': r'[\d*x]{4,18}', 'desc': '不是由数字,"x", "*"组成或者长度不在[4,18]之内', 'm': False},
        # name
        '5': {'_re': r'[^\u4e00-\u9fa5\u8d5f\ue863·.﹒.]', 'desc': '存在非(中文, ".")字符', 'm': True},
        '6': {'_re': r'[^\u4e00-\u9fa5\u8d5f\ue863·.﹒.**]', 'desc': '存在非(中文, ".", "*", "*")字符', 'm': True},
        # company
        # (disabled) earlier rule '8' checked that the length was within [1, 30]:
        #     r'^[\s\S]{0,1}$|^[\s\S]{30,}$', 'm': True
        '8': {'_re': r'[*#?]+|^\d+$', 'desc': '存在("*","#","?")字符或仅有数字组成', 'm': True},
        '9': {'_re': r'[#?]+|^\d+$', 'desc': '存在("#","?")字符或仅有数字组成', 'm': True},
        # status
        'a': {'_re': r'^[\s\S]{0,1}$|^[\s\S]{7,}$', 'desc': '长度不在[1, 7]之内', 'm': True},
        'b': {'_re': r'[^\u4e00-\u9fa5\u3400-\u4DB5\ue863()()]', 'desc': '存在非(中文, "(", ")")字符', 'm': True},
        # company_id customer_id
        'c': {'_re': r'^[\da-zA-Z-]+$', 'desc': '不是由数字, 字母, "-"组成', 'm': False},
        'd': {'_re': r'^[\d*a-zA-Z-]+$', 'desc': '不是由数字, 字母, "*", "-"组成', 'm': False},
        'e': {'_re': r'^[\s\S]{11}$|^[\s\S]{6,9}$|^\d{3,4}-\d{6,8}$', 'desc': '长度不是11位,[6,8]位, 或者不是xxxx-xxxxxxxx格式',
              'm': False},
        # phone
        'f': {'_re': r'^[\d-]+$', 'desc': '不是由数字, "-"组成', 'm': False},
        'g': {'_re': r'^[\d*-]+$', 'desc': '不是由数字, ,"-", "*"组成', 'm': False},
        # card
        'h': {'_re': r'^[\d]+$', 'desc': '不是由数字,组成', 'm': False},
        'i': {'_re': r'^[\d*]+$', 'desc': '不是由数字, "*"组成', 'm': False},
        # date format
        'j': {'_re': r'^\d{4}-\d{2}-\d{2}$', 'desc': '不是yyyy-mm-dd格式', 'm': False},
        # Rules with an empty pattern carry only a message.
        # NOTE(review): presumably evaluated by code rather than by regex -- confirm.
        'k': {'_re': r'', 'desc': '不在1990-01-01和当前日期之间', 'm': False},
        'l': {'_re': r'', 'desc': '与明细最新记录日期不符', 'm': False},
        'm': {'_re': r'', 'desc': '存在更近的日期', 'm': False},
        'n': {'_re': r'', 'desc': '没有从明细里取', 'm': False},
        'o': {'_re': r'', 'desc': '非正常账号', 'm': False},
        # amount
        'p': {'_re': r'^[\d\.]+$', 'desc': '存在非(数字, ".")字符', 'm': False},
        'q': {'_re': r'^\.', 'desc': '第一位是"."', 'm': True},
        # ratio
        'r': {'_re': r'^0\.\d{1,5}$|^0$', 'desc': '不是0.xxxxx的格式', 'm': False},
        # lower / upper bounds
        's': {'_re': r'', 'desc': '小于下限', 'm': False},
        't': {'_re': r'', 'desc': '大于上限', 'm': False},
        'u': {'_re': r'', 'desc': '个人缴存比例+公司缴存比例不等于缴存比例', 'm': False},
        'v': {'_re': r'', 'desc': '大于开始缴存时间或record_date', 'm': False},
        'w': {'_re': r'', 'desc': 'start_date大于record_date', 'm': False},
        'x': {'_re': r'', 'desc': '大于明细里第一条记录的日期', 'm': False},
        'y': {'_re': r'', 'desc': '', 'm': False},
        'z': {'_re': r'', 'desc': '与校验有出入', 'm': False},
        'a1': {'_re': r'', 'desc': '明细数据为空', 'm': False},
        'a2': {'_re': r'', 'desc': '月缴有误', 'm': False},
        'a3': {'_re': r'', 'desc': '王鲁宁的分析数据有误', 'm': False},
        'a4': {'_re': r'', 'desc': '明细中缴存类型为提取/偿还/支取/还贷, 但是金额不为负', 'm': False},
        'a5': {'_re': r'', 'desc': '明细中gjj_type值不是0或1', 'm': False},
        'a6': {'_re': r'', 'desc': '明细中金额格式存在非(数字, ".", "-")字符', 'm': False},
        'a7': {'_re': r'', 'desc': '明细中日期格式不是yyyy-mm或yyyy-mm-dd', 'm': False},
        'a8': {'_re': r'', 'desc': '明细中有record_month但是没有record_date', 'm': False},
        # whether it equals the company found in the details
    },
    'use': {
        'ID': {
            '1': ['2'],
            '0': ['3'],
            '-1': [],
        },
        'name': {
            '1': ['5'],
            '0': ['6'],
            '-1': [],
        },
        'company': {
            '1': ['8'],
            '0': ['9'],
            '-1': ['0'],
        },
        'status': {
            '1': ['a', 'b'],
            '-1': ['0'],
        },
        'customer_id': {
            '1': ['c'],
            '0': ['d'],
            '-1': [],
        },
        'card': {
            '1': ['h'],
            '0': ['i'],
            '-1': [],
        },
        'company_id': {
            '1': ['c'],
            '0': ['d'],
            '-1': [],
        },
        'phone': {
            '1': ['f', 'e'],
            '0': ['g', 'e'],
            '-1': [],
        },
        'record_date': {
            '1': ['j'],
            '-1': []
        },
        'balance': {
            '1': ['p', 'q'],
            '-1': []
        },
        'deposit_amount': {
            '1': ['p'],
            '-1': []
        },
        'deposit_base': {
            '1': ['p'],
            '-1': []
        },
        'person_rate': {
            '1': ['r'],
            '-1': [],
        },
        'company_rate': {
            '1': ['r'],
            '-1': [],
        },
        'deposit_rate': {
            '1': ['r'],
            '-1': [],
        },
        'birthday': {
            '1': ['j'],
            '-1': []
        },
        'init_date': {
            '1': ['j'],
            '-1': []
        },
        'start_date': {
            '1': ['j'],
            '-1': []
        },
        'rate': {'': ''},
        '缺少字段': {'': ''},
        '字段缺失或冗余': {'': ''},
        'gjj_details': {'': ''},
        # Wang Luning's analysis data
        'newest_account': {'': ''},
        'detail_amount': {'': ''},
        'detail_record_date': {'': ''},
        'detail_record_month': {'': ''},
        'detail_balance': {'': ''},
        'detail_gjj_type': {'': ''},
        'analyzed': {'': ''},
    }
}
# Cities: Zaozhuang, Lincang, Dingzhou, Shuangyashan, Pingdingshan, Suzhou,
# Chifeng, Zhumadian, Baoding, Siping
# NOTE(review): the purpose of this city list is not evident from this file.
configs = dict(
    mysql=dict(host='rds-jb-08.mysql.rds.aliyuncs.com'),
    debug=True,
    limit=50,
    log=dict(info='./'),
)
This diff is collapsed.
# -*- coding: utf-8 -*-
import datetime
from dateutil.relativedelta import relativedelta
def away_today(date):
    """Calculate how far ``date`` lies from today.

    ``date`` is split on ``'-'`` and its first three parts are read as year,
    month and day. Returns the ``(years, months, days)`` fields of
    ``relativedelta(today, date)``, or ``None`` when ``date`` is empty or
    not a ``str``.
    """
    if not date or not isinstance(date, str):
        return None
    parts = date.split('-')
    then = datetime.date(year=int(parts[0]), month=int(parts[1]), day=int(parts[2]))
    gap = relativedelta(datetime.date.today(), then)
    return gap.years, gap.months, gap.days
def format_date(raw):
    """Return the first day of the month encoded in ``raw``.

    The first four characters of ``raw`` are read as the year and everything
    after them as the month.
    """
    return datetime.date(int(raw[:4]), int(raw[4:]), 1)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment