Commit 01d0575a authored by 姜登's avatar 姜登

master

parents
,cid,city,order_id,login,ID,t_ID,name,t_name,company,t_company,status,t_status,customer_id,t_customer_id,card,t_card,company_id,t_company_id,phone,t_phone,record_date,t_record_date,balance,t_balance,deposit_amount,t_deposit_amount,deposit_base,t_deposit_base,person_rate,t_person_rate,company_rate,t_company_rate,deposit_rate,t_deposit_rate,birthday,t_birthday,init_date,t_init_date,start_date,t_start_date,rate,t_rate,缺少字段,t_缺少字段,字段缺失或冗余,t_字段缺失或冗余,gjj_details,t_gjj_details,newest_account,t_newest_account,detail_amount,t_detail_amount,detail_record_date,t_detail_record_date,detail_record_month,t_detail_record_month,detail_balance,t_detail_balance,detail_gjj_type,t_detail_gjj_type,analyzed,t_analyzed
0,,湘潭206,000aca4f-3ecc-474e-b188-b2032ec35145,"{'step1': {'ID': '360731198410035312', 'gjjAccount': '887356', 'password': '841018', 'code': 'myfw'}, 'cityurl': 'http://zfgjj.xiangtan.gov.cn/'}",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"{'cont_last_times': '2', 'back_cont_last_times': '2', 'cont_max_times': '2', 'back_cont_max_times': '40', 'sum_times': '12', 'back_sum_times': '40', 'newest_account': '1'}",analyzed配置为None: 王鲁宁的分析数据有误
1,,荆州68,019623a3-c4e1-43c6-adef-fa2b9d03325c,"{'step1': {'ID': '422403198010200012', 'password': 'zy801020', 'code': '359f'}, 'cityurl': 'http://58.54.135.133/wt-web/login'}",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"{'cont_last_times': '129', 'back_cont_last_times': '129', 'cont_max_times': '129', 'back_cont_max_times': '129', 'sum_times': '129', 'back_sum_times': '144', 'newest_account': '1'}",analyzed配置为None: 王鲁宁的分析数据有误
2,,宜宾96,037b7a3d-c0a2-42a2-9b2c-bd3ad9b38a2d,"{'step1': {'ID': '51253419760125539X', 'password': '3270821'}, 'cityurl': 'http://gjjcx.yibin.gov.cn/search.asp'}",,"ID配置为完整: 不是由数字,""x""组成或者长度异常",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,岳阳43,001f66a8-25ab-4d07-b3f1-7fe3931f0324,"{'step1': {'ID': '430624197107120033', 'password': '000000'}, 'cityurl': 'http://www.yygjj.gov.cn/'}",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"{'cont_last_times': '0', 'back_cont_last_times': '27', 'cont_max_times': '0', 'back_cont_max_times': '28', 'sum_times': '0', 'back_sum_times': '58', 'newest_account': '1'}",analyzed配置为None: 王鲁宁的分析数据有误
2018-04-17 10:21:17,847-INFO-{'debug': True, 'limit': 50, 'mysql': {'host': 'rds-jb-08.mysql.rds.aliyuncs.com', 'user': 'query', 'pwd': '5gqR2EQK'}, 'mongo': {'query': 'mongodb://root:CQ72J2qKY47edG7@112.124.105.123:3717/Gjj'}, 'log': {'info': './'}, 'rules': {'0': {'_re': '^[\\s\\S]+$', 'desc': '但是有值', 'm': True}, '1': {'_re': '', 'desc': '字段缺失', 'm': False}, '2': {'_re': '^\\d{18}$|^\\d{17}X$', 'desc': '不是由数字,"x"组成或者长度异常', 'm': False}, '3': {'_re': '[\\d*x]{4,18}', 'desc': '不是由数字,"x", "*"组成或者长度不在[4,18]之内', 'm': False}, '5': {'_re': '[^\\u4e00-\\u9fa5\\u8d5f\\ue863·.﹒.]', 'desc': '存在非(中文, ".")字符', 'm': True}, '6': {'_re': '[^\\u4e00-\\u9fa5\\u8d5f\\ue863·.﹒.**]', 'desc': '存在非(中文, ".", "*", "*")字符', 'm': True}, '8': {'_re': '[*#?]+|^\\d+$', 'desc': '存在("*","#","?")字符或仅有数字组成', 'm': True}, '9': {'_re': '[#?]+|^\\d+$', 'desc': '存在("#","?")字符或仅有数字组成', 'm': True}, 'a': {'_re': '^[\\s\\S]{0,1}$|^[\\s\\S]{7,}$', 'desc': '长度不在[1, 7]之内', 'm': True}, 'b': {'_re': '[^\\u4e00-\\u9fa5\\u3400-\\u4DB5\\ue863()()]', 'desc': '存在非(中文, "(", ")")字符', 'm': True}, 'c': {'_re': '^[\\da-zA-Z-]+$', 'desc': '不是由数字, 字母, "-"组成', 'm': False}, 'd': {'_re': '^[\\d*a-zA-Z-]+$', 'desc': '不是由数字, 字母, "*", "-"组成', 'm': False}, 'e': {'_re': '^[\\s\\S]{11}$|^[\\s\\S]{6,9}$|^\\d{3,4}-\\d{6,8}$', 'desc': '长度不是11位,[6,8]位, 或者不是xxxx-xxxxxxxx格式', 'm': False}, 'f': {'_re': '^[\\d-]+$', 'desc': '不是由数字, "-"组成', 'm': False}, 'g': {'_re': '^[\\d*-]+$', 'desc': '不是由数字, ,"-", "*"组成', 'm': False}, 'h': {'_re': '^[\\d]+$', 'desc': '不是由数字,组成', 'm': False}, 'i': {'_re': '^[\\d*]+$', 'desc': '不是由数字, "*"组成', 'm': False}, 'j': {'_re': '^\\d{4}-\\d{2}-\\d{2}$', 'desc': '不是yyyy-mm-dd格式', 'm': False}, 'k': {'_re': '', 'desc': '不在1990-01-01和当前日期之间', 'm': False}, 'l': {'_re': '', 'desc': '与明细最新记录日期不符', 'm': False}, 'm': {'_re': '', 'desc': '存在更近的日期', 'm': False}, 'n': {'_re': '', 'desc': '没有从明细里取', 'm': False}, 'o': {'_re': '', 'desc': '非正常账号', 'm': False}, 'p': {'_re': '^[\\d\\.]+$', 'desc': '存在非(数字, ".")字符', 'm': False}, 
'q': {'_re': '^\\.', 'desc': '第一位是"."', 'm': True}, 'r': {'_re': '^0\\.\\d{1,5}$|^0$', 'desc': '不是0.xxxxx的格式', 'm': False}, 's': {'_re': '', 'desc': '小于下限', 'm': False}, 't': {'_re': '', 'desc': '大于上限', 'm': False}, 'u': {'_re': '', 'desc': '个人缴存比例+公司缴存比例不等于缴存比例', 'm': False}, 'v': {'_re': '', 'desc': '大于开始缴存时间或record_date', 'm': False}, 'w': {'_re': '', 'desc': 'start_date大于record_date', 'm': False}, 'x': {'_re': '', 'desc': '大于明细里第一条记录的日期', 'm': False}, 'y': {'_re': '', 'desc': '', 'm': False}, 'z': {'_re': '', 'desc': '与校验有出入', 'm': False}, 'a1': {'_re': '', 'desc': '明细数据为空', 'm': False}, 'a2': {'_re': '', 'desc': '月缴有误', 'm': False}, 'a3': {'_re': '', 'desc': '王鲁宁的分析数据有误', 'm': False}, 'a4': {'_re': '', 'desc': '明细中缴存类型为提取/偿还/支取/还贷, 但是金额不为负', 'm': False}, 'a5': {'_re': '', 'desc': '明细中gjj_type值不是0或1', 'm': False}, 'a6': {'_re': '', 'desc': '明细中金额格式存在非(数字, ".", "-")字符', 'm': False}, 'a7': {'_re': '', 'desc': '明细中日期格式不是yyyy-mm或yyyy-mm-dd', 'm': False}, 'a8': {'_re': '', 'desc': '明细中有record_month但是没有record_date', 'm': False}}, 'use': {'ID': {'1': ['2'], '0': ['3'], '-1': []}, 'name': {'1': ['5'], '0': ['6'], '-1': []}, 'company': {'1': ['8'], '0': ['9'], '-1': ['0']}, 'status': {'1': ['a', 'b'], '-1': ['0']}, 'customer_id': {'1': ['c'], '0': ['d'], '-1': []}, 'card': {'1': ['h'], '0': ['i'], '-1': []}, 'company_id': {'1': ['c'], '0': ['d'], '-1': []}, 'phone': {'1': ['f', 'e'], '0': ['g', 'e'], '-1': []}, 'record_date': {'1': ['j'], '-1': []}, 'balance': {'1': ['p', 'q'], '-1': []}, 'deposit_amount': {'1': ['p'], '-1': []}, 'deposit_base': {'1': ['p'], '-1': []}, 'person_rate': {'1': ['r'], '-1': []}, 'company_rate': {'1': ['r'], '-1': []}, 'deposit_rate': {'1': ['r'], '-1': []}, 'birthday': {'1': ['j'], '-1': []}, 'init_date': {'1': ['j'], '-1': []}, 'start_date': {'1': ['j'], '-1': []}, 'rate': {'': ''}, '缺少字段': {'': ''}, '字段缺失或冗余': {'': ''}, 'gjj_details': {'': ''}, 'newest_account': {'': ''}, 'detail_amount': {'': ''}, 'detail_record_date': {'': ''}, 
'detail_record_month': {'': ''}, 'detail_balance': {'': ''}, 'detail_gjj_type': {'': ''}, 'analyzed': {'': ''}}}
2018-04-17 10:21:23,974-INFO-Execute sql costs 0.040s, gets the number of rows is 3199
2018-04-17 10:24:05,761-INFO-Execute sql costs 0.770s, gets the number of rows is 93058
2018-04-17 10:24:05,766-INFO-00100962-a081-43d6-86f7-4ddf8870aa76
2018-04-17 10:24:05,841-INFO-00098dc3-dd52-4b79-a6b7-2f8cc1a323ca
2018-04-17 10:24:05,890-INFO-02eb5ac4-f6ae-4097-ade3-ff9a1f80aa50
2018-04-17 10:24:05,952-INFO-000aca4f-3ecc-474e-b188-b2032ec35145
2018-04-17 10:24:06,146-WARNING-[ 000aca4f-3ecc-474e-b188-b2032ec35145 ]-[ 湘潭206 ]: {'analyzed': "{'cont_last_times': '2', 'back_cont_last_times': '2', 'cont_max_times': '2', 'back_cont_max_times': '40', 'sum_times': '12', 'back_sum_times': '40', 'newest_account': '1'}", 't_analyzed': 'analyzed||a3'}
2018-04-17 10:24:06,146-INFO-00141ff1-419e-4f68-b289-e51c774fa7bb
2018-04-17 10:24:06,208-INFO-000d44b9-2b1d-40fb-94c4-4d173ca17088
2018-04-17 10:24:06,257-INFO-000f0da7-e93c-42f5-b5cf-dfa9def20600
2018-04-17 10:24:06,314-INFO-0016cfea-0526-4551-a535-32703ee9460c
2018-04-17 10:24:06,361-INFO-03c26f78-d3cd-4d00-9749-814cef1ee34b
2018-04-17 10:24:06,410-INFO-02a30211-f9a0-4c36-bf86-fe7b1caa9d04
2018-04-17 10:24:06,467-INFO-019623a3-c4e1-43c6-adef-fa2b9d03325c
2018-04-17 10:24:06,752-WARNING-[ 019623a3-c4e1-43c6-adef-fa2b9d03325c ]-[ 荆州68 ]: {'analyzed': "{'cont_last_times': '129', 'back_cont_last_times': '129', 'cont_max_times': '129', 'back_cont_max_times': '129', 'sum_times': '129', 'back_sum_times': '144', 'newest_account': '1'}", 't_analyzed': 'analyzed||a3'}
2018-04-17 10:24:06,752-INFO-0008c553-7375-47db-a9ba-3ffbae811f62
2018-04-17 10:24:06,804-INFO-0009df06-d770-47dd-abb6-228b5b71882e
2018-04-17 10:24:06,864-INFO-01af91e1-c6df-485c-b0c2-f0337684c7be
2018-04-17 10:24:06,930-INFO-0008c0ac-98fe-4681-841b-7b767d8e2fab
2018-04-17 10:24:06,978-INFO-001006eb-591b-44aa-8698-40be15472e0c
2018-04-17 10:24:07,035-INFO-01dbb598-7728-427a-b9b9-a1b5cbe264e3
2018-04-17 10:24:07,093-INFO-00107558-3d99-44df-a5dc-3afbbe81d4df
2018-04-17 10:24:07,140-INFO-000e3d49-f6b1-460c-8b33-dc50a486afb2
2018-04-17 10:24:07,189-INFO-00164e79-c917-4bbd-adbc-b23366dd9e34
2018-04-17 10:24:07,240-INFO-00184aa1-4797-4932-b243-5dbef823d588
2018-04-17 10:24:07,283-INFO-00054793-abb0-4fae-8e58-4a33fb0d7af3
2018-04-17 10:24:07,330-INFO-001a1e8b-c544-4e94-8ffc-86fdf6425b22
2018-04-17 10:24:07,382-INFO-0005dc6e-b8de-4838-81fc-30f6ca702823
2018-04-17 10:24:07,437-INFO-00058a7b-62d8-488d-94c2-587af7f7e2c1
2018-04-17 10:24:07,468-INFO-025b5eb5-5287-415e-b942-20eb3744fbcb
2018-04-17 10:24:07,507-INFO-02380718-79a4-49fe-bf7c-5900bec63d81
2018-04-17 10:24:07,560-INFO-00164182-ae68-41f5-98ef-6d64ab4e87d9
2018-04-17 10:24:07,617-INFO-02b41db5-38e1-47b4-b1e9-a752b79b3c47
2018-04-17 10:24:07,669-INFO-0430a8c8-0baa-42b8-b7ab-8c1b791a2f61
2018-04-17 10:24:07,719-INFO-00001c0d-127c-4063-8e6c-148376bfb9f9
2018-04-17 10:24:07,764-INFO-000d8bfe-1462-4383-bb4c-52c672e5147a
2018-04-17 10:24:07,807-INFO-02217dcd-4e43-4cbd-aa07-42068e936624
2018-04-17 10:24:07,864-INFO-001402a3-d535-4f2e-8787-3cea5f537bf8
2018-04-17 10:24:07,906-INFO-0319406b-477a-4283-a2db-f1fe3b0f3937
2018-04-17 10:24:07,939-INFO-000e2f2d-4104-425a-a8ec-d0377b64579f
2018-04-17 10:24:07,971-INFO-037b7a3d-c0a2-42a2-9b2c-bd3ad9b38a2d
2018-04-17 10:24:08,045-WARNING-[ 037b7a3d-c0a2-42a2-9b2c-bd3ad9b38a2d ]-[ 宜宾96 ]: {'ID': '', 't_ID': 'ID|1|2'}
2018-04-17 10:24:08,045-INFO-02deff79-1387-475b-8e5b-7343bec4aa9d
2018-04-17 10:24:08,076-INFO-028c489d-116a-407b-b1b7-b378c82a6968
2018-04-17 10:24:08,129-INFO-0000cf34-be37-4cf8-a00e-e83717d5748b
2018-04-17 10:24:08,177-INFO-0010b0a3-30ed-4161-8200-956f4737cec2
2018-04-17 10:24:08,228-INFO-00171e67-3e8f-4f9e-b574-18984d4a0aec
2018-04-17 10:24:08,282-INFO-00d21319-68e0-4ecf-bc65-72fb27e019a4
2018-04-17 10:24:08,355-INFO-00122f37-0503-4e87-b7c3-10a3e07a2f45
2018-04-17 10:24:08,415-INFO-0011c6c8-aebf-4fa7-bfae-42cc8192c1f6
2018-04-17 10:24:08,466-INFO-00104cdf-b692-4d9b-90c2-0173b70da6e4
2018-04-17 10:24:08,537-INFO-000aeca2-0039-49a6-b2e6-4f775790930a
2018-04-17 10:24:08,572-INFO-04408707-7379-46b7-b8e6-3ca5567fcbd7
2018-04-17 10:24:08,666-INFO-01f9395c-a2d8-4319-b057-5b221e86ffc3
2018-04-17 10:24:08,743-INFO-01b0ac93-3fe9-4918-bcc2-7951e650154c
2018-04-17 10:24:08,822-INFO-00183d6a-ada7-4e31-9a83-9fac193097ee
2018-04-17 10:24:08,879-INFO-0005d85d-d049-4241-b4bf-26c4fbd2f824
2018-04-17 10:24:08,924-INFO-000c61b6-d82c-4ec0-b92c-5d02d8cfae4a
2018-04-17 10:24:08,985-INFO-03121e97-2f1e-4ea5-9ba4-1b8ed750a7cc
2018-04-17 10:24:09,037-INFO-02f5a028-a1c7-435d-a717-ab8c873dd75c
2018-04-17 10:24:09,087-INFO-00b08cb4-9dfb-4f88-a700-e41b7fc5c43b
2018-04-17 10:24:09,120-INFO-00023b87-72e5-4c90-b65a-5a2f9010b1da
2018-04-17 10:24:09,174-INFO-00132892-0d8a-4074-8ef8-1a16b271a59c
2018-04-17 10:24:09,241-INFO-001218ad-8bc4-4e85-96e7-c3c094e1274c
2018-04-17 10:24:09,273-INFO-034dab08-0d58-4080-a5ab-7b0f46035655
2018-04-17 10:24:09,329-INFO-001f66a8-25ab-4d07-b3f1-7fe3931f0324
2018-04-17 10:24:09,440-WARNING-[ 001f66a8-25ab-4d07-b3f1-7fe3931f0324 ]-[ 岳阳43 ]: {'analyzed': "{'cont_last_times': '0', 'back_cont_last_times': '27', 'cont_max_times': '0', 'back_cont_max_times': '28', 'sum_times': '0', 'back_sum_times': '58', 'newest_account': '1'}", 't_analyzed': 'analyzed||a3'}
2018-04-17 10:24:09,440-INFO-001f7afc-f61e-41de-ab37-4892385ced8d
2018-04-17 10:24:09,520-INFO-000cb6cc-b83c-4d79-b303-e4f49d620f20
2018-04-17 10:24:09,560-INFO-03dbc2a3-65fd-420e-977e-a5c4c264f9a7
2018-04-17 10:24:09,611-INFO-000e9181-8ec1-4a72-afb3-bb180f0a82a8
2018-04-17 10:24:09,658-INFO-02aa93d5-4809-4cb8-b9f0-9422caeb93c7
2018-04-17 10:24:09,699-INFO-000584ae-993b-4a30-aee2-92830640cd65
2018-04-17 10:24:09,756-INFO-000e3ed4-2fa4-4fa8-abb9-5d8cbcd6d88e
2018-04-17 10:24:09,848-INFO-041093fc-8a55-4737-85a4-3af4639500ba
2018-04-17 10:24:09,890-INFO-001db707-bfe1-4269-849e-12ba0df9ea56
2018-04-17 10:24:09,938-INFO-001cad02-4d28-4931-891f-e3d2fd9904fa
2018-04-17 10:24:09,980-INFO-02250225-7384-49f1-aa38-c95cbc45b27d
2018-04-17 10:24:10,035-INFO-01defe17-86dc-4c25-9e8c-5cbc14a3dff9
2018-04-17 10:24:10,105-INFO-0228d8ae-de6a-42ff-8e3d-52d5969972b1
2018-04-17 10:24:10,139-INFO-023b8c89-3b5a-4616-95ba-76ceddea5431
2018-04-17 10:24:10,271-INFO-01436fb5-4183-422b-ace8-a8b8ec6fcd15
2018-04-17 10:24:10,322-INFO-000ffdf3-b4f3-466d-a773-9f0bc005842a
2018-04-17 10:24:10,367-INFO-02bc9ef9-c44e-4ac1-ac57-81653283dc52
2018-04-17 10:24:10,414-INFO-02cf36b8-cea3-42d9-b48e-fd0b05be69a6
2018-04-17 10:24:10,464-INFO-00134b67-cab6-48c4-8303-764ccdf33805
2018-04-17 10:24:10,510-INFO-003d0c23-4119-41ae-bec2-908bd3a1c095
2018-04-17 10:24:10,571-INFO-000a9eb4-8aaa-413b-8728-cd7b0a9a5bd0
2018-04-17 10:24:10,622-INFO-02b58367-b941-4b80-8bb9-359d13cda33e
2018-04-17 10:24:10,671-INFO-011f5ba5-d32d-455c-b7ea-6dfbc9990a32
2018-04-17 10:24:10,745-INFO-03600097-cb20-4baf-bce8-47c00093a4bc
2018-04-17 10:24:10,798-INFO-03a0a885-60a5-4685-a481-1ac85d27c91c
2018-04-17 10:24:10,849-INFO-010bcce4-15c2-40e1-b08f-d8502a9bb35f
2018-04-17 10:24:10,907-INFO-03d8c99c-4873-4f00-a174-9099ea2c54d9
2018-04-17 10:24:10,972-INFO-04425b90-2442-4589-835e-59c4db509983
2018-04-17 10:24:11,005-INFO-000973e7-81cf-44ea-b0e7-f9e5d7960947
2018-04-17 10:24:11,060-INFO-00145c10-ee82-4a33-96b1-baaffb604a8a
2018-04-17 10:24:11,184-INFO-033d4c20-658b-4e47-aa1a-8f919d122815
2018-04-17 10:24:11,240-INFO-02922ddf-fa00-4e05-b649-5ec9278e4afd
2018-04-17 10:24:11,310-INFO-01fa6022-674a-4b67-ab16-11143d0d3f1a
2018-04-17 10:24:11,341-INFO-00076994-cc60-4372-8918-6d3783f1281c
2018-04-17 10:24:11,372-INFO-000c1015-7866-4ab9-b4d5-56f6499aa3c9
2018-04-17 10:24:11,419-INFO-014636e6-2c5b-448c-a7e1-8ae050ec2897
2018-04-17 10:24:11,477-INFO-04309d0d-6da2-464b-8801-bd905757c5a6
2018-04-17 10:24:11,524-INFO-02fbc8e2-941b-479d-8b2d-cdb02f284d06
2018-04-17 10:24:11,571-INFO-02e8dba0-9ab7-453b-bbc6-c75b58964c55
# -*- coding: utf8 -*-
import configs_default
def merge(defaults, override):
    """Return a new dict holding ``defaults`` overlaid with ``override``.

    Only keys present in ``defaults`` appear in the result. When a default
    value is itself a dict, the matching override value is merged into it
    recursively; otherwise the override value simply replaces it.
    """
    merged = {}
    for key, default_value in defaults.items():
        if key not in override:
            merged[key] = default_value
            continue
        replacement = override[key]
        if isinstance(default_value, dict):
            merged[key] = merge(default_value, replacement)
        else:
            merged[key] = replacement
    return merged
def toDict(d):
    """Return a copy of ``d`` in which every nested dict is rebuilt as a plain ``dict``.

    Non-dict values are carried over by reference.
    """
    return {
        key: (toDict(value) if isinstance(value, dict) else value)
        for key, value in d.items()
    }
# Start from the defaults and overlay the optional ``configs_override``
# module when it can be imported; a missing override module is not an error.
configs = configs_default.configs
try:
    import configs_override
except ImportError:
    pass
else:
    configs = merge(configs, configs_override.configs)
configs = toDict(configs)
# Default configuration.
#
# NOTE(review): the MySQL password and the MongoDB connection string below are
# committed in plain text. They should be moved to a secret store or to
# environment variables and rotated; they are left untouched here so that
# behavior does not change.
configs = {
    'debug': True,
    'limit': 5,
    'mysql': {
        'host': 'rm-bp1272001633qc0x9o.mysql.rds.aliyuncs.com',
        'user': 'query',
        'pwd': '5gqR2EQK',
    },
    'mongo': {
        'query': 'mongodb://root:CQ72J2qKY47edG7@112.124.105.123:3717/Gjj'
    },
    'log': {
        'info': './'
    },
    # Validation rules, keyed by a short index. Each rule has a regular
    # expression ``_re``, a description ``desc`` and a flag ``m``.
    # A value is reported as an error when "the pattern matched" equals ``m``.
    # Rules with an empty ``_re`` only provide a description.
    'rules': {
        # Raw string: '\s' is not a valid escape in a normal string literal.
        '0': {'_re': r'^[\s\S]+$', 'desc': '但是有值', 'm': True},
        '1': {'_re': '', 'desc': '字段缺失', 'm': False},
        # ID
        '2': {'_re': r'^\d{18}$|^\d{17}X$', 'desc': '不是由数字,"x"组成或者长度异常', 'm': False},
        '3': {'_re': r'[\d*x]{4,18}', 'desc': '不是由数字,"x", "*"组成或者长度不在[4,18]之内', 'm': False},
        # name
        '5': {'_re': r'[^\u4e00-\u9fa5\u8d5f\ue863·.﹒.]', 'desc': '存在非(中文, ".")字符', 'm': True},
        '6': {'_re': r'[^\u4e00-\u9fa5\u8d5f\ue863·.﹒.**]', 'desc': '存在非(中文, ".", "*", "*")字符', 'm': True},
        # company
        # (a former rule '8' checked that the length was within [1, 30])
        '8': {'_re': r'[*#?]+|^\d+$', 'desc': '存在("*","#","?")字符或仅有数字组成', 'm': True},
        '9': {'_re': r'[#?]+|^\d+$', 'desc': '存在("#","?")字符或仅有数字组成', 'm': True},
        # status
        'a': {'_re': r'^[\s\S]{0,1}$|^[\s\S]{7,}$', 'desc': '长度不在[1, 7]之内', 'm': True},
        'b': {'_re': r'[^\u4e00-\u9fa5\u3400-\u4DB5\ue863()()]', 'desc': '存在非(中文, "(", ")")字符', 'm': True},
        # company_id customer_id
        'c': {'_re': r'^[\da-zA-Z-]+$', 'desc': '不是由数字, 字母, "-"组成', 'm': False},
        'd': {'_re': r'^[\d*a-zA-Z-]+$', 'desc': '不是由数字, 字母, "*", "-"组成', 'm': False},
        'e': {'_re': r'^[\s\S]{11}$|^[\s\S]{6,9}$|^\d{3,4}-\d{6,8}$', 'desc': '长度不是11位,[6,8]位, 或者不是xxxx-xxxxxxxx格式',
              'm': False},
        # phone
        'f': {'_re': r'^[\d-]+$', 'desc': '不是由数字, "-"组成', 'm': False},
        'g': {'_re': r'^[\d*-]+$', 'desc': '不是由数字, ,"-", "*"组成', 'm': False},
        # card
        'h': {'_re': r'^[\w]+$|^\d{3,4}-\d{6,8}$|^\d{3,4}-\d{14,15}$', 'desc': '不是由数字,组成', 'm': False},
        'i': {'_re': r'^[\d*]+$', 'desc': '不是由数字, "*"组成', 'm': False},
        # date format
        'j': {'_re': r'^\d{4}-\d{2}-\d{2}$', 'desc': '不是yyyy-mm-dd格式', 'm': False},
        # description-only rules
        'k': {'_re': r'', 'desc': '不在1990-01-01和当前日期之间', 'm': False},
        'l': {'_re': r'', 'desc': '与明细最新记录日期不符', 'm': False},
        'm': {'_re': r'', 'desc': '存在更近的日期', 'm': False},
        'n': {'_re': r'', 'desc': '没有从明细里取', 'm': False},
        'o': {'_re': r'', 'desc': '非正常账号', 'm': False},
        # amount
        'p': {'_re': r'^[\d\.]+$', 'desc': '存在非(数字, ".")字符', 'm': False},
        'q': {'_re': r'^\.', 'desc': '第一位是"."', 'm': True},
        # ratio
        'r': {'_re': r'^0\.\d{1,5}$|^0$', 'desc': '不是0.xxxxx的格式', 'm': False},
        # lower / upper bounds
        's': {'_re': r'', 'desc': '小于下限', 'm': False},
        't': {'_re': r'', 'desc': '大于上限', 'm': False},
        'u': {'_re': r'', 'desc': '个人缴存比例+公司缴存比例不等于缴存比例', 'm': False},
        'v': {'_re': r'', 'desc': '大于开始缴存时间或record_date', 'm': False},
        'w': {'_re': r'', 'desc': 'start_date大于record_date', 'm': False},
        'x': {'_re': r'', 'desc': '大于明细里第一条记录的日期', 'm': False},
        'y': {'_re': r'', 'desc': '', 'm': False},
        'z': {'_re': r'', 'desc': '与校验有出入', 'm': False},
        'a1': {'_re': r'', 'desc': '明细数据为空', 'm': False},
        'a2': {'_re': r'', 'desc': '月缴有误', 'm': False},
        'a3': {'_re': r'', 'desc': '王鲁宁的分析数据有误', 'm': False},
        'a4': {'_re': r'', 'desc': '明细中缴存类型为提取/偿还/支取/还贷, 但是金额不为负', 'm': False},
        'a5': {'_re': r'', 'desc': '明细中gjj_type值不是0或1', 'm': False},
        'a6': {'_re': r'', 'desc': '明细中金额格式存在非(数字, ".", "-")字符', 'm': False},
        'a7': {'_re': r'', 'desc': '明细中日期格式不是yyyy-mm或yyyy-mm-dd', 'm': False},
        'a8': {'_re': r'', 'desc': '明细中有record_month但是没有record_date', 'm': False},
        # (open item: whether the company equals the one in the details)
    },
    # Which rules apply to each field, keyed by the field's configured mode.
    # The report text renders mode '1' as complete, '0' as incomplete and
    # '-1' as absent.
    'use': {
        'ID': {
            '1': ['2'],
            '0': ['3'],
            '-1': [],
        },
        'name': {
            '1': ['5'],
            '0': ['6'],
            '-1': [],
        },
        'company': {
            '1': ['8'],
            '0': ['9'],
            '-1': ['0'],
        },
        'status': {
            '1': ['a', 'b'],
            '-1': ['0'],
        },
        'customer_id': {
            '1': ['c'],
            '0': ['d'],
            '-1': [],
        },
        'card': {
            '1': ['h'],
            '0': ['i'],
            '-1': [],
        },
        'company_id': {
            '1': ['c'],
            '0': ['d'],
            '-1': [],
        },
        'phone': {
            '1': ['f', 'e'],
            '0': ['g', 'e'],
            '-1': [],
        },
        'record_date': {
            '1': ['j'],
            '-1': []
        },
        'balance': {
            '1': ['p', 'q'],
            '-1': []
        },
        'deposit_amount': {
            '1': ['p'],
            '-1': []
        },
        'deposit_base': {
            '1': ['p'],
            '-1': []
        },
        'person_rate': {
            '1': ['r'],
            '-1': [],
        },
        'company_rate': {
            '1': ['r'],
            '-1': [],
        },
        'deposit_rate': {
            '1': ['r'],
            '-1': [],
        },
        'birthday': {
            '1': ['j'],
            '-1': []
        },
        'init_date': {
            '1': ['j'],
            '-1': []
        },
        'start_date': {
            '1': ['j'],
            '-1': []
        },
        'rate': {'': ''},
        '缺少字段': {'': ''},
        '字段缺失或冗余': {'': ''},
        'gjj_details': {'': ''},
        # Wang Luning's analysis data
        'newest_account': {'': ''},
        'detail_amount': {'': ''},
        'detail_record_date': {'': ''},
        'detail_record_month': {'': ''},
        'detail_balance': {'': ''},
        'detail_gjj_type': {'': ''},
        'analyzed': {'': ''},
    }
}
# 枣庄 临沧 定州 双鸭山 平顶山 宿州 赤峰 驻马店 保定 四平
# Partial configuration containing only a MySQL host, the debug flag, the row
# limit and the log directory.
# NOTE(review): presumably this is the ``configs_override`` module whose
# values are merged over the defaults — confirm against the repository layout.
configs = {
    'mysql': {
        'host': 'rds-jb-08.mysql.rds.aliyuncs.com',
    },
    'debug': False,
    'limit': 500,
    'log': {
        'info': '/root/MicroserviceTest/gjj_support/logs'
    }
}
# coding: utf-8
# author: hw
from thread import Threading
import json
class Manager:
    """Keep a mapping from task name to task state."""

    def __init__(self):
        # Task name -> state string.
        self.tasks = dict()

    def create_task(self, name):
        """Register ``name`` as a task in the ``'wait'`` state."""
        self.tasks[name] = 'wait'
# -*- coding: utf-8 -*-
import traceback
import random
import re
import os
import json
import datetime
import time
import logging
import copy
import contextlib
import pandas as pd
import numpy as np
import requests
import pymysql
from pymongo import MongoClient
from dateutil.relativedelta import relativedelta as Datedelta
from DBUtils.PooledDB import PooledDB
from config import configs
import utils
def logger_config(date):
    """Attach a file handler for ``check-<date>.log`` to the root logger and return it.

    The file is created inside ``configs['log']['info']`` and opened in
    write mode; the root logger level is set to INFO.
    """
    log_path = os.path.join(configs['log']['info'], 'check-%s.log' % date)
    root = logging.getLogger('')
    root.setLevel(logging.INFO)
    line_format = logging.Formatter('%(asctime)s-%(levelname)s-%(message)s')
    file_handler = logging.FileHandler(log_path, 'w', encoding='utf8')
    file_handler.setFormatter(line_format)
    root.addHandler(file_handler)
    return root
def round_up(value):
    """Convert ``value`` to a float rounded to two decimal places.

    Returns 0 when the value cannot be converted or rounded; the failure is
    written to the module logger together with its traceback.
    """
    try:
        v = float(value)
        return round(v * 100) / 100.0
    except Exception:
        # traceback.print_exc() returns None, so logging its result recorded
        # nothing useful; format_exc() returns the traceback text.
        log.error(traceback.format_exc())
        return 0
# Module-level setup, executed once at import time.
today = datetime.date.today()
delta = datetime.timedelta(days=1)
# Previous day as 'YYYY-MM-DD'; it is used below to name the log file.
yestoday = (today - delta).strftime('%Y-%m-%d')
print(yestoday)
# Current day as 'YYYY-MM-DD'.
TODAY = today.strftime('%Y-%m-%d')
# Root logger writing to check-<yestoday>.log (see logger_config).
log = logger_config(yestoday)
# Shared pymysql connection pool (1 cached connection, at most 3 connections),
# configured from configs['mysql'].
pool = PooledDB(pymysql, mincached=1, maxconnections=3,
                host=configs['mysql']['host'],
                user=configs['mysql']['user'],
                password=configs['mysql']['pwd'],
                charset='utf8'
                )
class Reviewer:
    """Validate one day's successful orders and export the rule violations.

    Order ids are read from MySQL, each order's data is fetched over HTTP and
    passed to ``Rule.run``; the resulting warnings are collected in
    ``self.errors`` and written to ``check-<date>.csv`` by ``out_result``.
    """

    def __init__(self, date):
        """Prepare the rule engine and the output layout for ``date`` ('YYYY-MM-DD')."""
        self.rule = Rule()
        self.rule.centers_info()
        self.date = date
        self.errors = []
        self.fields = list(configs['use'].keys())
        self.columns = self.build_cols()
        self.csv = os.path.join(configs['log']['info'], 'check-%s.csv' % date)

    def build_cols(self):
        """Return the CSV column names: four fixed columns, then ``<field>``/``t_<field>`` pairs."""
        cols = ['cid', 'city', 'order_id', 'login']
        for item in self.fields:
            cols.append(item)
            cols.append('t_%s' % item)
        return cols

    @staticmethod
    def from_mongo(order_id):
        """Fetch the stored data of ``order_id`` from the HTTP data service.

        Returns the decoded JSON document, or ``{'code': -1, 'msg': ...}``
        when the request or the decoding fails.
        """
        time.sleep(0.02)
        try:
            url = 'http://tv.51gjj.com:11250/gjj/%s' % order_id
            response = requests.get(url)
            return response.json()
        except Exception:
            # log.exception records the traceback; the former
            # log.error(traceback.print_exc()) only logged "None".
            log.exception('[from_mongo] failed to fetch order %s', order_id)
            return {'code': -1, 'msg': '获取数据失败'}

    def find_order_ids(self, tables):
        """Return the set of successful order ids updated on ``self.date``.

        ``tables`` is an iterable of table names inside the ``node_gjj``
        schema. When ``configs['limit']`` is truthy only that many rows per
        table are used.
        """
        order_ids = set()
        for table in tables:
            # A table name cannot be a bound parameter, so it is interpolated;
            # the date is passed as a query parameter ("%%s" becomes "%s").
            query = "SELECT orderId FROM node_gjj.%s WHERE `status`='success' AND DATE(updateDate) = %%s" \
                    % table
            # time.clock() was removed in Python 3.8.
            t1 = time.perf_counter()
            with contextlib.closing(pool.connection()) as conn:
                with contextlib.closing(conn.cursor()) as cursor:
                    rows = cursor.execute(query, (self.date,))
                    if configs['limit']:
                        datas = cursor.fetchall()[:configs['limit']]
                    else:
                        datas = cursor.fetchall()
            t2 = time.perf_counter()
            log.info('Execute sql costs %.3fs, gets the number of rows is %s' % (t2 - t1, rows))
            for row in datas:
                order_ids.add(row[0])
        return order_ids

    def process(self, order_ids):
        """Run the rules on every order in ``order_ids`` and record the warnings."""
        for order_id in order_ids:
            log.info(str(order_id))
            doc = self.from_mongo(order_id)
            if doc['code'] == 0:
                data = doc['data']['gjjData']
                try:
                    # warn_msg[0] is passed on as the city label and
                    # warn_msg[1] as the mapping of violations.
                    warn_msg = self.rule.run(data)
                    if warn_msg[1]:
                        self.warn(warn_msg[0], order_id, warn_msg[1])
                except Exception:
                    # One failing order must not stop the whole run.
                    log.exception('[process] rule check failed for order %s', order_id)
            else:
                log.info('%s: %s' % (order_id, doc['msg']))

    def warn(self, city, order_id, warn_msg):
        """Append one error record for ``order_id`` and log it as a warning."""
        params = self.login_params(order_id)
        record = {'order_id': order_id, 'city': city}
        try:
            record['login'] = str(params)
        except Exception:
            record['login'] = ''
        record.update(warn_msg)
        self.errors.append(record)
        log.warning('[ %s ]-[ %s ]: %s ' % (order_id, city, warn_msg))

    def check_order(self, order_id):
        """Run the rules on a single order and log the outcome."""
        doc = self.from_mongo(order_id)
        if doc['code'] == 0:
            result = doc['data']['gjjData']
            warn_msg = self.rule.run(result)
            log.warning(warn_msg)
        else:
            log.warning('[ %s ]: %s ' % (order_id, doc['msg']))

    def refresh(self):
        """Post an update request for every order in ``self.rule.need_refresh``, then wait."""
        url = 'https://tt.51gjj.com/postloan/update'
        headers = {
            'Content-Type': 'application/json'
        }
        for order_id in self.rule.need_refresh:
            form = {
                "param": {
                    "orderSn": order_id
                },
                "callbackUrl": "https://tt.51gjj.com:5000/redirect"
            }
            # The declared content type is JSON, so the body must be JSON too;
            # passing the nested dict through ``data=`` form-encoded it.
            requests.post(url, data=json.dumps(form), headers=headers)
            time.sleep(1)
        time.sleep(120)

    def login_params(self, order_id):
        """Return the login parameters stored for ``order_id``.

        ``cityname`` is removed from a reply that carries no ``msg``. Any
        failure yields ``{'msg': ...}`` instead of raising.
        """
        try:
            url = 'http://tv.51gjj.com:11250/gjjlogininfo/%s' % order_id
            res = requests.get(url)
            r = res.json()
            if not r.get('msg'):
                del r['cityname']
            return r
        except Exception:
            return {'msg': '获取登录参数错误'}

    def out_result(self):
        """Submit grouped bugs (unless in debug mode) and write all errors to the CSV file."""
        sub = Submit()
        sub.login()
        df = pd.DataFrame(self.errors, columns=self.columns)
        for item in self.fields:
            t_item = 't_%s' % item
            if not configs['debug']:
                try:
                    grouped = df.groupby(['city', t_item])
                    for (city, field), group in grouped:
                        title = '%s: %s' % (city, self.trans_rule_desc(field))
                        # Findings about the analysed data are not submitted.
                        if 'analyzed' in title:
                            continue
                        # At most five sample orders per (city, rule) pair.
                        d = group[:5]
                        content = []
                        for error in d.T.to_dict().values():
                            content.append(
                                {'order_id': error['order_id'],
                                 'login': error['login'],
                                 item: error[item]
                                 })
                        sub.submit_bug(title, content)
                        time.sleep(20)
                except Exception:
                    log.exception('[out_result]: failed to submit bugs for %s', item)
            # Replace the rule codes with readable text for the CSV.
            df[t_item] = df[t_item].map(self.trans_rule_desc)
        df.to_csv(self.csv, encoding='gbk')

    @staticmethod
    def trans_rule_desc(keys):
        """Turn ``'<field>|<mode>|<rule>[|<rule>...]'`` into readable text.

        Returns ``None`` for anything that is not a string (for example the
        NaN of an empty cell).
        """
        if isinstance(keys, str):
            rule = keys.split('|')
            desc = '; '.join([configs['rules'][i]['desc'] for i in rule[2:]])
            return '%s配置为%s: %s' % (rule[0], trans_config(rule[1]), desc)
def trans_config(t):
    """Map a mode code ('1', '-1' or '0') to its display label.

    Returns ``None`` for any other value.
    """
    labels = (('1', '完整'), ('-1', '不存在'), ('0', '不完整'))
    for code, label in labels:
        if t == code:
            return label
    return None
def build_index():
    """Return a random rule index that is not yet a key of ``configs['rules']``.

    The first candidate is letter-digit-letter; while the candidate already
    exists, a letter-digit-letter-letter candidate is drawn instead.
    """
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '0123456789'
    index = ''.join((random.choice(letters), random.choice(digits), random.choice(letters)))
    while index in configs['rules']:
        index = ''.join((random.choice(letters), random.choice(digits),
                         random.choice(letters), random.choice(letters)))
    return index
class Rule:
_deposit_amount_error = 50
_status_set = {'交存', '缴存', '正常交存', '正常缴存', '正常', '正常汇缴', '进行中', '正常缴交', }
# 官网有问题的 ,不检查的城市: 绥化, 宁德, 驻马店, 内江
# 多个账号明细合一起的: 巴中, 重庆
_ignore_date_cid = {'356', '135', '430', '22', '443', '32', '288', '136', '247', }
_ignore_deposit_base = {'110', '19', '95', '447', '37', '351', '124', '417', '269', '62', '430', '87', '34',
'338', '57', '381', '352', '105', '27', '47', '38', '101', '312', '92', '358',
'121', '438', '94', '309', '380', '375', '367', '366', '292', '309', '368', '320', '311',
'349', '63',
}
_ignore_rate = {'22', '352', '87', '345', '380', '375', '367', '366', '292', '309', '368', '320', '311', '52', '82',
'62', '445', }
_keys = {'data', 'cid', 'orderId', 'location', 'cityName', 'create_date'}
_rate_weight = 2
_base_weight = 3
brief_keys = {"ID", "name", "card", "customer_id", "deposit_amount", "fb_deposit_amount", "person_rate",
"fb_balance", "once_balance", "status", "record_date", "company", "deposit_base", "balance",
"company_rate", "init_date", "start_date", "sex", "email", "phone", "marriage", "address",
"birthday", "company_id", "person_deposit_amount", "company_deposit_amount", "deposit_rate",
}
record_key = {"id", "gjj_type", "company", "order_id", "record_date", "op_type", "record_month",
"amount", "balance", "remark", "comments", "deposit_type", "cont_flag", "back_cont_flag"
}
detail_filter = '上年结转|前日余额|本年合计|^期初'
op_set = set()
farest_date = '1990-01-01'
infos = None
need_refresh = set()
def __init__(self):
self.cid = None
self.orderId = None
self.error = {}
self.newest_date = None
self.last_record = None
self.first_record = None
self.last_deposit = None
self.has_record = False
self.const_ret = []
self.all_ret = []
self.history_record = []
@classmethod
def centers_info(cls):
    """Download the centre configuration and store it on ``cls.infos``.

    The reply is flattened to one row per centre, indexed by centre id.
    Every column is passed through ``Rule.trans``, the four rate-limit
    columns are converted to float (empty string becomes 0) and the combined
    upper and lower limits are added as new columns.
    """
    res = requests.get('http://tm.51gjj.com:1010/CenterConfig/GetCentersJson')
    infos = res.json()
    centers = [
        center
        for province in infos
        for city in province['城市']
        for center in city['公积金中心']
    ]
    df_new = pd.DataFrame(centers).set_index('中心id')
    logging.error(df_new)
    for column in df_new.columns:
        df_new[column] = df_new[column].map(Rule.trans)
    for limit_column in ('个人缴纳比例上限', '公司缴纳比例上限', '个人缴纳比例下限', '公司缴纳比例下限'):
        df_new[limit_column] = df_new[limit_column].replace('', 0).map(float)
    df_new['缴纳比例上限'] = df_new['个人缴纳比例上限'] + df_new['公司缴纳比例上限']
    df_new['缴纳比例下限'] = df_new['个人缴纳比例下限'] + df_new['公司缴纳比例下限']
    cls.infos = df_new
@staticmethod
def trans(text):
# print(text)
if re.findall('不完整', text):
return '0'
if re.findall('完整', text):
return '1'
if re.findall('不存在', text):
return '-1'
return text
def check_id(self, id_num):
mode = self.get_mode('身份证')
if mode != '-1':
self.test_value(id_num, 'ID', mode)
def check_name(self, name):
mode = self.get_mode('姓名')
if mode != '-1':
self.test_value(name, 'name', mode)
# if not (re.findall(r'[·.﹒]', name) or cid in ['279', '31', '213', '35', '154']):
# rule_index = self.test_len(name, 2, 5)
# if rule_index:
# self.build_error(name, 'name', mode, rule_index)
def check_deposit_amount(self, brief):
mode = self.get_mode('月缴')
# 从明细里取的月缴/比例或者基数有问题
if self.cid in {'14'}:
return
if self.cid in ['100', '333', '180', '390', '21', '202', '36', '270', '108', '110', '7', '81', '48'
'110', '51', '46', '39'] and not self.last_deposit:
return
self.test_value(brief['deposit_amount'], 'deposit_amount', mode)
# 基本信息里有月缴, 没有缴存基数, 个人缴存额, 没有明细
# 100-有半缴个人
if self.cid in {'141', '29', '14', '374', '28', '339', '56', '326', '321', '138', '332', '345', '100'}:
return
passed = False
if mode != '-1':
try:
amount = float(brief['deposit_amount'])
except ValueError:
return
# 缴存基数 * 缴存比例
if brief['deposit_rate'] and brief['deposit_base']:
rate = float(brief['deposit_rate'])
base = float(brief['deposit_base'])
if rate * base - amount < abs(self._deposit_amount_error):
passed = True
# 个人缴存额 + 公司缴存额
if brief['person_deposit_amount'] and brief['company_deposit_amount']:
amount = float(brief['deposit_amount'])
p_da = float(brief['person_deposit_amount'])
c_da = float(brief['company_deposit_amount'])
if c_da + p_da - amount < abs(self._deposit_amount_error):
passed = True
# 要有明细 与最新一条汇缴明细比较
if self.last_deposit:
last_deposit = float(self.last_deposit)
if last_deposit - amount < abs(self._deposit_amount_error):
passed = True
if not passed:
self.build_error(amount, 'deposit_amount', mode, 'a2')
def check_deposit_base(self, deposit_base):
if self.cid in ['352', '14']:
return
mode = self.get_mode('缴存基数')
self.test_value(deposit_base, 'deposit_base', mode)
def check_company(self, company):
mode = self.get_mode('公司')
if mode != '-1':
self.test_value(company, 'company', mode)
def check_status(self, status):
mode = self.get_mode('公积金状态')
self.test_value(status, 'status', mode)
def check_customer_id(self, customer_id):
if self.cid in ['14']:
return
if self.cid in ['61'] and not self.has_record:
return
mode = self.get_mode('公积金账号')
self.test_value(customer_id, 'customer_id', mode)
def check_card(self, card):
if self.cid in ['307']:
return
mode = self.get_mode('联名卡号')
self.test_value(card, 'card', mode)
def check_company_id(self, company_id):
mode = self.get_mode('单位账号')
self.test_value(company_id, 'company_id', mode)
def check_phone(self, phone):
mode = self.get_mode('手机号')
self.test_value(phone, 'phone', mode)
def check_date(self, i_date, s_date, r_date, ):
i_mode = self.get_mode('开户时间')
s_mode = self.get_mode('开始缴存时间')
mode = self.get_mode('公积金record_date')
if r_date:
self.test_value(r_date, 'record_date', mode)
if not self.farest_date <= r_date <= TODAY:
self.build_error(r_date, 'record_date', mode, 'k')
# 从基本信息里取的record_date
# 明细配置为完整,
# if not self.detail_ret.get('end_date'):
# # 没有明细, 可能是真的没取到, 可能是销户
# if account_state == '1':
# self.build_error(date, 'record_date', mode, 'm')
if self.cid in self._ignore_date_cid:
return
if i_date and r_date and i_date > r_date:
# init_date
self.test_value(i_date, 'init_date', i_mode)
self.build_error([i_date, s_date, r_date], 'init_date', i_mode, 'v')
return
if i_date and s_date and i_date > s_date:
# start_date
self.test_value(s_date, 'start_date', s_mode)
self.build_error([i_date, s_date, r_date], 'init_date', i_mode, 'v')
return
if r_date and s_date and s_date > r_date:
self.build_error([s_date, r_date], 'start_date', s_mode, 'w')
return
def check_rate(self, p_rate, c_rate, rate):
if p_rate and float(p_rate) > 0:
p_mode = self.get_mode('个人缴存比例')
self.test_value(p_rate, 'person_rate', p_mode)
if c_rate and float(c_rate) > 0:
c_mode = self.get_mode('公司缴存比例')
self.test_value(c_rate, 'company_rate', c_mode)
try:
if rate and float(rate) > 0:
self.test_value(rate, 'deposit_rate', '1')
except Exception:
traceback.print_exc()
log.error(self.error)
def check_birthday(self, birthday):
    """Validate a non-empty birthday with the mode '1' rules."""
    if not birthday:
        return
    self.test_value(birthday, 'birthday', '1')
def check_balance(self, balance):
    """Validate the balance; centres '207' and '14' are skipped while no record was seen."""
    if self.cid in ('207', '14') and not self.has_record:
        return
    self.test_value(balance, 'balance', self.get_mode('余额'))
def check_analyzed(self, doc):
    """Compare the analysed ``newest_account`` flag with the state derived by ``user_state``.

    :return: True when both agree; otherwise rule 'z' is recorded and None is returned.
    """
    derived = self.user_state(doc)
    reported = doc['gjj_account_analyzed_data']['newest_account']
    if derived == reported:
        return True
    self.build_error(reported, 'newest_account', '', 'z')
    return None
def check_details(self, details, init_date, bal):
    """Validate every detail record and decide whether the order needs a refresh.

    :param details: list of detail dicts (``skip_detail`` runs on it first and adds 'state')
    :param init_date: account opening date from the brief information
    :param bal: balance from the brief information
    The first malformed gjj_type/date/month/amount/balance stops the whole check.
    """
    self.skip_detail(details)
    withdrawal = '提取|偿还|支取|还贷'
    reversal = '退回|还贷缴存|失败|公积金偿还利息|冲账|支取红冲|支取退款'
    # (field, required pattern, error column, rule index), checked in this order
    format_checks = (
        ('record_date', r'^\d{4}-\d{2}-\d{2}$', 'detail_record_date', 'a7'),
        ('record_month', r'^\d{6}$', 'detail_record_month', 'a7'),
        ('amount', r'^[\d\.\-]+$', 'detail_amount', 'a6'),
        ('balance', r'^[\d\.\-]+$', 'detail_balance', 'a6'),
    )
    may_report = True
    suspect = None
    for item in details:
        if item['record_month'] and not item['record_date']:
            self.build_error('%s|%s' % (item['id'], item['record_date']), 'detail_record_date', '', 'a8')
        if item['gjj_type'] not in ['0', '1']:
            self.build_error('%s|%s' % (item['id'], item['gjj_type']), 'detail_gjj_type', '', 'a5')
            return
        for field, pattern, column, rule in format_checks:
            value = item[field]
            if value and not re.findall(pattern, value):
                self.build_error('%s|%s' % (item['id'], value), column, '', rule)
                return
        state, amount = item['state'], item['amount']
        if not re.search(withdrawal, state):
            continue
        if '-' in amount:
            # one non-zero negative withdrawal suppresses the a4 report for the whole list
            if self.zero(amount):
                may_report = False
        elif not re.search(reversal, state) and self.zero(amount) and may_report:
            # remember the latest withdrawal whose amount is not negative
            suspect = (item['id'], amount)
    if suspect is not None and may_report:
        self.build_error('%s|%s' % suspect, 'detail_amount', '', 'a4')
    d = utils.away_today(init_date)
    mode = self.get_mode('公积金明细')
    if mode == '1' and len(details) == 0 and d and d[0] == 0 and d[1] > 2 and self.zero(bal):
        # no details at all although details are configured as complete:
        # queue the order for a refresh instead of reporting rule 'a1' directly
        print('%s is added to set' % self.orderId)
        self.need_refresh.add(self.orderId)
def check_analyzed_data(self, analyzed):
    """Cross-check the analysed streak counters against ``const_ret`` / ``all_ret``.

    :param analyzed: dict with the six ``*_times`` counters as numeric strings
    Rule 'a3' is recorded for the first mismatch only; centre '122' is exempt.
    """
    if self.cid == '122':
        return
    expectations = []
    if len(self.const_ret) > 0:
        expectations += [
            ('cont_last_times', self.const_ret[-1]),
            ('cont_max_times', max(self.const_ret)),
            ('sum_times', sum(self.const_ret)),
        ]
    if len(self.all_ret) > 0:
        expectations += [
            ('back_cont_max_times', max(self.all_ret)),
            ('back_sum_times', sum(self.all_ret)),
            ('back_cont_last_times', self.all_ret[-1]),
        ]
    for key, expected in expectations:
        if int(analyzed[key]) != expected:
            self.build_error(str(analyzed), 'analyzed', '', 'a3')
            return
def test_value(self, raw, col, mode):
    """Apply every rule configured for ``col`` under ``mode`` to ``raw``.

    A rule fires when "its regex matched (case-insensitively)" equals the
    rule's ``m`` flag; each firing rule is recorded through ``build_error``.
    """
    for index in configs['use'][col][mode]:
        rule = configs['rules'][index]
        matched = re.search(rule['_re'], raw, re.I) is not None
        if matched == rule['m']:
            self.build_error(raw, col, mode, index)
def test_range(self, raw, col):
    """Check whether ``raw`` lies within the configured lower/upper limit of ``col``.

    :return: 's' when below the lower limit, 't' when above the upper limit, else None
    Rate limits are divided by 100 and widened by ``_rate_weight``; other
    limits are widened by ``_base_weight``. A limit that raises ValueError
    falls back to 0 (lower) or to "no upper limit".
    """
    if not raw:
        return
    col = col.replace('缴存', '缴纳').replace('缴纳基数', '基数')
    is_rate = '比例' in col
    try:
        if is_rate:
            lower = self.get_mode('%s下限' % col) / 100 / self._rate_weight
        else:
            lower = float(self.get_mode('%s下限' % col)) / self._base_weight
    except ValueError:
        lower = 0
    try:
        if is_rate:
            upper = self.get_mode('%s上限' % col) / 100 * self._rate_weight
        else:
            upper = float(self.get_mode('%s上限' % col)) * self._base_weight
    except ValueError:
        upper = None
    if lower > round_up(raw):
        return 's'
    if upper and upper < round_up(raw):
        return 't'
def get_mode(self, col):
    """Return the configured value of column ``col`` for the current centre.

    :param col: column name of the centre configuration table ``self.infos``
    :return: the cell value, or '-1' when the lookup yields nothing
    :raises KeyError: when the centre id or the column is not in ``self.infos``

    ``DataFrame.ix`` was removed in pandas 1.0, so the lookup is label-based
    via ``.loc`` and the single cell is read positionally via ``.iloc``.
    NOTE(review): assumes the index of ``self.infos`` holds integer centre ids - confirm.
    """
    cell = self.infos.loc[int(self.cid), [col]]
    if len(cell) > 0:
        return cell.iloc[0]
    return '-1'
def user_state(self, doc):
    """Derive whether the account is active: '1' for active, '0' otherwise.

    A non-empty status decides alone (membership in ``_status_set``). With an
    empty status the account counts as active when ``record_date`` is less
    than three months old.
    :raises TypeError: when the status is not a string
    """
    brief = doc['gjj_brief']
    status = brief['status']
    record_date = brief['record_date']
    if not isinstance(status, str):
        raise TypeError('Status')
    if status:
        return '1' if status in self._status_set else '0'
    elapsed = utils.away_today(record_date)
    if elapsed and elapsed[1] < 3 and elapsed[0] == 0:
        return '1'
    return '0'
def build_error(self, raw, col, mode, rule_index):
    """
    Record that ``raw`` violated a rule.

    :param raw: the raw value that was checked
    :param col: name of the field the value belongs to
    :param mode: configured mode of that field
    :param rule_index: index of the violated rule
    :return: None; ``self.error[col]`` keeps the first raw value and
        ``self.error['t_<col>']`` holds 'col|mode|rule[|rule...]'
    """
    key = 't_%s' % col
    previous = self.error.get(key)
    if previous:
        # the field already has an error: only append the additional rule index
        self.error[key] = previous + '|%s' % rule_index
        return
    self.error[col] = str(raw)
    self.error[key] = '%s|%s|%s' % (col, mode, rule_index)
@staticmethod
def test_len(raw, min_len=0, max_len=0):
    """Check that ``len(str(raw))`` lies within ``[min_len, max_len]``.

    :return: '' when the length is fine; otherwise rule index '4', after
        (re)registering that rule's description in ``configs['rules']``;
        None when an unexpected error was logged
    """
    try:
        if min_len <= len(str(raw)) <= max_len:
            return ''
        rule_index = '4'
        # 'm' is included so the entry has the same shape as every other rule
        configs['rules'][rule_index] = {
            '_re': '',
            'desc': '长度不在[%d, %d]之内' % (min_len, max_len),
            'm': False,
        }
        return rule_index
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None
        log.error(traceback.format_exc())
def field_integrity(self, doc):
    """Report keys missing from the brief and from the first incomplete detail record."""
    brief = doc['gjj_brief']
    if brief:
        missing = self.brief_keys.difference(brief.keys())
        if missing:
            self.build_error(missing, '缺少字段', '', 'y')
    records = doc['gjj_detail']
    if not records:
        return
    for record in records:
        missing = self.record_key.difference(record.keys())
        if missing:
            # one incomplete detail record is enough
            self.build_error(missing, '缺少字段', '', 'y')
            return
def skip_detail(self, details):
    """
    Derive deposit-streak statistics from the detail records.

    Adds a 'state' key to every item, extends ``details`` in place with
    ``self.history_record``, and fills ``self.all_ret`` / ``self.const_ret``
    (lengths of runs of consecutive months), ``self.last_deposit``,
    ``self.has_record`` and ``self.op_set``.

    :param details: list of detail dicts (mutated in place)
    :return: None
    """
    # NOTE(review): the nesting below was reconstructed from a listing that had
    # lost its indentation - confirm against the real file.
    # withdrawal / repayment / draw-down / loan repayment
    raw = []
    history_df = []
    if self.history_record:
        history_df = pd.DataFrame(self.history_record)
        # record dates of the non-newest accounts; used below to keep them out of last_deposit
        history_df = history_df['record_date'].tolist()
        details.extend(self.history_record)
    for item in details:
        item['state'] = '|'.join([item['remark'], item['deposit_type'], item['comments']])
        # strip digits and the characters 公/积/金 to collect the distinct operation names
        op = re.sub('[\d公积金]+', '', item['state'])
        self.op_set.add(op)
        # drop summary rows (detail_filter) and rows without a record month
        if not (re.search(self.detail_filter, item['state']) or item['record_month'] == ''):
            raw.append(item)
    if len(raw) == 0:
        return
    self.has_record = True
    data = pd.DataFrame(raw)
    df = data.set_index('record_date')
    df = df.sort_values(by='record_month')
    m = n = 0
    for index, row in df[0:].iterrows():
        # print(row['record_month'], row['state'])
        state = row['state']
        minuend = utils.format_date(row['record_month'])
        # all deposits, back payments included -> all_ret
        if re.search('汇缴|补|缴交|汇交|缴存|交缴|未分摊转入到住房公积金|月缴公积金|半缴|漏缴|^汇\d', state) and not re.search('自定义缴存单位', state):
            if m == 0:
                all_mark = minuend
                self.all_ret.append(1)
                m += 1
            mon_delta = Datedelta(minuend, all_mark)
            if mon_delta.years == 0 and mon_delta.months == 1:
                # next month: extend the current run
                self.all_ret[-1] += 1
            elif mon_delta.months > 1 or mon_delta.years > 0:
                # gap: start a new run
                self.all_ret.append(1)
            all_mark = minuend
        # regular deposits only (back payments, reversals and missed deposits excluded) -> const_ret
        if re.search('汇缴|缴交|汇交|缴存|交缴|未分摊转入到住房公积金|月缴公积金|半缴|^汇\d', state) and not re.search('[^汇及含]补|自定义缴存单位|缴存红冲|漏缴', state):
            if n == 0:
                const_mark = minuend
                self.const_ret.append(1)
                n += 1
            mon_delta = Datedelta(minuend, const_mark)
            # print(mon_delta.years, '_', mon_delta.months)
            far_today = Datedelta(today, minuend)
            # a regular deposit of the newest account within the last four months
            # becomes the reference amount for the monthly-deposit check
            if far_today.years == 0 and far_today.months < 4 and index not in history_df:
                self.last_deposit = row['amount']
            if mon_delta.years == 0 and mon_delta.months == 1:
                # self.build_error()
                self.const_ret[-1] += 1
            elif mon_delta.months > 1 or mon_delta.years > 0:
                self.const_ret.append(1)
            const_mark = minuend
    # print(self.all_ret, self.const_ret)
@staticmethod
def zero(amount):
try:
if float(amount) != 0:
return True
else:
return False
except:
return False
def run(self, data):
    """
    Run every check on one order.

    :param data: gjjData['data']
    :return: tuple ``('<cityName><cid>', errors)`` where ``errors`` is a deep
        copy of ``self.error``; None (implicitly) when any exception was raised
    """
    # NOTE(review): the nesting below was reconstructed from a listing that had
    # lost its indentation - confirm against the real file.
    try:
        self.orderId = data['orderId']
        # despite its name, ``diff`` is a bool: True when all required keys are present
        diff = self._keys.issubset(set(data.keys()))
        if not diff:
            self.build_error(diff, '字段缺失或冗余', '', 'y')
        else:
            # first pass: collect the details of every non-newest account
            for gjj_data in data['data']['gjj_data']:
                if gjj_data['gjj_account_analyzed_data']['newest_account'] != '1':
                    self.history_record.extend(gjj_data['gjj_detail'])
            # second pass: field integrity for every account, full checks for the newest one only
            for gjj_data in data['data']['gjj_data']:
                self.cid = data['cid']
                self.field_integrity(gjj_data)
                if gjj_data['gjj_account_analyzed_data']['newest_account'] != '1':
                    continue
                self.check_id(gjj_data['gjj_brief']['ID'])
                self.check_name(gjj_data['gjj_brief']['name'])
                self.check_company(gjj_data['gjj_brief']['company'])
                self.check_status(gjj_data['gjj_brief']['status'])
                self.check_customer_id(gjj_data['gjj_brief']['customer_id'])
                self.check_card(gjj_data['gjj_brief']['card'])
                self.check_company_id(gjj_data['gjj_brief']['company_id'])
                self.check_phone(gjj_data['gjj_brief']['phone'])
                self.check_balance(gjj_data['gjj_brief']['balance'])
                # the more involved checks
                self.check_deposit_base(gjj_data['gjj_brief']['deposit_base'])
                self.check_date(gjj_data['gjj_brief']['init_date'], gjj_data['gjj_brief']['start_date'],
                                gjj_data['gjj_brief']['record_date'])
                self.check_rate(gjj_data['gjj_brief']['person_rate'], gjj_data['gjj_brief']['company_rate'],
                                gjj_data['gjj_brief']['deposit_rate'])
                self.check_birthday(gjj_data['gjj_brief']['birthday'])
                # check_details must run before the two checks below: it fills
                # last_deposit, const_ret and all_ret through skip_detail
                self.check_details(gjj_data['gjj_detail'], gjj_data['gjj_brief']['init_date'],
                                   gjj_data['gjj_brief']['balance'])
                self.check_deposit_amount(gjj_data['gjj_brief'])
                self.check_analyzed_data(gjj_data['gjj_account_analyzed_data'])
                self.all_ret = []
                self.const_ret = []
        warn_msg = copy.deepcopy(self.error)
        ret = '%s%s' % (data['cityName'], self.cid), warn_msg
        # reset the per-order state so the instance can be reused for the next order
        self.__init__()
        return ret
    except Exception as e:
        print('%s is error' % self.orderId)
        traceback.print_exc()
        self.__init__()
class Submit:
    """
    Submit abnormal data to bugclose.
    """

    def __init__(self):
        self.session = requests.Session()
        self.session.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
            'Origin': 'https://www.bugclose.com',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, br',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        }
        self.token = None
        # NOTE(review): credentials are hard-coded in source control - move them to
        # configuration or the environment and rotate the password.
        self.user = 'huangwei@jianbing.com'
        self.pwd = 'Hw123211'

    def login(self):
        """Log in, keep the returned token and call the project's hasJoined endpoint."""
        url = 'https://www.bugclose.com/cgi/user/login'
        form = {
            'email': self.user,
            'password': self.pwd,
            'inviteCode': '',
        }
        res = self.session.post(url, data=form)
        root = res.json()
        self.token = root['root']
        url = 'https://www.bugclose.com/cgi/project/hasJoined'
        form = {
            'token': self.token,
            'id': '14124'
        }
        # the response body was never used, so it is no longer parsed
        self.session.post(url, data=form)

    def submit_bug(self, title, content):
        """Create one bug.

        :param title: bug title
        :param content: iterable of rows; rendered by ``format_json`` into the description
        On failure the method sleeps 30 seconds and logs the title, the
        description and the server response.
        """
        content = self.format_json(content)
        url = 'https://www.bugclose.com/cgi/bug/add'
        form = {
            'title': title,
            'imageIds': '',
            'bugType': '',
            'attachmentIds': '',
            'priority': 'Urgent',
            'version': '',
            'environment': '',
            'description': content,
            # product id
            'productId': '9534',
            # assignee id (朱: 54396, 宋: 24452)
            'assignToId': '24452',
            'sprintId': '0',
            'planDate': '',
            'dueDate': '',
            'planVersion': '',
            'parentId': '',
            'module': '',
            'workload': '',
            'value': '',
            'tagIds': '',
            'testCaseId': '',
            'testTaskId': '',
            'customValues': '',
            'token': self.token,
            'projectId': '14124',
        }
        res = self.session.post(url, data=form)
        ret = res.json()
        if ret['success']:
            log.info('提交bug成功')
        else:
            time.sleep(30)
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            log.warning('%s\n%s' % (title, content))
            log.warning('提交bug失败: %s' % ret)

    def format_json(self, list_data):
        """Render one row per line and cut the text to at most 1990 characters."""
        return ''.join('%s\n' % (str(row),) for row in list_data)[:1990]
# Entry point: check yesterday's successful orders and export the findings.
if __name__ == '__main__':
    # NOTE(review): this writes the whole configuration to the log file,
    # database credentials included - consider logging a redacted copy.
    log.info(configs)
    viewer = Reviewer(yestoday)
    # status tables that are searched for successful orders
    ts = {'gjj_status', 'gjjh5_status'}
    orderids = viewer.find_order_ids(ts)
    viewer.process(orderids)
    # viewer.check_order('ef95904b-75ab-4f00-b82a-d9c2f3709835')
    # orders queued by Rule.check_details (no details although details are expected)
    if viewer.rule.need_refresh:
        viewer.refresh()
        # viewer.process(viewer.rule.need_refresh)
    viewer.out_result()
import logging
import os
import sys
import traceback
import pandas as pd
from config import configs
def logger_config(filename):
    """Set the root logger to INFO, attach a UTF-8 file handler that truncates
    ``filename``, and return the root logger."""
    root = logging.getLogger('')
    root.setLevel(logging.INFO)
    file_handler = logging.FileHandler(filename, mode='w', encoding='utf8')
    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))
    root.addHandler(file_handler)
    return root
def test(x):
    """Return ``x`` unchanged."""
    return x
# Scratch script: try out the group-by used for bug reporting on sample rows.
if __name__ == '__main__':
    data = [
        {'city': '杭州', 'ID': '33018219940121216', 't_ID': '1', 'name': 'anan', 't_name': '1'},
        {'city': '上海', 'ID': '33018219940165466', 't_ID': '1', 'name': 'lolo', 't_name': '1'},
        {'city': '上海', 'ID': '2326****0322', 't_ID': '1', 'name': 'qwe', 't_name': '2'},
        {'city': '上海', 'ID': '2326****0322', 't_ID': '1', 'name': 'qwe', 't_name': '2'},
        {'city': '上海', 'ID': '2326****12365', 't_ID': '1',},
        {'city': '上海', 'ID': '34234****23123', 't_ID': '1', 'name': 'aaa', 't_name': '4'},
        {'city': '北京', 'ID': '3435****2763', 't_ID': '1', 'name': 'ccc', 't_name': '4'},
        {'city': '北京', 'ID': '2326****67675', 't_ID': '1', 'name': 'ddd', 't_name': '3'},
        {'city': '雅安', 'name': 'zzz', 't_name': '4'},
    ]
    df = pd.DataFrame(data)
    # fillna returns a new frame; the result used to be thrown away, which left
    # NaN in the frame and dropped rows without 't_name' from the groups below
    df = df.fillna('')
    # DataFrame.ix was removed in pandas 1.0; .loc does the same label lookup
    print(df.loc[0, 'ID'])
    grouped = df.groupby([df['city'], df['t_name']])
    for (cc, k), g in grouped:
        print(cc, k)
        # at most two sample rows per (city, t_name) group
        d = g[:2]
        print(d.T.to_dict().values())
    print(configs)
# -*- coding: utf-8 -*-
import datetime
from dateutil.relativedelta import relativedelta
def away_today(date):
    """Calculate how long ago ``date`` was, relative to today.

    :param date: date string 'YYYY-MM-DD'
    :return: tuple ``(years, months, days)`` taken from the relativedelta
        between today and ``date``; None when ``date`` is empty, not a string,
        or not a valid 'YYYY-MM-DD' date

    A malformed string used to raise ValueError/IndexError. The callers
    visible in this file already treat a falsy result as "unknown", so it now
    yields None as well.
    """
    if not date or not isinstance(date, str):
        return None
    try:
        year, month, day = (int(part) for part in date.split('-')[:3])
        then = datetime.date(year=year, month=month, day=day)
    except ValueError:
        # too few parts, non-numeric parts, or an impossible calendar date
        return None
    datedelta = relativedelta(datetime.date.today(), then)
    return datedelta.years, datedelta.months, datedelta.days
def format_date(raw):
    """Turn a 'YYYYMM' string into the date of the first day of that month."""
    year_part, month_part = raw[:4], raw[4:]
    return datetime.date(int(year_part), int(month_part), 1)
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
# -*- coding: utf8 -*-
import configs_default
def merge(defaults, override):
    """Return a copy of ``defaults`` with values replaced from ``override``.

    Nested dicts are merged recursively. Keys that exist only in ``override``
    are ignored.
    """
    merged = {}
    for key, default_value in defaults.items():
        if key not in override:
            merged[key] = default_value
        elif isinstance(default_value, dict):
            merged[key] = merge(default_value, override[key])
        else:
            merged[key] = override[key]
    return merged
def toDict(d):
    """Recursively copy ``d`` into plain ``dict`` objects; non-dict values are kept as they are."""
    return {key: (toDict(value) if isinstance(value, dict) else value) for key, value in d.items()}
# Effective configuration: the defaults, optionally overlaid by configs_override.
configs = configs_default.configs
try:
    # configs_override is optional; only keys that already exist in the defaults are replaced (see merge)
    import configs_override
    configs = merge(configs, configs_override.configs)
except ImportError:
    pass
# normalise every nested mapping to a plain dict
configs = toDict(configs)
# Default configuration: connection settings, the validation rules ('rules')
# and, per field and mode, the list of rule indexes to apply ('use').
# NOTE(review): database credentials are committed in clear text below -
# move them out of source control and rotate them.
configs = {
    'debug': True,
    'limit': 500,
    'mysql': {
        'host': 'rm-bp1272001633qc0x9o.mysql.rds.aliyuncs.com',
        'user': 'query',
        'pwd': '5gqR2EQK',
    },
    'mongo': {
        'query': 'mongodb://root:CQ72J2qKY47edG7@112.124.105.123:3717/Gjj'
    },
    'log': {
        'info': './'
    },
    'rules': {
        # A value is wrong when "the regex matched" equals the rule's 'm' flag
        '0': {'_re': '^[\s\S]+$', 'desc': '但是有值', 'm': True},
        '1': {'_re': '', 'desc': '字段缺失', 'm': False},
        # ID
        '2': {'_re': r'^\d{18}$|^\d{17}X$', 'desc': '不是由数字,"x"组成或者长度异常', 'm': False},
        # NOTE(review): this pattern is not anchored, so longer values also match - confirm intent
        '3': {'_re': r'[\d*x]{4,18}', 'desc': '不是由数字,"x", "*"组成或者长度不在[4,18]之内', 'm': False},
        # name
        '5': {'_re': r'[^\u4e00-\u9fa5\u8d5f\ue863·.﹒.]', 'desc': '存在非(中文, ".")字符', 'm': True},
        '6': {'_re': r'[^\u4e00-\u9fa5\u8d5f\ue863·.﹒.**]', 'desc': '存在非(中文, ".", "*", "*")字符', 'm': True},
        # company
        # (disabled) an earlier rule '8' rejected values whose length was outside [1, 30]
        '8': {'_re': r'[*#?]+|^\d+$', 'desc': '存在("*","#","?")字符或仅有数字组成', 'm': True},
        '9': {'_re': r'[#?]+|^\d+$', 'desc': '存在("#","?")字符或仅有数字组成', 'm': True},
        # status
        'a': {'_re': r'^[\s\S]{0,1}$|^[\s\S]{7,}$', 'desc': '长度不在[1, 7]之内', 'm': True},
        'b': {'_re': r'[^\u4e00-\u9fa5\u3400-\u4DB5\ue863()()]', 'desc': '存在非(中文, "(", ")")字符', 'm': True},
        # company_id customer_id
        'c': {'_re': r'^[\da-zA-Z-]+$', 'desc': '不是由数字, 字母, "-"组成', 'm': False},
        'd': {'_re': r'^[\d*a-zA-Z-]+$', 'desc': '不是由数字, 字母, "*", "-"组成', 'm': False},
        'e': {'_re': r'^[\s\S]{11}$|^[\s\S]{6,9}$|^\d{3,4}-\d{6,8}$', 'desc': '长度不是11位,[6,8]位, 或者不是xxxx-xxxxxxxx格式',
              'm': False},
        # phone
        'f': {'_re': r'^[\d-]+$', 'desc': '不是由数字, "-"组成', 'm': False},
        'g': {'_re': r'^[\d*-]+$', 'desc': '不是由数字, ,"-", "*"组成', 'm': False},
        # card
        'h': {'_re': r'^[\d]+$', 'desc': '不是由数字,组成', 'm': False},
        'i': {'_re': r'^[\d*]+$', 'desc': '不是由数字, "*"组成', 'm': False},
        # date format
        'j': {'_re': r'^\d{4}-\d{2}-\d{2}$', 'desc': '不是yyyy-mm-dd格式', 'm': False},
        # the rules below have an empty regex and only carry a description
        'k': {'_re': r'', 'desc': '不在1990-01-01和当前日期之间', 'm': False},
        'l': {'_re': r'', 'desc': '与明细最新记录日期不符', 'm': False},
        'm': {'_re': r'', 'desc': '存在更近的日期', 'm': False},
        'n': {'_re': r'', 'desc': '没有从明细里取', 'm': False},
        'o': {'_re': r'', 'desc': '非正常账号', 'm': False},
        # amounts
        'p': {'_re': r'^[\d\.]+$', 'desc': '存在非(数字, ".")字符', 'm': False},
        'q': {'_re': r'^\.', 'desc': '第一位是"."', 'm': True},
        # rates
        'r': {'_re': r'^0\.\d{1,5}$|^0$', 'desc': '不是0.xxxxx的格式', 'm': False},
        # lower / upper limits
        's': {'_re': r'', 'desc': '小于下限', 'm': False},
        't': {'_re': r'', 'desc': '大于上限', 'm': False},
        'u': {'_re': r'', 'desc': '个人缴存比例+公司缴存比例不等于缴存比例', 'm': False},
        'v': {'_re': r'', 'desc': '大于开始缴存时间或record_date', 'm': False},
        'w': {'_re': r'', 'desc': 'start_date大于record_date', 'm': False},
        'x': {'_re': r'', 'desc': '大于明细里第一条记录的日期', 'm': False},
        'y': {'_re': r'', 'desc': '', 'm': False},
        'z': {'_re': r'', 'desc': '与校验有出入', 'm': False},
        'a1': {'_re': r'', 'desc': '明细数据为空', 'm': False},
        'a2': {'_re': r'', 'desc': '月缴有误', 'm': False},
        'a3': {'_re': r'', 'desc': '王鲁宁的分析数据有误', 'm': False},
        'a4': {'_re': r'', 'desc': '明细中缴存类型为提取/偿还/支取/还贷, 但是金额不为负', 'm': False},
        'a5': {'_re': r'', 'desc': '明细中gjj_type值不是0或1', 'm': False},
        'a6': {'_re': r'', 'desc': '明细中金额格式存在非(数字, ".", "-")字符', 'm': False},
        'a7': {'_re': r'', 'desc': '明细中日期格式不是yyyy-mm或yyyy-mm-dd', 'm': False},
        'a8': {'_re': r'', 'desc': '明细中有record_month但是没有record_date', 'm': False},
        # whether it equals the company in the detail records
    },
    # field -> mode -> rule indexes; modes '1', '0' and '-1' are rendered by
    # trans_config as complete, incomplete and not present
    'use': {
        'ID': {
            '1': ['2'],
            '0': ['3'],
            '-1': [],
        },
        'name': {
            '1': ['5'],
            '0': ['6'],
            '-1': [],
        },
        'company': {
            '1': ['8'],
            '0': ['9'],
            '-1': ['0'],
        },
        'status': {
            '1': ['a', 'b'],
            '-1': ['0'],
        },
        'customer_id': {
            '1': ['c'],
            '0': ['d'],
            '-1': [],
        },
        'card': {
            '1': ['h'],
            '0': ['i'],
            '-1': [],
        },
        'company_id': {
            '1': ['c'],
            '0': ['d'],
            '-1': [],
        },
        'phone': {
            '1': ['f', 'e'],
            '0': ['g', 'e'],
            '-1': [],
        },
        'record_date': {
            '1': ['j'],
            '-1': []
        },
        'balance': {
            '1': ['p', 'q'],
            '-1': []
        },
        'deposit_amount': {
            '1': ['p'],
            '-1': []
        },
        'deposit_base': {
            '1': ['p'],
            '-1': []
        },
        'person_rate': {
            '1': ['r'],
            '-1': [],
        },
        'company_rate': {
            '1': ['r'],
            '-1': [],
        },
        'deposit_rate': {
            '1': ['r'],
            '-1': [],
        },
        'birthday': {
            '1': ['j'],
            '-1':[]
        },
        'init_date': {
            '1': ['j'],
            '-1': []
        },
        'start_date': {
            '1': ['j'],
            '-1': []
        },
        # the entries below only create output columns; no rule list is attached
        'rate': {'': ''},
        '缺少字段': {'': ''},
        '字段缺失或冗余': {'': ''},
        'gjj_details': {'': ''},
        # analysed data produced by Wang Luning (王鲁宁)
        'newest_account': {'': ''},
        'detail_amount': {'': ''},
        'detail_record_date': {'': ''},
        'detail_record_month': {'': ''},
        'detail_balance': {'': ''},
        'detail_gjj_type': {'': ''},
        'analyzed': {'': ''},
    }
}
# Zaozhuang, Lincang, Dingzhou, Shuangyashan, Pingdingshan, Suzhou (宿州), Chifeng,
# Zhumadian, Baoding, Siping  (NOTE(review): the purpose of this city list is not stated)
# Overrides that config.merge lays over the default configuration.
configs = {
    'mysql': {
        'host': 'rds-jb-08.mysql.rds.aliyuncs.com',
    },
    'debug': True,
    'limit': 50,
    'log': {
        'info': './'
    }
}
# -*- coding: utf-8 -*-
import traceback
import random
import re
import os
import json
import datetime
import time
import logging
import copy
import contextlib
import pandas as pd
import numpy as np
import requests
import pymysql
from pymongo import MongoClient
from dateutil.relativedelta import relativedelta as Datedelta
from DBUtils.PooledDB import PooledDB
from config import configs
import utils
def logger_config(date):
    """Set the root logger to INFO and log to 'check-<date>.log' in the configured
    log directory (the file is truncated); return the root logger."""
    log_path = os.path.join(configs['log']['info'], 'check-%s.log' % date)
    root = logging.getLogger('')
    root.setLevel(logging.INFO)
    file_handler = logging.FileHandler(log_path, mode='w', encoding='utf8')
    file_handler.setFormatter(logging.Formatter('%(asctime)s-%(levelname)s-%(message)s'))
    root.addHandler(file_handler)
    return root
def round_up(value):
    """Convert ``value`` to float and round it to two decimal places.

    Despite the name this uses the built-in ``round`` (nearest, ties to
    even), not ceiling rounding.
    :return: the rounded float; 0 when ``value`` cannot be converted
    """
    try:
        v = float(value)
        return round(v * 100) / 100.0
    except (TypeError, ValueError, OverflowError):
        # format_exc() returns the traceback text; print_exc() returns None,
        # so the log line used to read just "None"
        log.error(traceback.format_exc())
        return 0
# Dates are computed once, at import time.
today = datetime.date.today()
delta = datetime.timedelta(days=1)
# yesterday as 'YYYY-MM-DD'; it names the log/CSV files and is the day whose orders are checked
yestoday = (today - delta).strftime('%Y-%m-%d')
# today as 'YYYY-MM-DD', the upper bound for record_date in Rule.check_date
TODAY = today.strftime('%Y-%m-%d')
log = logger_config(yestoday)
# shared MySQL connection pool (at most three connections)
pool = PooledDB(pymysql, mincached=1, maxconnections=3,
                host=configs['mysql']['host'],
                user=configs['mysql']['user'],
                password=configs['mysql']['pwd'],
                charset='utf8'
                )
class Reviewer:
    """Fetch the successful orders of one day, run the rule checks on them and export the findings."""

    def __init__(self, date):
        """:param date: day to review, 'YYYY-MM-DD'."""
        self.rule = Rule()
        self.rule.centers_info()
        self.date = date
        self.errors = []
        self.fields = list(configs['use'].keys())
        self.columns = self.build_cols()
        self.csv = os.path.join(configs['log']['info'], 'check-%s.csv' % date)

    def build_cols(self):
        """Return the CSV columns: four fixed ones, then '<field>' and 't_<field>' per field."""
        cols = ['cid', 'city', 'order_id', 'login', ]
        for item in self.fields:
            cols.append(item)
            cols.append('t_%s' % item)
        return cols

    @staticmethod
    def from_mongo(order_id):
        """Fetch the stored document of one order; on failure return ``{'code': -1, 'msg': ...}``."""
        time.sleep(0.02)
        try:
            url = 'http://tv.51gjj.com:11250/gjj/%s' % order_id
            response = requests.get(url)
            return response.json()
        except Exception:
            traceback.print_exc()
            # format_exc() returns the text; print_exc() returns None
            log.error(traceback.format_exc())
            return {'code': -1, 'msg': '获取数据失败'}

    def find_order_ids(self, tables):
        """Collect the order ids with status 'success' updated on ``self.date``.

        :param tables: names of the status tables in schema ``node_gjj``
        :return: set of order ids (at most ``configs['limit']`` per table when a limit is set)
        """
        order_ids = set()
        for table in tables:
            query = "SELECT orderId FROM node_gjj.%s WHERE `status`='success' AND DATE(updateDate) = '%s'" \
                    % (table, self.date)
            # time.clock() was removed in Python 3.8
            t1 = time.perf_counter()
            conn = pool.connection()
            with contextlib.closing(conn) as conn:
                cursor = conn.cursor()
                with contextlib.closing(cursor) as cursor:
                    rows = cursor.execute(query)
                    if configs['limit']:
                        datas = cursor.fetchall()[:configs['limit']]
                    else:
                        datas = cursor.fetchall()
            t2 = time.perf_counter()
            log.info('Execute sql costs %.3fs, gets the number of rows is %s' % (t2 - t1, rows))
            for row in datas:
                order_ids.add(row[0])
        return order_ids

    def process(self, order_ids):
        """Run the rule checks on every order and collect the findings in ``self.errors``."""
        for order_id in order_ids:
            log.info(str(order_id))
            doc = self.from_mongo(order_id)
            if doc['code'] == 0:
                data = doc['data']['gjjData']
                try:
                    # (city, errors), or None when Rule.run failed internally
                    warn_msg = self.rule.run(data)
                    if warn_msg and warn_msg[1]:
                        self.warn(warn_msg[0], order_id, warn_msg[1])
                except Exception:
                    print(order_id)
                    log.error(traceback.format_exc())
            else:
                log.info('%s: %s' % (order_id, doc['msg']))

    def warn(self, city, order_id, warn_msg):
        """Store one finding together with the login parameters of the order."""
        params = self.login_params(order_id)
        record = {'order_id': order_id, 'city': city}
        try:
            record['login'] = str(params)
        except Exception:
            record['login'] = ''
        for k, v in warn_msg.items():
            record[k] = v
        self.errors.append(record)
        log.warning('[ %s ]-[ %s ]: %s ' % (order_id, city, warn_msg))

    def check_order(self, order_id):
        """Check a single order and log the outcome (debugging helper)."""
        doc = self.from_mongo(order_id)
        if doc['code'] == 0:
            result = doc['data']['gjjData']
            warn_msg = self.rule.run(result)
            log.warning(warn_msg)
        else:
            log.warning('[ %s ]: %s ' % (order_id, doc['msg']))

    def refresh(self):
        """Ask the update service to re-crawl every order in ``rule.need_refresh``, then wait two minutes."""
        url = 'https://tt.51gjj.com/postloan/update'
        headers = {
            'Content-Type': 'application/json'
        }
        for order_id in self.rule.need_refresh:
            form = {
                "param": {
                    "orderSn": order_id
                },
                "callbackUrl": "https://tt.51gjj.com:5000/redirect"
            }
            # The header announces JSON, but data= form-encodes the dict and
            # flattens the nested "param" mapping; json= sends the payload as declared.
            requests.post(url, json=form, headers=headers)
            time.sleep(1)
        time.sleep(120)

    def login_params(self, order_id):
        """Fetch the login parameters of an order; any failure yields ``{'msg': ...}``."""
        try:
            url = 'http://tv.51gjj.com:11250/gjjlogininfo/%s' % order_id
            res = requests.get(url)
            r = res.json()
            if not r.get('msg'):
                del r['cityname']
            return r
        except Exception:
            return {'msg': '获取登录参数错误'}

    def out_result(self):
        """Write all findings to the CSV file; outside debug mode also file bugs per city and error type."""
        sub = Submit()
        sub.login()
        df = pd.DataFrame(self.errors, columns=self.columns)
        for item in self.fields:
            t_item = 't_%s' % item
            if not configs['debug']:
                try:
                    grouped = df.groupby(['city', t_item])
                    for (city, field), group in grouped:
                        # at most five sample orders per bug
                        d = group[:5]
                        title = '%s: %s' % (city, self.trans_rule_desc(field))
                        content = []
                        for error in list(d.T.to_dict().values()):
                            content.append(
                                {'order_id': error['order_id'],
                                 'login': error['login'],
                                 item: error[item]
                                 })
                        # findings about the analysed data are not filed as bugs
                        if 'analyzed' in title:
                            continue
                        sub.submit_bug(title, content)
                        time.sleep(20)
                except Exception:
                    log.error('[%s]: %s' % ('out_result', traceback.format_exc()))
            # replace the compact 'col|mode|rule...' code by its description
            df[t_item] = df[t_item].map(self.trans_rule_desc)
        df.to_csv(self.csv, encoding='gbk')

    @staticmethod
    def trans_rule_desc(keys):
        """Render 'col|mode|rule[|rule...]' as a readable description; non-strings yield None."""
        if isinstance(keys, str):
            rule = keys.split('|')
            desc = '; '.join([configs['rules'][i]['desc'] for i in rule[2:]])
            return '%s配置为%s: %s' % (rule[0], trans_config(rule[1]), desc)
def trans_config(t):
    """Translate a mode code ('1', '-1', '0') into its label; anything else yields None."""
    labels = (('1', '完整'), ('-1', '不存在'), ('0', '不完整'))
    for code, label in labels:
        if t == code:
            return label
    return None
def build_index():
    """Generate a random rule index (letter, digit, letter) that is not yet in ``configs['rules']``.

    Whenever the candidate is taken, a four-character candidate (letter,
    digit, letter, letter) is drawn until a free one is found.
    """
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    digits = '0123456789'
    index = ''.join((random.choice(letters), random.choice(digits), random.choice(letters)))
    while index in configs['rules']:
        index = ''.join((random.choice(letters), random.choice(digits),
                         random.choice(letters), random.choice(letters)))
    return index
class Rule:
# tolerance used by check_deposit_amount when comparing amounts
_deposit_amount_error = 50
# status texts for which user_state returns '1'
_status_set = {'交存', '缴存', '正常交存', '正常缴存', '正常', '正常汇缴', '进行中', '正常缴交', }
# Cities whose official site is faulty and are therefore not checked: Suihua, Ningde, Zhumadian, Neijiang
# Cities whose details of several accounts are merged: Bazhong, Chongqing
_ignore_date_cid = {'356', '135', '430', '22', '443', '32', '288', '136', '247', }
_ignore_deposit_base = {'110', '19', '95', '447', '37', '351', '124', '417', '269', '62', '430', '87', '34',
                        '338', '57', '381', '352', '105', '27', '47', '38', '101', '312', '92', '358',
                        '121', '438', '94', '309', '380', '375', '367', '366', '292', '309', '368', '320', '311',
                        '349', '63',
                        }
_ignore_rate = {'22', '352', '87', '345', '380', '375', '367', '366', '292', '309', '368', '320', '311', '52', '82',
                '62', '445', }
# top-level keys every order document must contain (see run)
_keys = {'data', 'cid', 'orderId', 'location', 'cityName', 'create_date'}
# widening factors applied to the configured limits in test_range
_rate_weight = 2
_base_weight = 3
# keys expected in gjj_brief (see field_integrity)
brief_keys = {"ID", "name", "card", "customer_id", "deposit_amount", "fb_deposit_amount", "person_rate",
              "fb_balance", "once_balance", "status", "record_date", "company", "deposit_base", "balance",
              "company_rate", "init_date", "start_date", "sex", "email", "phone", "marriage", "address",
              "birthday", "company_id", "person_deposit_amount", "company_deposit_amount", "deposit_rate",
              }
# keys expected in every gjj_detail record (see field_integrity)
record_key = {"id", "gjj_type", "company", "order_id", "record_date", "op_type", "record_month",
              "amount", "balance", "remark", "comments", "deposit_type", "cont_flag", "back_cont_flag"
              }
# detail rows whose state matches this pattern are ignored by skip_detail
detail_filter = '上年结转|前日余额|本年合计|^期初'
# class-level (shared) set of operation names collected by skip_detail
op_set = set()
# earliest record_date accepted by check_date
farest_date = '1990-01-01'
# centre configuration table, filled by centers_info
infos = None
# class-level (shared) set of order ids that check_details queues for a refresh
need_refresh = set()
def __init__(self):
    """Reset the per-order state; ``run`` calls this again after every order."""
    self.cid = None
    self.orderId = None
    # findings of the current order: '<col>' -> raw value, 't_<col>' -> 'col|mode|rule...'
    self.error = {}
    self.newest_date = None
    self.last_record = None
    self.first_record = None
    # amount of a recent regular deposit, set by skip_detail
    self.last_deposit = None
    # True once skip_detail has found at least one usable detail record
    self.has_record = False
    # lengths of runs of consecutive months: regular deposits / all deposits
    self.const_ret = []
    self.all_ret = []
    # detail records of the non-newest accounts of the order
    self.history_record = []
@classmethod
def centers_info(cls):
    """Download the centre configuration and store it as ``cls.infos``, indexed by centre id.

    Every cell is mapped through ``Rule.trans``. The four rate-limit columns
    are converted to float ('' becomes 0), and combined upper/lower limit
    columns are added.
    """
    response = requests.get('http://tm.51gjj.com:1010/CenterConfig/GetCentersJson')
    provinces = response.json()
    centers = [
        center
        for province in provinces
        for city in province['城市']
        for center in city['公积金中心']
    ]
    table = pd.DataFrame(centers).set_index('中心id')
    for column in table.columns:
        table[column] = table[column].map(Rule.trans)
    for limit_column in ('个人缴纳比例上限', '公司缴纳比例上限', '个人缴纳比例下限', '公司缴纳比例下限'):
        table[limit_column] = table[limit_column].replace('', 0).map(float)
    table['缴纳比例上限'] = table['个人缴纳比例上限'] + table['公司缴纳比例上限']
    table['缴纳比例下限'] = table['个人缴纳比例下限'] + table['公司缴纳比例下限']
    cls.infos = table
@staticmethod
def trans(text):
if re.findall('不完整', text):
return '0'
if re.findall('完整', text):
return '1'
if re.findall('不存在', text):
return '-1'
return text
def check_id(self, id_num):
    """Validate the ID card number unless the field is configured as not present ('-1')."""
    mode = self.get_mode('身份证')
    if mode == '-1':
        return
    self.test_value(id_num, 'ID', mode)
def check_name(self, name):
    """Validate the holder name unless the field is configured as not present ('-1')."""
    mode = self.get_mode('姓名')
    if mode == '-1':
        return
    self.test_value(name, 'name', mode)
    # A length check (2 to 5 characters, with per-centre exemptions) used to
    # follow here; it is disabled.
def check_deposit_amount(self, brief):
    """Validate the monthly deposit and cross-check it against the other figures.

    :param brief: the gjj_brief dict
    The amount passes when it is within ``_deposit_amount_error`` of at least
    one of: deposit_base * deposit_rate, person + company deposit amount, or
    the latest regular deposit found in the details. Otherwise rule 'a2' is
    recorded.

    Fixes over the previous version:
    * a missing comma fused '48' and '110' into '48110', so centre 48 was
      never exempted;
    * ``abs`` was applied to the constant tolerance instead of the difference,
      so any amount larger than the reference passed;
    * ``amount`` could be unbound when the mode is '-1'.
    """
    mode = self.get_mode('月缴')
    # centres where the monthly deposit, rate or base taken from the details is unreliable
    from_details = {'100', '333', '180', '390', '21', '202', '36', '270', '108', '110', '7', '81', '48',
                    '51', '46', '39'}
    if self.cid in from_details and not self.last_deposit:
        return
    self.test_value(brief['deposit_amount'], 'deposit_amount', mode)
    # centres whose brief has a monthly deposit but no base, no personal amount and no details
    # ('100' has half-payment individuals)
    if self.cid in {'141', '29', '374', '28', '339', '56', '326', '321', '138', '332', '345', '100'}:
        return
    if mode == '-1':
        return
    try:
        amount = float(brief['deposit_amount'])
    except ValueError:
        return
    references = []
    # deposit base * deposit rate
    if brief['deposit_rate'] and brief['deposit_base']:
        references.append(float(brief['deposit_rate']) * float(brief['deposit_base']))
    # personal deposit amount + company deposit amount
    if brief['person_deposit_amount'] and brief['company_deposit_amount']:
        references.append(float(brief['company_deposit_amount']) + float(brief['person_deposit_amount']))
    # latest regular deposit from the details, when there is one
    if self.last_deposit:
        references.append(float(self.last_deposit))
    tolerance = abs(self._deposit_amount_error)
    if not any(abs(reference - amount) < tolerance for reference in references):
        self.build_error(amount, 'deposit_amount', mode, 'a2')
def check_deposit_base(self, deposit_base):
    """Validate the deposit base; centre '352' is exempt."""
    if self.cid == '352':
        return
    self.test_value(deposit_base, 'deposit_base', self.get_mode('缴存基数'))
def check_company(self, company):
    """Validate the company name unless the field is configured as not present ('-1')."""
    mode = self.get_mode('公司')
    if mode == '-1':
        return
    self.test_value(company, 'company', mode)
def check_status(self, status):
    """Validate the account status with the rules of the configured mode."""
    self.test_value(status, 'status', self.get_mode('公积金状态'))
def check_customer_id(self, customer_id):
    """Validate the fund account number; centre '61' is skipped while no record was seen."""
    skipped = self.cid == '61' and not self.has_record
    if not skipped:
        self.test_value(customer_id, 'customer_id', self.get_mode('公积金账号'))
def check_card(self, card):
    """Validate the co-branded card number; centre '307' is exempt."""
    if self.cid == '307':
        return
    self.test_value(card, 'card', self.get_mode('联名卡号'))
def check_company_id(self, company_id):
    """Validate the employer account number with the rules of the configured mode."""
    self.test_value(company_id, 'company_id', self.get_mode('单位账号'))
def check_phone(self, phone):
    """Validate the phone number with the rules of the configured mode."""
    self.test_value(phone, 'phone', self.get_mode('手机号'))
def check_date(self, i_date, s_date, r_date, ):
    """Check record_date and the ordering init_date <= start_date <= record_date.

    :param i_date: init_date (account opening date)
    :param s_date: start_date (first deposit date)
    :param r_date: record_date from the brief information
    Dates are compared as strings. At most one ordering error is reported,
    and none at all for centres listed in ``_ignore_date_cid``.
    """
    init_mode = self.get_mode('开户时间')
    start_mode = self.get_mode('开始缴存时间')
    record_mode = self.get_mode('公积金record_date')
    if r_date:
        self.test_value(r_date, 'record_date', record_mode)
        # rule 'k': record_date must lie between farest_date and today
        if r_date < self.farest_date or r_date > TODAY:
            self.build_error(r_date, 'record_date', record_mode, 'k')
    if self.cid in self._ignore_date_cid:
        return
    if i_date and r_date and i_date > r_date:
        # init_date is later than record_date
        self.test_value(i_date, 'init_date', init_mode)
        self.build_error([i_date, s_date, r_date], 'init_date', init_mode, 'v')
    elif i_date and s_date and i_date > s_date:
        # init_date is later than start_date: start_date format is validated,
        # the ordering error itself is filed under init_date
        self.test_value(s_date, 'start_date', start_mode)
        self.build_error([i_date, s_date, r_date], 'init_date', init_mode, 'v')
    elif r_date and s_date and s_date > r_date:
        self.build_error([s_date, r_date], 'start_date', start_mode, 'w')
def check_rate(self, p_rate, c_rate, rate):
    """Validate the personal, company and total deposit rates when they are positive.

    A non-numeric personal or company rate makes ``float`` raise; a failure
    while handling the total rate is printed and logged instead.
    """
    configured = (
        (p_rate, 'person_rate', '个人缴存比例'),
        (c_rate, 'company_rate', '公司缴存比例'),
    )
    for value, column, mode_key in configured:
        if value and float(value) > 0:
            self.test_value(value, column, self.get_mode(mode_key))
    try:
        # the total rate is always checked with mode '1'
        if rate and float(rate) > 0:
            self.test_value(rate, 'deposit_rate', '1')
    except Exception:
        traceback.print_exc()
        log.error(self.error)
def check_birthday(self, birthday):
    """Validate a non-empty birthday with the mode '1' rules."""
    if not birthday:
        return
    self.test_value(birthday, 'birthday', '1')
def check_balance(self, balance):
    """Validate the balance; centre '207' is skipped while no record was seen."""
    if self.cid == '207' and not self.has_record:
        return
    self.test_value(balance, 'balance', self.get_mode('余额'))
def check_analyzed(self, doc):
    """Compare the analysed ``newest_account`` flag with the state derived by ``user_state``.

    :return: True when both agree; otherwise rule 'z' is recorded and None is returned.
    """
    derived = self.user_state(doc)
    reported = doc['gjj_account_analyzed_data']['newest_account']
    if derived == reported:
        return True
    self.build_error(reported, 'newest_account', '', 'z')
    return None
def check_details(self, details, init_date, bal):
    """Validate the detail records and flag orders whose details look missing.

    :param details: list of detail dicts; self.skip_detail() is run on it first
    :param init_date: init_date of the brief record, passed to utils.away_today
    :param bal: balance of the brief record
    :return: None; problems are recorded through self.build_error, and the
        order id may be added to self.need_refresh
    """
    # NOTE(review): nesting below was reconstructed from a copy of the file
    # whose indentation had been lost -- confirm against the original.
    self.skip_detail(details)
    report = True
    report_id = 0
    item_id = ''
    amount = ''
    for item in details:
        # A record month without a record date is reported, but checking continues.
        if item['record_month'] and not item['record_date']:
            self.build_error('%s|%s' % (item['id'], item['record_date']), 'detail_record_date', '', 'a8')
        # Each format check below aborts the whole method at the first failure.
        if item['gjj_type'] not in ['0', '1']:
            self.build_error('%s|%s' % (item['id'], item['gjj_type']), 'detail_gjj_type', '', 'a5')
            return
        # record_date must look like YYYY-MM-DD.
        if item['record_date'] and not re.findall(r'^\d{4}-\d{2}-\d{2}$', item['record_date']):
            self.build_error('%s|%s' % (item['id'], item['record_date']), 'detail_record_date', '', 'a7')
            return
        # record_month must be exactly six digits.
        if item['record_month'] and not re.findall(r'^\d{6}$', item['record_month']):
            self.build_error('%s|%s' % (item['id'], item['record_month']), 'detail_record_month', '', 'a7')
            return
        # amount and balance may only contain digits, dots and minus signs.
        if item['amount'] and not re.findall(r'^[\d\.\-]+$', item['amount']):
            self.build_error('%s|%s' % (item['id'], item['amount']), 'detail_amount', '', 'a6')
            return
        if item['balance'] and not re.findall(r'^[\d\.\-]+$', item['balance']):
            self.build_error('%s|%s' % (item['id'], item['balance']), 'detail_balance', '', 'a6')
            return
        # A withdrawal/repayment-style entry whose amount contains '-' and
        # passes self.zero() disables the 'a4' report for the whole list.
        if re.search('提取|偿还|支取|还贷', item['state']) and '-' in item['amount'] and self.zero(item['amount']):
            report = False
        # Remember the latest withdrawal/repayment-style entry (excluding
        # refunds, reversals and similar) whose amount has no '-' sign.
        if re.search('提取|偿还|支取|还贷', item['state']) and not re.search('退回|还贷缴存|失败|公积金偿还利息|冲账|支取红冲|支取退款', item['state']) and '-' not in item['amount'] and self.zero(item['amount']) and report:
            report_id = 2
            item_id = item['id']
            amount = item['amount']
    # Reported once, after the loop, for the last remembered entry.
    if report_id == 2 and report:
        self.build_error('%s|%s' % (item_id, amount), 'detail_amount', '', 'a4')
    d = utils.away_today(init_date)
    mode = self.get_mode('公积金明细')
    # No details at all although the mode is '1', d[0] == 0 and d[1] > 2
    # (presumably years and months since init_date -- verify against
    # utils.away_today) and the balance passes self.zero(): queue the order
    # for a refresh instead of reporting an error.
    if mode == '1' and len(details) == 0 and d and d[0] == 0 and d[1] > 2 and self.zero(bal):
        print('%s is added to set' % self.orderId)
        self.need_refresh.add(self.orderId)
    # self.build_error(len(details), 'gjj_details', mode, 'a1')
    # repair -> check_details --> if it still fails, report a bug
def check_analyzed_data(self, analyzed):
    """Cross-check the analyzed counters against self.const_ret / self.all_ret.

    The first mismatch records a single error with rule 'a3' and stops.
    cid '122' is exempt, and a counter group is skipped while its list is
    empty.

    :param analyzed: dict of counters whose values are convertible with int()
    :return: None
    """
    if self.cid == '122':
        return
    expectations = []
    if self.const_ret:
        expectations.extend([
            ('cont_last_times', self.const_ret[-1]),
            ('cont_max_times', max(self.const_ret)),
            ('sum_times', sum(self.const_ret)),
        ])
    if self.all_ret:
        expectations.extend([
            ('back_cont_max_times', max(self.all_ret)),
            ('back_sum_times', sum(self.all_ret)),
            ('back_cont_last_times', self.all_ret[-1]),
        ])
    # Compare in the same order as the original chain of checks.
    for key, expected in expectations:
        if int(analyzed[key]) != expected:
            self.build_error(str(analyzed), 'analyzed', '', 'a3')
            return
def test_value(self, raw, col, mode):
    """Apply every rule configured for (col, mode) to raw.

    A rule is violated when "its regex matched" equals the rule's 'm'
    flag; each violation is recorded through self.build_error.

    :param raw: value under test (passed to the regex engine as-is)
    :param col: column name, key into configs['use']
    :param mode: configuration mode, key into configs['use'][col]
    :return: None
    """
    for rule_index in configs['use'][col][mode]:
        rule = configs['rules'][rule_index]
        # Case-insensitive search: only whether anything matched matters.
        matched = re.search(rule['_re'], raw, re.I) is not None
        if matched == rule['m']:
            self.build_error(raw, col, mode, rule_index)
def test_range(self, raw, col):
    """Check whether raw lies within the configured lower and upper limits.

    :param raw: value under test; falsy values are not checked
    :param col: column label used to look up the '<label>下限' and
        '<label>上限' configuration cells
    :return: 's' when below the lower limit, 't' when above the upper
        limit, otherwise None
    """
    if not raw:
        return None
    # Normalise the label to the wording used by the configuration columns.
    label = col.replace('缴存', '缴纳').replace('缴纳基数', '基数')
    is_ratio = '比例' in label

    try:
        lower = self.get_mode('%s下限' % label)
        if is_ratio:
            lower = lower / 100 / self._rate_weight
        else:
            lower = float(lower) / self._base_weight
    except ValueError:
        # No usable lower limit: fall back to 0.
        lower = 0

    try:
        upper = self.get_mode('%s上限' % label)
        if is_ratio:
            upper = upper / 100 * self._rate_weight
        else:
            upper = float(upper) * self._base_weight
    except ValueError:
        # No usable upper limit: the upper bound is not enforced.
        upper = None

    if lower > round_up(raw):
        return 's'
    if upper and upper < round_up(raw):
        return 't'
    return None
def get_mode(self, col):
    """Return the configuration cell for column col of the current cid.

    The row is selected by the label int(self.cid) in self.infos.

    :param col: column label in self.infos
    :return: the cell value, or '-1' when the selection is empty
    """
    # `.ix` was removed in pandas 1.0; `.loc` is the label-based equivalent.
    # NOTE(review): assumes self.infos is indexed by integer cid labels --
    # confirm where self.infos is loaded.
    cell = self.infos.loc[int(self.cid), [col]]
    if len(cell) > 0:
        # Explicit positional access; `cell[0]` relied on the deprecated
        # integer fallback of a label-indexed Series.
        return cell.iloc[0]
    return '-1'
def user_state(self, doc):
    """Derive the account state flag from the brief record.

    A non-empty status decides alone: '1' when it is in self._status_set,
    else '0'. With an empty status the flag is '1' only when
    utils.away_today(record_date) yields a result whose first element is 0
    and whose second element is below 3.

    :param doc: account dict holding 'gjj_brief'
    :return: '1' or '0'
    :raises TypeError: when the status is not a str
    """
    brief = doc['gjj_brief']
    status = brief['status']
    record_date = brief['record_date']
    if not isinstance(status, str):
        raise TypeError('Status')
    if status:
        return '1' if status in self._status_set else '0'
    elapsed = utils.away_today(record_date)
    if elapsed and elapsed[1] < 3 and elapsed[0] == 0:
        return '1'
    return '0'
def build_error(self, raw, col, mode, rule_index):
    """Record a rule violation in self.error.

    The first violation for a column stores str(raw) under `col` and
    'col|mode|rule_index' under 't_<col>'; later violations only append
    '|rule_index' to the 't_<col>' entry.

    :param raw: the checked source value
    :param col: name of the field the value belongs to
    :param mode: configuration of that field
    :param rule_index: index of the violated rule
    :return: None
    """
    tag = 't_%s' % col
    errors = self.error
    if not errors.get(tag):
        errors[col] = str(raw)
        errors[tag] = '%s|%s|%s' % (col, mode, rule_index)
        return
    errors[tag] += '|%s' % rule_index
@staticmethod
def test_len(raw, min_len=0, max_len=0):
    """Check that len(str(raw)) lies within [min_len, max_len].

    On a violation, rule '4' in the module-level configs is (re)defined
    with a description of the allowed range.

    :param raw: value whose string length is checked
    :param min_len: smallest allowed length (inclusive)
    :param max_len: largest allowed length (inclusive)
    :return: '4' when the length is out of range, '' when it is fine,
        None when the check itself failed (the traceback is logged)
    """
    try:
        if not min_len <= len(str(raw)) <= max_len:
            rule_index = '4'
            configs['rules'][rule_index] = {'_re': '', 'desc': '长度不在[%d, %d]之内' % (min_len, max_len)}
            return rule_index
        return ''
    except Exception:
        # traceback.print_exc() returns None, so the original logged the
        # text "None"; format_exc() returns the traceback as a string.
        log.error(traceback.format_exc())
def field_integrity(self, doc):
    """Report keys missing from the brief record and from the detail records.

    Missing brief keys are reported once. For the details, only the first
    record with missing keys is reported and the scan stops there.

    :param doc: account dict holding 'gjj_brief' and 'gjj_detail'
    :return: None
    """
    brief = doc['gjj_brief']
    if brief:
        missing = self.brief_keys.difference(brief.keys())
        if missing:
            self.build_error(missing, '缺少字段', '', 'y')
    # A falsy detail list is treated as "nothing to check".
    for record in doc['gjj_detail'] or ():
        missing = self.record_key.difference(record.keys())
        if missing:
            self.build_error(missing, '缺少字段', '', 'y')
            return
def skip_detail(self, details):
    """Filter the detail records and count runs of consecutive deposit months.

    Side effects:
      * self.history_record is appended to `details` (the caller's list is
        mutated) and every item gets a 'state' key joined from its
        'remark', 'deposit_type' and 'comments';
      * self.op_set collects each state with digits and the characters
        公/积/金 removed;
      * self.has_record is set once at least one record survives filtering;
      * self.all_ret / self.const_ret receive the lengths of consecutive
        month runs (all deposit-like entries / regular deposits only);
      * self.last_deposit is set from a regular deposit made less than four
        months ago whose record_date does not come from the history.

    :param details: list of detail dicts of the newest account
    :return: None
    """
    # NOTE(review): nesting below was reconstructed from a copy of the file
    # whose indentation had been lost -- confirm against the original.
    # Patterns are raw strings: '\d' in a normal literal is an invalid
    # escape sequence (SyntaxWarning on Python 3.12+).
    all_deposit_re = r'汇缴|补|缴交|汇交|缴存|交缴|未分摊转入到住房公积金|月缴公积金|半缴|漏缴|^汇\d'
    const_deposit_re = r'汇缴|缴交|汇交|缴存|交缴|未分摊转入到住房公积金|月缴公积金|半缴|^汇\d'
    const_exclude_re = r'[^汇及含]补|自定义缴存单位|缴存红冲|漏缴'

    # withdraw / repay / draw / loan repayment
    raw = []
    history_dates = []
    if self.history_record:
        history_dates = pd.DataFrame(self.history_record)['record_date'].tolist()
        details.extend(self.history_record)
    for item in details:
        item['state'] = '|'.join([item['remark'], item['deposit_type'], item['comments']])
        self.op_set.add(re.sub(r'[\d公积金]+', '', item['state']))
        # Keep records that have a month and do not match the filter.
        if not (re.search(self.detail_filter, item['state']) or item['record_month'] == ''):
            raw.append(item)
    if not raw:
        return
    self.has_record = True
    df = pd.DataFrame(raw).set_index('record_date').sort_values(by='record_month')
    m = n = 0
    for index, row in df.iterrows():
        state = row['state']
        minuend = utils.format_date(row['record_month'])
        # Runs over every deposit-like entry (including back payments).
        if re.search(all_deposit_re, state) and not re.search('自定义缴存单位', state):
            if m == 0:
                all_mark = minuend
                self.all_ret.append(1)
                m += 1
            mon_delta = Datedelta(minuend, all_mark)
            if mon_delta.years == 0 and mon_delta.months == 1:
                # Next calendar month: the current run grows.
                self.all_ret[-1] += 1
            elif mon_delta.months > 1 or mon_delta.years > 0:
                # Gap of more than one month: a new run starts.
                self.all_ret.append(1)
            all_mark = minuend
        # Runs over regular deposits only.
        if re.search(const_deposit_re, state) and not re.search(const_exclude_re, state):
            if n == 0:
                const_mark = minuend
                self.const_ret.append(1)
                n += 1
            mon_delta = Datedelta(minuend, const_mark)
            far_today = Datedelta(today, minuend)
            if far_today.years == 0 and far_today.months < 4 and index not in history_dates:
                self.last_deposit = row['amount']
            if mon_delta.years == 0 and mon_delta.months == 1:
                self.const_ret[-1] += 1
            elif mon_delta.months > 1 or mon_delta.years > 0:
                self.const_ret.append(1)
            const_mark = minuend
@staticmethod
def zero(amount):
try:
if float(amount) != 0:
return True
else:
return False
except:
return False
def run(self, data):
    """
    Run every check on one order and return the collected errors.

    :param data: order dict; the code reads 'orderId', 'cid', 'cityName'
        and data['data']['gjj_data'] (a list of account dicts)
    :return: tuple ('<cityName><cid>', deep copy of self.error); None when
        any exception was raised (the traceback is printed instead)
    """
    # NOTE(review): nesting below was reconstructed from a copy of the file
    # whose indentation had been lost -- confirm against the original.
    try:
        self.orderId = data['orderId']
        # NOTE(review): `diff` is a bool (the issubset result), so the error
        # recorded below carries 'False' rather than the missing keys.
        diff = self._keys.issubset(set(data.keys()))
        if not diff:
            self.build_error(diff, '字段缺失或冗余', '', 'y')
        else:
            # First pass: collect the details of every non-newest account.
            for gjj_data in data['data']['gjj_data']:
                if gjj_data['gjj_account_analyzed_data']['newest_account'] != '1':
                    self.history_record.extend(gjj_data['gjj_detail'])
            # Second pass: field integrity for all accounts, full checks
            # only for the newest account.
            for gjj_data in data['data']['gjj_data']:
                self.cid = data['cid']
                self.field_integrity(gjj_data)
                if gjj_data['gjj_account_analyzed_data']['newest_account'] != '1':
                    continue
                self.check_id(gjj_data['gjj_brief']['ID'])
                self.check_name(gjj_data['gjj_brief']['name'])
                self.check_company(gjj_data['gjj_brief']['company'])
                self.check_status(gjj_data['gjj_brief']['status'])
                self.check_customer_id(gjj_data['gjj_brief']['customer_id'])
                self.check_card(gjj_data['gjj_brief']['card'])
                self.check_company_id(gjj_data['gjj_brief']['company_id'])
                self.check_phone(gjj_data['gjj_brief']['phone'])
                self.check_balance(gjj_data['gjj_brief']['balance'])
                # more complex checks
                self.check_deposit_base(gjj_data['gjj_brief']['deposit_base'])
                self.check_date(gjj_data['gjj_brief']['init_date'], gjj_data['gjj_brief']['start_date'],
                                gjj_data['gjj_brief']['record_date'])
                self.check_rate(gjj_data['gjj_brief']['person_rate'], gjj_data['gjj_brief']['company_rate'],
                                gjj_data['gjj_brief']['deposit_rate'])
                self.check_birthday(gjj_data['gjj_brief']['birthday'])
                # check_details fills all_ret / const_ret (via skip_detail),
                # which check_analyzed_data then compares against.
                self.check_details(gjj_data['gjj_detail'], gjj_data['gjj_brief']['init_date'],
                                   gjj_data['gjj_brief']['balance'])
                self.check_deposit_amount(gjj_data['gjj_brief'])
                self.check_analyzed_data(gjj_data['gjj_account_analyzed_data'])
                # Reset the run counters before the next account.
                self.all_ret = []
                self.const_ret = []
        # Copy the errors before the instance state is reset below.
        warn_msg = copy.deepcopy(self.error)
        ret = '%s%s' % (data['cityName'], self.cid), warn_msg
        # Re-running __init__ resets the per-order state for the next call.
        self.__init__()
        return ret
    except Exception as e:
        print('%s is error' % self.orderId)
        traceback.print_exc()
        self.__init__()
class Submit:
    """
    Submits abnormal data to bugclose.
    """

    def __init__(self):
        """Create the HTTP session and set the account used for logging in."""
        session = requests.Session()
        # The session's default headers are replaced by this plain dict.
        session.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
            'Origin': 'https://www.bugclose.com',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, br',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        }
        self.session = session
        self.token = None
        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from configuration instead.
        self.user = 'huangwei@jianbing.com'
        self.pwd = 'Hw123211'

    def login(self):
        """Log in, store the returned token and query the project membership."""
        login_reply = self.session.post(
            'https://www.bugclose.com/cgi/user/login',
            data=dict(email=self.user, password=self.pwd, inviteCode=''),
        )
        token = login_reply.json()['root']
        self.token = token
        joined_reply = self.session.post(
            'https://www.bugclose.com/cgi/project/hasJoined',
            data=dict(token=token, id='14124'),
        )
        # The reply is only decoded; its content is not used.
        joined_reply.json()

    def submit_bug(self, title, content):
        """Post one bug and log whether the server accepted it.

        :param title: bug title
        :param content: iterable of rows, flattened with format_json()
        """
        content = self.format_json(content)
        form = dict(
            title=title,
            imageIds='',
            bugType='',
            attachmentIds='',
            priority='Urgent',
            version='',
            environment='',
            description=content,
            # product id
            productId='9534',
            # assignee -- Zhu: 54396, Song: 24452
            assignToId='24452',
            sprintId='0',
            planDate='',
            dueDate='',
            planVersion='',
            parentId='',
            module='',
            workload='',
            value='',
            tagIds='',
            testCaseId='',
            testTaskId='',
            customValues='',
            token=self.token,
            projectId='14124',
        )
        outcome = self.session.post('https://www.bugclose.com/cgi/bug/add', data=form).json()
        if not outcome['success']:
            # Wait before logging the failure, as the original does.
            time.sleep(30)
            log.warn('%s\n%s' % (title, content))
            log.warn('提交bug失败: %s' % outcome)
            return
        log.info('提交bug成功')

    def format_json(self, list_data):
        """Join str() of every row, one per line, truncated to 1990 characters.

        :param list_data: iterable of rows
        :return: str of at most 1990 characters
        """
        return ''.join(str(row) + '\n' for row in list_data)[:1990]
# Script entry point: review the orders found for the given statuses and
# write out the result.
# NOTE(review): nesting below was reconstructed from a copy of the file whose
# indentation had been lost -- in particular confirm that out_result() is
# meant to run unconditionally.
if __name__ == '__main__':
    log.info(configs)
    viewer = Reviewer(yestoday)
    # Status names passed to find_order_ids to select the orders to review.
    ts = {'gjj_status', 'gjjh5_status'}
    orderids = viewer.find_order_ids(ts)
    viewer.process(orderids)
    # viewer.check_order('ef95904b-75ab-4f00-b82a-d9c2f3709835')
    # Orders queued in need_refresh during the checks are refreshed.
    if viewer.rule.need_refresh:
        viewer.refresh()
        # viewer.process(viewer.rule.need_refresh)
    viewer.out_result()
# -*- coding: utf-8 -*-
import datetime
from dateutil.relativedelta import relativedelta
def away_today(date):
    """Calculate how long ago `date` was, relative to today.

    :param date: date string whose first three '-'-separated parts are
        year, month and day (e.g. '2017-03-15')
    :return: (years, months, days) taken from relativedelta(today, date);
        None when `date` is empty, not a str, or cannot be parsed
    """
    if not (date and isinstance(date, str)):
        return None
    parts = date.split('-')
    try:
        parsed = datetime.date(year=int(parts[0]), month=int(parts[1]), day=int(parts[2]))
    except (IndexError, ValueError):
        # A malformed date is reported like a missing one, so callers that
        # test the result for truthiness keep working instead of crashing.
        return None
    datedelta = relativedelta(datetime.date.today(), parsed)
    return datedelta.years, datedelta.months, datedelta.days
def format_date(raw):
    """Convert a 'YYYYMM' string into the date of the first day of that month.

    :param raw: string whose first four characters are the year and whose
        remaining characters are the month
    :return: datetime.date with day=1
    """
    year_part, month_part = raw[:4], raw[4:]
    return datetime.date(int(year_part), int(month_part), 1)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment