Commit 27eee097 authored by qihaoyu's avatar qihaoyu

remote main.py

parent af6f8821
Pipeline #330 failed with stages
......@@ -2,4 +2,6 @@
venv/
__pycache__/
*.html
*.xml
\ No newline at end of file
*.xml
*.pdf
*.log
\ No newline at end of file
......@@ -2,16 +2,8 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="5bfc29c1-469c-4de2-8833-97586daff38c" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/dataSources.local.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/dataSources.local.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/dataSources/16830a47-a264-4867-9152-47f09ec57145.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/dataSources/16830a47-a264-4867-9152-47f09ec57145.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/oie-yearly-report.iml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/oie-yearly-report.iml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/have_7.html" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" afterPath="$PROJECT_DIR$/main.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/postgre/utils.py" beforeDir="false" afterPath="$PROJECT_DIR$/postgre/utils.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/test.py" beforeDir="false" afterPath="$PROJECT_DIR$/test.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
<change beforePath="$PROJECT_DIR$/main.py" beforeDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
......@@ -273,7 +265,7 @@
<workItem from="1570519082454" duration="781000" />
<workItem from="1570520677785" duration="7992000" />
<workItem from="1575857587490" duration="2280000" />
<workItem from="1579050553642" duration="1236000" />
<workItem from="1579050553642" duration="1554000" />
</task>
<servers />
</component>
......
from postgre.utils import util as pysql
from yearly_report_analysis import year
# Translates the column headers found in a parsed report table row into
# the key names used for the records built from those rows.
summary_col_dict = {
    'OIE-Listed disease': 'oie_listed_disease',
    'occurrence': 'occurrence',
    'Serotype(s)': 'serotype',
    'New outbreaks': 'new_outbreaks',
    'Total outbreaks': 'total_outbreaks',
    'Species': 'species',
    'Control Measures': 'control_measures',
    'Official vaccination': 'official_vaccination',
    'Measuring units': 'measuring_units',
    'Susceptible': 'susceptible',
    'Cases': 'cases',
    'Deaths': 'deaths',
    'Killed and disposed of': 'killed_and_disposed_of',
    'Slaughtered': 'slaughtered',
    'Vaccination in response to the outbreak(s)': 'vaccination_in_response_to_the_outbreak',
    'id': 'disease_id',
}
# Runs at import time: count the rows of public.oie_yearlyreport_new.
# NOTE(review): assumes pysql.exec returns a sequence whose first item is
# the integer count; if it returns a list of row tuples, `length` would be
# a tuple rather than an int -- TODO confirm against postgre.utils.
select_length = pysql.exec('SELECT COUNT(*) FROM public.oie_yearlyreport_new')
length = select_length[0]
def _log_progress(message):
    """Append *message* to ana.log and echo the same text to stdout."""
    with open('ana.log', 'a', encoding='utf-8') as log_file:
        log_file.write(message + '\n')
    print(message)


def sub_select(length, disease_dict, *, start=500, batch_size=10):
    """Analyse the stored yearly reports batch by batch.

    Rows are fetched from ``oie_yearlyreport_new`` in windows of
    ``batch_size`` beginning at offset ``start``; each row's ``json_text``
    is parsed with ``year.analysis`` and handed to ``handle_all_analysis``.
    Progress is reported through ana.log and stdout with identical text.

    Args:
        length: Upper bound (exclusive) for the batch offsets.
        disease_dict: Disease name -> id mapping passed on to
            ``handle_all_analysis``.
        start: Offset of the first batch. Defaults to 500, the value that
            was previously hard-coded.
        batch_size: Rows requested per batch; must be a non-zero integer
            because it is used as the ``range`` step. Defaults to 10.

    Returns:
        None.
    """
    for offset in range(start, length, batch_size):
        # Same inclusive range in the log file and on stdout (the two used
        # to disagree: the file said offset+9, the console offset+10).
        _log_progress('正在处理' + str(offset) + '-' + str(offset + batch_size - 1)
                      + ' 共' + str(length) + '条')
        batch = pysql.selectData(
            ('country_code', 'year', 'type', 'json_text', 'id'),
            'oie_yearlyreport_new',
            False,
            (batch_size, offset),
        )
        for position, record in enumerate(batch, start=1):
            _log_progress('处理第' + str(position) + '条中……')
            a_html = year.analysis(record['json_text'])
            succeeded = handle_all_analysis(
                a_html,
                disease_dict,
                record['id'],
                record['country_code'],
                record['year'],
                record['type'],
            )
            if succeeded:
                _log_progress('第' + str(position) + '条处理完成')
            else:
                _log_progress('第' + str(position) + '条处理出现问题,已记录')
def handle_all_analysis(a_html, disease_dict, year_report_id, country_code, year, type):
    """Store the summary rows extracted from one parsed report.

    Args:
        a_html: Sequence of parsed tables for the report; only the first
            item is used here.
        disease_dict: Disease name -> id mapping, forwarded to
            ``handle_a_table_1``.
        year_report_id: Copied onto every stored record.
        country_code: Copied onto every stored record.
        year: Copied onto every stored record.
        type: Copied onto every stored record (name kept for callers even
            though it shadows the builtin).

    Returns:
        False when ``a_html`` is empty or None (the report is appended to
        err.log), True once every row has been inserted or skipped as
        already existing.
    """
    if not a_html:
        with open('err.log', 'a', encoding='utf-8') as err_file:
            err_file.write(str([a_html, year_report_id, country_code, year, type]) + '\n')
        return False

    # Only the first table is processed; the second one was previously
    # fetched into an unused local, which raised IndexError for reports
    # that contain a single table.
    rows = handle_a_table_1(a_html[0], [0, 1, 2], disease_dict)
    for row in rows:
        # Rename the table headers to record keys; empty cells become '...'.
        record = {
            summary_col_dict[header]: ('...' if value == '' else value)
            for header, value in row.items()
        }
        record['year_report_id'] = year_report_id
        record['country_code'] = country_code
        record['year'] = year
        record['type'] = type
        if is_exist('oie_year_report_summary', ['oie_listed_disease'], record):
            with open('ana.log', 'a', encoding='utf-8') as log_file:
                log_file.write('已存在:' + str(record) + '\n')
            print('已存在:' + str(record))
            continue
        insert_into_summary('oie_year_report_summary', record)
    return True
def handle_1(json):
    """Unimplemented placeholder: accepts *json* and returns None."""
# Suffixes removed from cell text, applied in this order.
_SPECIES_SCOPE_SUFFIXES = (' (Domestic and Wild)', ' (Domestic)', ' (Wild)')


def _strip_species_scope(text):
    """Return *text* with every species-scope suffix removed."""
    for suffix in _SPECIES_SCOPE_SUFFIXES:
        text = text.replace(suffix, '')
    return text


def handle_a_table_1(table, auto_col, disease_dict):
    """Flatten the summary table into one dict per data row.

    Args:
        table: Mapping whose values are either empty or hold a ``'th'``
            list of header names and a ``'td'`` list of rows, each row
            being a list of cell strings. Rows are modified in place.
        auto_col: Column indexes whose blank cells inherit the last
            non-blank value seen; the carried values are reset whenever a
            row's first cell is non-empty.
        disease_dict: Disease name -> id mapping. When an unknown disease
            is met it is inserted into ``oie_disease`` and this mapping is
            refreshed in place, so the caller sees the new id as well and
            later calls do not insert the same disease again.

    Returns:
        A list of dicts keyed by header name, each with an extra ``'id'``
        entry holding the disease id.

    Raises:
        IndexError: If a row has fewer cells than ``auto_col`` or the
            header list requires.
    """
    carried = {}
    ok_table = []
    for group_key in table:
        group = table[group_key]
        if not group:
            continue
        for cells in group['td']:
            if len(cells) < 2:
                continue
            if cells[0] != '':
                # A filled first cell starts a new disease entry.
                carried = {}
            for col in auto_col:
                if cells[col].strip():
                    cells[col] = _strip_species_scope(cells[col])
                    carried[col] = cells[col]
                else:
                    # Blank cell: reuse the value carried from above
                    # (or '' when nothing has been seen yet).
                    cells[col] = carried.setdefault(col, '')
            record = {
                header: cells[position]
                for position, header in enumerate(group['th'])
            }
            disease_name = _strip_species_scope(record['OIE-Listed disease'])
            if disease_name not in disease_dict:
                pysql.insertData({'oiednameen': disease_name}, 'oie_disease')
                # Refresh the caller's mapping instead of rebinding a
                # local, otherwise the next report re-inserts the disease.
                disease_dict.update(get_disease_list())
            record['id'] = disease_dict[disease_name]
            ok_table.append(record)
    return ok_table
def handle_a_table_3(title):
    """Placeholder for the second table: hands *title* back unchanged."""
    return title
# Fetch the list of all diseases.
def get_disease_list():
    """Return a dict mapping each ``oiednameen`` value in ``oie_disease`` to its ``id``."""
    rows = pysql.selectData(['oiednameen', 'id'], 'oie_disease')
    return {row['oiednameen']: row['id'] for row in rows}
def insert_into_summary(table_name, data):
    """Insert the record *data* into *table_name* through ``pysql.insertData``."""
    record, target = data, table_name
    pysql.insertData(record, target)
def is_exist(table_name, col, data):
    """Tell whether ``pysql.selectData(col, table_name, data)`` yields a truthy result."""
    return bool(pysql.selectData(col, table_name, data))
# Script entry (runs at import time): load the disease name -> id mapping
# from the database, then process the reports up to `length`.
disease_dict = get_disease_list()
sub_select(length, disease_dict)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment