-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathzip-census-SF1.py
More file actions
executable file
·234 lines (200 loc) · 10.7 KB
/
zip-census-SF1.py
File metadata and controls
executable file
·234 lines (200 loc) · 10.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#!/usr/bin/env python3
import sys, csv, collections
# Census summary-level (SUMLEV) codes. Each is a three-character string so it
# can be compared directly with the three-character SUMLEV field sliced out of
# a geographic header record.
# https://factfinder.census.gov/help/en/summary_level_code_list.htm
STATE_SUMLEV = '040'   # state-level records
COUNTY_SUMLEV = '050'  # county-level records
TRACT_SUMLEV = '140'   # census-tract-level records
BLOCK_SUMLEV = '101'   # block-level records; the only level this script keeps
def line_part(line, start, length):
    '''Return one fixed-width field from a record line.

    line   -- the full record as a string
    start  -- 1-based column at which the field begins
    length -- width of the field in characters

    The result is shorter than `length` (possibly empty) when the line ends
    before the field does; padding inside the field is not stripped.
    '''
    first = start - 1  # record layouts count columns from 1, slices from 0
    return line[first:first + length]
def get_block_group_populations():
    '''Total up block populations per block group from the SF1 geo file.

    Reads the fixed-width file 'pageo2010.sf1' from the current directory and
    uses only records whose summary level equals BLOCK_SUMLEV.

    Returns a defaultdict keyed by block-group GEOID (state + county + tract +
    the leading part of the block number). Each value is a dict with:
      'pop'    -- sum of POP100 over every block in the group
      'blocks' -- defaultdict(int) mapping block GEOID (state + county +
                  tract + block) to that block's POP100
    '''
    print('Reading block group populations...', file=sys.stderr)

    bg_blocks = collections.defaultdict(
        lambda: dict(pop=0, blocks=collections.defaultdict(int)))

    # Fields are located by fixed column position, so decode exactly one
    # character per byte: latin-1 cannot fail on any byte and keeps the
    # offsets aligned even if a text field holds non-ASCII bytes.
    with open('pageo2010.sf1', encoding='latin-1') as file1:
        for line1 in file1:
            # Only block records contribute; skip everything else before
            # slicing out the remaining fields.
            if line_part(line1, 9, 3) != BLOCK_SUMLEV:
                continue

            STATE = line_part(line1, 28, 2)
            COUNTY = line_part(line1, 30, 3).rstrip()
            TRACT = line_part(line1, 55, 6).rstrip()
            BLOCK = line_part(line1, 62, 4).rstrip()
            # Dropping the last three characters of the block number leaves
            # the block-group part.
            BG = BLOCK[:-3]

            block_geoid = f'{STATE}{COUNTY}{TRACT}{BLOCK}'
            bg_geoid = f'{STATE}{COUNTY}{TRACT}{BG}'
            population = int(line_part(line1, 319, 9))  # POP100, total population

            bg_blocks[bg_geoid]['pop'] += population
            bg_blocks[bg_geoid]['blocks'][block_geoid] += population

    return bg_blocks
def read_block_CVAPs(bg_blocks):
    '''Apportion block-group CVAP figures to blocks by population share.

    bg_blocks -- mapping of block-group GEOID to a dict with 'pop' (group
                 population) and 'blocks' (block GEOID -> block population),
                 as produced by get_block_group_populations(). It is only
                 read, never modified.

    Reads 'BlockGr.csv' from the current directory, using its 'geoid',
    'lntitle', 'CVAP_EST' and 'CVAP_MOE' columns, and keeps only rows whose
    geoid starts with '15000US42'.

    Returns a two-level defaultdict: the outer key is BLOCK_SUMLEV + '0000US'
    + block GEOID; the inner keys are each row's line title (estimate) and
    the line title followed by ', Error' (margin of error). Every value is
    the block group's figure multiplied by block_pop / group_pop. Missing
    keys at either level read as 0.0.
    '''
    print('Reading block group CVAPs...', file=sys.stderr)

    block_CVAPs = collections.defaultdict(lambda: collections.defaultdict(float))

    with open('BlockGr.csv', encoding='latin-1') as bg_file:
        for bg_row in csv.DictReader(bg_file):
            # Presumably summary level 150 rows for state FIPS 42, matching
            # the 'pa' input files used elsewhere in this script.
            if not bg_row['geoid'].startswith('15000US42'):
                continue

            _, bg_geoid = bg_row['geoid'].split('US')
            bg_line_title = bg_row['lntitle']
            bg_CVAP_EST = int(bg_row['CVAP_EST'])
            bg_CVAP_MOE = int(bg_row['CVAP_MOE'])

            # .get() so that an unknown block group does not insert an empty
            # entry into the caller's mapping.
            group = bg_blocks.get(bg_geoid)

            # Nothing to apportion for an unknown or unpopulated group; this
            # also guards the division below.
            if group is None or group['pop'] <= 0:
                continue

            group_pop = group['pop']
            for (block_geoid, block_pop) in group['blocks'].items():
                fraction = block_pop / group_pop
                full_geoid = f'{BLOCK_SUMLEV}0000US{block_geoid}'
                block_CVAPs[full_geoid][bg_line_title] = bg_CVAP_EST * fraction
                block_CVAPs[full_geoid][f'{bg_line_title}, Error'] = bg_CVAP_MOE * fraction

    return block_CVAPs
# Apportion block-group CVAP figures to blocks once, up front; the main loop
# below looks each block up by its full GEOID.
block_CVAPs = read_block_CVAPs(get_block_group_populations())

# Debug aid: show a three-entry sample of the apportioned table on stderr.
import pprint
pprint.pprint(list(block_CVAPs.items())[300:303], stream=sys.stderr)

# Cell numbers, used for both table P9 (total population) and table P11
# (18+), of the non-Hispanic categories counted as Black: cell 6 is Black
# alone, the rest are multi-race categories that include Black.
_BLACK_CELLS = (
    6, 13, 18, 19, 20, 21, 29, 30, 31, 32, 39, 40, 41, 42, 43, 44,
    50, 51, 52, 53, 54, 55, 60, 61, 62, 63,
)
# Cell 1 is the table total and cell 2 is Hispanic or Latino.
_TABLE_CELLS = (1, 2) + _BLACK_CELLS

_FIELDNAMES = (
    'geoid', 'lat', 'lon', 'Population 2010', 'Hispanic Population 2010',
    'Black Population 2010', 'Voting-Age Population 2010',
    'Hispanic Voting-Age Population 2010', 'Black Voting-Age Population 2010',
    'Citizen Voting-Age Population 2015',
    'Citizen Voting-Age Population 2015, Error',
    'Hispanic Citizen Voting-Age Population 2015',
    'Hispanic Citizen Voting-Age Population 2015, Error',
    'Black Citizen Voting-Age Population 2015',
    'Black Citizen Voting-Age Population 2015, Error',
)

# The geo file is fixed-width, so it is decoded one character per byte
# (latin-1) to keep column offsets exact. The two data files are read as CSV.
with open('pageo2010.sf1', encoding='latin-1') as file1, \
        open('pa000032010.sf1') as file2, \
        open('pa000042010.sf1') as file3:
    rows2, rows3 = csv.reader(file2), csv.reader(file3)
    output = csv.DictWriter(sys.stdout, _FIELDNAMES, dialect='excel')
    output.writeheader()

    # The three files are walked in lockstep, one record per line in each.
    # NOTE(review): zip() stops at the shortest input, so a truncated file
    # would end the run early without an error; the LOGRECNO checks below
    # only catch misalignment, not missing trailing records.
    for (line1, row2, row3) in zip(file1, rows2, rows3):
        SUMLEV = line_part(line1, 9, 3)
        GEOCOMP = line_part(line1, 12, 2)
        STATE = line_part(line1, 28, 2)
        COUNTY = line_part(line1, 30, 3).rstrip()
        TRACT = line_part(line1, 55, 6).rstrip()
        BLOCK = line_part(line1, 62, 4).rstrip()
        geoid = f'{SUMLEV}00{GEOCOMP}US{STATE}{COUNTY}{TRACT}{BLOCK}'

        geo = dict(
            LOGRECNO=line_part(line1, 19, 7),
            POP100=line_part(line1, 319, 9),  # Total population
            INTPTLAT=line_part(line1, 337, 11),
            INTPTLON=line_part(line1, 348, 12),
        )

        # Table P9 (total population): cell n is read from column 125 + n.
        sf1_3 = {f'P009{cell:04d}': row2[125 + cell] for cell in _TABLE_CELLS}
        sf1_3['LOGRECNO'] = row2[4]

        # Table P11 (18+ population): cell n is read from column 75 + n.
        # P0100001 (column 5) is the 18+ total from table P10.
        sf1_4 = {f'P011{cell:04d}': row3[75 + cell] for cell in _TABLE_CELLS}
        sf1_4['LOGRECNO'] = row3[4]
        sf1_4['P0100001'] = row3[5]

        # Sanity checks on every record (not just blocks): the three files
        # must describe the same logical record and agree on their totals.
        # These are asserts, so they are skipped when Python runs with -O.
        logrecno = geo['LOGRECNO']
        assert logrecno == sf1_3['LOGRECNO'], \
            f'LOGRECNO {logrecno}: geo file and pa000032010.sf1 are out of step'
        assert logrecno == sf1_4['LOGRECNO'], \
            f'LOGRECNO {logrecno}: geo file and pa000042010.sf1 are out of step'
        assert int(geo['POP100']) == int(sf1_3['P0090001']), \
            f'LOGRECNO {logrecno}: POP100 differs from P0090001'
        assert int(sf1_4['P0100001']) == int(sf1_4['P0110001']), \
            f'LOGRECNO {logrecno}: P0100001 differs from P0110001'
        assert int(sf1_4['P0110002']) <= int(sf1_3['P0090002']), \
            f'LOGRECNO {logrecno}: Hispanic 18+ exceeds Hispanic total'
        assert int(sf1_4['P0110006']) <= int(sf1_3['P0090006']), \
            f'LOGRECNO {logrecno}: Black-alone 18+ exceeds Black-alone total'

        # Only block records are written out.
        if SUMLEV not in (BLOCK_SUMLEV, ):
            continue

        print(geoid, geo, file=sys.stderr)

        # Blocks absent from the CVAP table read as 0.0 (defaultdict).
        cvap = block_CVAPs[geoid]

        output.writerow({
            'geoid': geoid,
            'lat': float(geo['INTPTLAT'].lstrip('+')),
            'lon': float(geo['INTPTLON'].lstrip('+')),

            # Table P9: Hispanic and non-Hispanic race for total population
            'Population 2010': int(sf1_3['P0090001']),
            'Hispanic Population 2010': int(sf1_3['P0090002']),
            # Every non-Hispanic Black or African American
            'Black Population 2010': sum(
                int(sf1_3[f'P009{cell:04d}']) for cell in _BLACK_CELLS),

            # Table P11: Hispanic and non-Hispanic race for 18+
            'Voting-Age Population 2010': int(sf1_4['P0110001']),
            'Hispanic Voting-Age Population 2010': int(sf1_4['P0110002']),
            # Every 18+ non-Hispanic Black or African American
            'Black Voting-Age Population 2010': sum(
                int(sf1_4[f'P011{cell:04d}']) for cell in _BLACK_CELLS),

            'Citizen Voting-Age Population 2015': round(cvap['Total'], 3),
            'Citizen Voting-Age Population 2015, Error': round(
                cvap['Total, Error'], 3),
            'Hispanic Citizen Voting-Age Population 2015': round(
                cvap['Hispanic or Latino'], 3),
            'Hispanic Citizen Voting-Age Population 2015, Error': round(
                cvap['Hispanic or Latino, Error'], 3),
            'Black Citizen Voting-Age Population 2015': round(
                cvap['Black or African American Alone']
                + cvap['Black or African American and White'],
                3),
            # NOTE(review): the two margins of error are added linearly;
            # confirm this is intended rather than a root-sum-of-squares
            # combination.
            'Black Citizen Voting-Age Population 2015, Error': round(
                cvap['Black or African American Alone, Error']
                + cvap['Black or African American and White, Error'],
                3),
        })