Model-PA/zip-census-SF1.py at master · PlanScore/Model-PA · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#!/usr/bin/env python3
import sys, csv, collections

# https://factfinder.census.gov/help/en/summary_level_code_list.htm
STATE_SUMLEV = '040'
COUNTY_SUMLEV = '050'
TRACT_SUMLEV = '140'
BLOCK_SUMLEV = '101'

def line_part(line, start, length):
    '''
    '''
    return line[start-1:length+start-1]

def get_block_group_populations():
    print('Reading block group populations...', file=sys.stderr)
    bg_blocks = collections.defaultdict(lambda: dict(pop=0, blocks=collections.defaultdict(int)))

    with open('pageo2010.sf1') as file1:
        for line1 in file1:
            SUMLEV = line_part(line1, 9, 3)
            GEOCOMP = line_part(line1, 12, 2)
            STATE = line_part(line1, 28, 2)
            COUNTY = line_part(line1, 30, 3).rstrip()
            TRACT = line_part(line1, 55, 6).rstrip()
            BLOCK = line_part(line1, 62, 4).rstrip()
            BG = BLOCK[:-3]
            block_geoid = f'{STATE}{COUNTY}{TRACT}{BLOCK}'
            bg_geoid = f'{STATE}{COUNTY}{TRACT}{BG}'

            POP100 = line_part(line1, 319, 9) # Total population

            if SUMLEV not in (BLOCK_SUMLEV, ):
                continue

            bg_blocks[bg_geoid]['pop'] += int(POP100)
            bg_blocks[bg_geoid]['blocks'][block_geoid] += int(POP100)

    return bg_blocks

def read_block_CVAPs(bg_blocks):
    print('Reading block group CVAPs...', file=sys.stderr)
    block_CVAPs = collections.defaultdict(lambda: collections.defaultdict(float))

    with open('BlockGr.csv', encoding='latin-1') as bg_file:
        for bg_row in csv.DictReader(bg_file):
            if not bg_row['geoid'].startswith('15000US42'):
                continue

            _, bg_geoid = bg_row['geoid'].split('US')
            bg_line_title = bg_row['lntitle']
            bg_CVAP_EST = int(bg_row['CVAP_EST'])
            bg_CVAP_MOE = int(bg_row['CVAP_MOE'])
            group = bg_blocks[bg_geoid]

            for (block_geoid, block_pop) in group['blocks'].items():
                if group['pop'] > 0:
                    fraction = block_pop / group['pop']
                    full_geoid = f'{BLOCK_SUMLEV}0000US{block_geoid}'
                    block_CVAPs[full_geoid][bg_line_title] = bg_CVAP_EST * fraction
                    block_CVAPs[full_geoid][f'{bg_line_title}, Error'] = bg_CVAP_MOE * fraction

            #print(bg_row, file=sys.stderr)

    return block_CVAPs

block_CVAPs = read_block_CVAPs(get_block_group_populations())
import pprint; pprint.pprint(list(block_CVAPs.items())[300:303], stream=sys.stderr)

with open('pageo2010.sf1') as file1, open('pa000032010.sf1') as file2, open('pa000042010.sf1') as file3:
    rows2, rows3 = csv.reader(file2), csv.reader(file3)
    output = csv.DictWriter(sys.stdout,
        ('geoid', 'lat', 'lon', 'Population 2010', 'Hispanic Population 2010',
        'Black Population 2010', 'Voting-Age Population 2010',
        'Hispanic Voting-Age Population 2010', 'Black Voting-Age Population 2010',
        'Citizen Voting-Age Population 2015',
        'Citizen Voting-Age Population 2015, Error',
        'Hispanic Citizen Voting-Age Population 2015',
        'Hispanic Citizen Voting-Age Population 2015, Error',
        'Black Citizen Voting-Age Population 2015',
        'Black Citizen Voting-Age Population 2015, Error', ),
        dialect='excel')
    output.writeheader()

    for (line1, row2, row3) in zip(file1, rows2, rows3):
        SUMLEV = line_part(line1, 9, 3)
        GEOCOMP = line_part(line1, 12, 2)
        STATE = line_part(line1, 28, 2)
        COUNTY = line_part(line1, 30, 3).rstrip()
        TRACT = line_part(line1, 55, 6).rstrip()
        BLOCK = line_part(line1, 62, 4).rstrip()
        geoid = f'{SUMLEV}00{GEOCOMP}US{STATE}{COUNTY}{TRACT}{BLOCK}'

        geo = dict(
            LOGRECNO = line_part(line1, 19, 7),
            POP100 = line_part(line1, 319, 9), # Total population
            INTPTLAT = line_part(line1, 337, 11),
            INTPTLON = line_part(line1, 348, 12),
            )

        sf1_3 = dict(
            LOGRECNO = row2[4],
            P0090001 = row2[125+1], # Total Population
            P0090002 = row2[125+2], # Hispanic or Latino
            P0090006 = row2[125+6], # Black Alone
            P0090013 = row2[125+13], # Partially-Black
            P0090018 = row2[125+18], # Partially-Black
            P0090019 = row2[125+19], # Partially-Black
            P0090020 = row2[125+20], # Partially-Black
            P0090021 = row2[125+21], # Partially-Black
            P0090029 = row2[125+29], # Partially-Black
            P0090030 = row2[125+30], # Partially-Black
            P0090031 = row2[125+31], # Partially-Black
            P0090032 = row2[125+32], # Partially-Black
            P0090039 = row2[125+39], # Partially-Black
            P0090040 = row2[125+40], # Partially-Black
            P0090041 = row2[125+41], # Partially-Black
            P0090042 = row2[125+42], # Partially-Black
            P0090043 = row2[125+43], # Partially-Black
            P0090044 = row2[125+44], # Partially-Black
            P0090050 = row2[125+50], # Partially-Black
            P0090051 = row2[125+51], # Partially-Black
            P0090052 = row2[125+52], # Partially-Black
            P0090053 = row2[125+53], # Partially-Black
            P0090054 = row2[125+54], # Partially-Black
            P0090055 = row2[125+55], # Partially-Black
            P0090060 = row2[125+60], # Partially-Black
            P0090061 = row2[125+61], # Partially-Black
            P0090062 = row2[125+62], # Partially-Black
            P0090063 = row2[125+63], # Partially-Black
            )

        sf1_4 = dict(
            LOGRECNO = row3[4],
            P0100001 = row3[5], # Total 18+ Population
            P0110001 = row3[75+1], # Total 18+ Population
            P0110002 = row3[75+2], # Hispanic or Latino 18+ Population
            P0110006 = row3[75+6], # Black Alone 18+ Population
            P0110013 = row3[75+13], # Partially-Black 18+ Population
            P0110018 = row3[75+18], # Partially-Black 18+ Population
            P0110019 = row3[75+19], # Partially-Black 18+ Population
            P0110020 = row3[75+20], # Partially-Black 18+ Population
            P0110021 = row3[75+21], # Partially-Black 18+ Population
            P0110029 = row3[75+29], # Partially-Black 18+ Population
            P0110030 = row3[75+30], # Partially-Black 18+ Population
            P0110031 = row3[75+31], # Partially-Black 18+ Population
            P0110032 = row3[75+32], # Partially-Black 18+ Population
            P0110039 = row3[75+39], # Partially-Black 18+ Population
            P0110040 = row3[75+40], # Partially-Black 18+ Population
            P0110041 = row3[75+41], # Partially-Black 18+ Population
            P0110042 = row3[75+42], # Partially-Black 18+ Population
            P0110043 = row3[75+43], # Partially-Black 18+ Population
            P0110044 = row3[75+44], # Partially-Black 18+ Population
            P0110050 = row3[75+50], # Partially-Black 18+ Population
            P0110051 = row3[75+51], # Partially-Black 18+ Population
            P0110052 = row3[75+52], # Partially-Black 18+ Population
            P0110053 = row3[75+53], # Partially-Black 18+ Population
            P0110054 = row3[75+54], # Partially-Black 18+ Population
            P0110055 = row3[75+55], # Partially-Black 18+ Population
            P0110060 = row3[75+60], # Partially-Black 18+ Population
            P0110061 = row3[75+61], # Partially-Black 18+ Population
            P0110062 = row3[75+62], # Partially-Black 18+ Population
            P0110063 = row3[75+63], # Partially-Black 18+ Population
            )

        assert geo['LOGRECNO'] == sf1_3['LOGRECNO']
        assert geo['LOGRECNO'] == sf1_4['LOGRECNO']
        assert int(geo['POP100']) == int(sf1_3['P0090001'])
        assert int(sf1_4['P0100001']) == int(sf1_4['P0110001'])
        assert int(sf1_4['P0110002']) <= int(sf1_3['P0090002'])
        assert int(sf1_4['P0110006']) <= int(sf1_3['P0090006'])

        if SUMLEV not in (BLOCK_SUMLEV, ):
            continue

        print(geoid, geo, file=sys.stderr)

        output.writerow({
            'geoid': geoid,
            'lat': float(geo['INTPTLAT'].lstrip('+')),
            'lon': float(geo['INTPTLON'].lstrip('+')),

            # Table P9: Hispanic and non-Hispanic race for total population
            'Population 2010': int(sf1_3['P0090001']),
            'Hispanic Population 2010': int(sf1_3['P0090002']),

            # Every non-Hispanic Black or African American
            'Black Population 2010': sum([int(sf1_3[key]) for key in (
                'P0090006', 'P0090013', 'P0090018', 'P0090019', 'P0090020',
                'P0090021', 'P0090029', 'P0090030', 'P0090031', 'P0090032',
                'P0090039', 'P0090040', 'P0090041', 'P0090042', 'P0090043',
                'P0090044', 'P0090050', 'P0090051', 'P0090052', 'P0090053',
                'P0090054', 'P0090055', 'P0090060', 'P0090061', 'P0090062',
                'P0090063', )]),

            # Table P11: Hispanic and non-Hispanic race for 18+
            'Voting-Age Population 2010': int(sf1_4['P0110001']),
            'Hispanic Voting-Age Population 2010': int(sf1_4['P0110002']),

            # Every 18+ non-Hispanic Black or African American
            'Black Voting-Age Population 2010': sum([int(sf1_4[key]) for key in (
                'P0110006', 'P0110013', 'P0110018', 'P0110019', 'P0110020',
                'P0110021', 'P0110029', 'P0110030', 'P0110031', 'P0110032',
                'P0110039', 'P0110040', 'P0110041', 'P0110042', 'P0110043',
                'P0110044', 'P0110050', 'P0110051', 'P0110052', 'P0110053',
                'P0110054', 'P0110055', 'P0110060', 'P0110061', 'P0110062',
                'P0110063', )]),

            'Citizen Voting-Age Population 2015': round(
                block_CVAPs[geoid]['Total'],
                3),

            'Citizen Voting-Age Population 2015, Error': round(
                block_CVAPs[geoid]['Total, Error'],
                3),

            'Hispanic Citizen Voting-Age Population 2015': round(
                block_CVAPs[geoid]['Hispanic or Latino'],
                3),

            'Hispanic Citizen Voting-Age Population 2015, Error': round(
                block_CVAPs[geoid]['Hispanic or Latino, Error'],
                3),

            'Black Citizen Voting-Age Population 2015': round(
                block_CVAPs[geoid]['Black or African American Alone']
              + block_CVAPs[geoid]['Black or African American and White'],
                3),

            'Black Citizen Voting-Age Population 2015, Error': round(
                block_CVAPs[geoid]['Black or African American Alone, Error']
              + block_CVAPs[geoid]['Black or African American and White, Error'],
                3),
            })