Skip to content

Commit f2e8fc4

Browse files
authored
Merge pull request #154 from PolicyEngine/maria/paper_updates
Benchmarking experiment updates
2 parents 95d34b8 + 0a5a69e commit f2e8fc4

32 files changed

Lines changed: 265107 additions & 709844 deletions

changelog_entry.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
- bump: minor
2+
changes:
3+
added:
4+
- Benchmarking experiments for wealth imputation paper draft.
5+
- MDN model to experiments run in imputing-from-scf-to-cps.ipynb.
6+
- Privacy & Terms to microimputation-dashboard.

microimputation-dashboard/app/page.tsx

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,61 @@ import { parseImputationCSV } from '@/utils/csvParser';
88
import { ImputationDataPoint } from '@/types/imputation';
99
import { parseDeeplinkParams, GitHubArtifactInfo } from '@/utils/deeplinks';
1010

11+
/**
 * Modal dialog showing the dashboard's privacy notice and terms of use.
 *
 * Renders nothing while `isOpen` is false. While open, it can be dismissed
 * with the Close button, the Escape key, or a click on the dimmed backdrop.
 * The Close button receives focus on mount so keyboard and screen-reader
 * users land inside the dialog and Escape key presses reach its handler.
 *
 * @param isOpen - Whether the modal is currently shown.
 * @param onClose - Invoked when the user dismisses the modal.
 */
function PrivacyModal({ isOpen, onClose }: { isOpen: boolean; onClose: () => void }) {
  if (!isOpen) return null;

  return (
    <div
      className="fixed inset-0 bg-black bg-opacity-50 z-50 flex items-center justify-center p-4"
      onClick={(event) => {
        // Clicks inside the panel bubble up with a different target; only a
        // click that lands directly on the backdrop dismisses the modal.
        if (event.target === event.currentTarget) onClose();
      }}
      onKeyDown={(event) => {
        if (event.key === 'Escape') onClose();
      }}
    >
      <div
        role="dialog"
        aria-modal="true"
        aria-labelledby="privacy-modal-title"
        className="bg-white rounded-lg max-w-lg w-full p-6 shadow-xl"
      >
        <h2 id="privacy-modal-title" className="text-xl font-bold text-gray-900 mb-4">Privacy & Terms of Use</h2>

        <div className="space-y-4 text-sm text-gray-700">
          <div>
            <h3 className="font-semibold text-gray-900 mb-1">Data Privacy</h3>
            <p>
              All data uploaded to this dashboard is processed entirely within your browser.
              No data is transmitted to or stored on PolicyEngine servers. When you close or
              refresh this page, all loaded data is cleared from memory.
            </p>
          </div>

          <div>
            <h3 className="font-semibold text-gray-900 mb-1">Disclaimer</h3>
            <p>
              This tool is provided &quot;as is&quot; without warranty of any kind, express or implied.
              PolicyEngine assumes no responsibility for the security, accuracy, or confidentiality
              of any data you choose to load into this application.
            </p>
          </div>

          <div>
            <h3 className="font-semibold text-gray-900 mb-1">User Responsibility</h3>
            <p>
              Users are solely responsible for ensuring they have appropriate rights to use any
              data loaded into this dashboard and for compliance with applicable data protection
              regulations.
            </p>
          </div>
        </div>

        {/* type="button" keeps this from acting as a submit button if the modal is ever rendered inside a form. */}
        <button
          type="button"
          autoFocus
          onClick={onClose}
          className="mt-6 w-full bg-blue-600 hover:bg-blue-700 text-white font-medium py-2 px-4 rounded-md transition-colors"
        >
          Close
        </button>
      </div>
    </div>
  );
}
58+
1159
function HomeContent() {
1260
const [data, setData] = useState<ImputationDataPoint[]>([]);
1361
const [fileName, setFileName] = useState<string>('');
1462
const [showDashboard, setShowDashboard] = useState(false);
1563
const [isLoadingFromDeeplink, setIsLoadingFromDeeplink] = useState(false);
1664
const [githubArtifactInfo, setGithubArtifactInfo] = useState<GitHubArtifactInfo | null>(null);
65+
const [showPrivacyModal, setShowPrivacyModal] = useState(false);
1766

1867
const searchParams = useSearchParams();
1968
const deeplinkParams = parseDeeplinkParams(searchParams);
@@ -109,10 +158,20 @@ function HomeContent() {
109158
>
110159
PolicyEngine.org
111160
</a>
161+
{' • '}
162+
<button
163+
onClick={() => setShowPrivacyModal(true)}
164+
className="text-blue-600 hover:text-blue-800"
165+
>
166+
Privacy & Terms
167+
</button>
112168
</p>
113169
</div>
114170
</div>
115171
</footer>
172+
173+
{/* Privacy Modal */}
174+
<PrivacyModal isOpen={showPrivacyModal} onClose={() => setShowPrivacyModal(false)} />
116175
</main>
117176
);
118177
}

microimputation-dashboard/components/BenchmarkLossCharts.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ export default function BenchmarkLossCharts({ data }: BenchmarkLossChartsProps)
372372
<span className="font-semibold text-gray-900">{bestModel.quantileLoss.toFixed(6)}</span>
373373
</div>
374374
{bestModel.quantileTrainTestRatio !== undefined && (
375-
<span className={`text-xs ${bestModel.quantileTrainTestRatio > 1.1 ? 'text-amber-600' : 'text-gray-700'}`}>
375+
<span className={`text-xs ${bestModel.quantileTrainTestRatio > 1.1 ? 'text-gray-700' : 'text-gray-900'}`}>
376376
Train/test ratio: {bestModel.quantileTrainTestRatio.toFixed(3)}
377377
</span>
378378
)}

microimputation-dashboard/components/DistributionOverlay.tsx

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,26 @@ import {
1111
Tooltip,
1212
Legend,
1313
ResponsiveContainer,
14+
Brush,
1415
} from 'recharts';
1516

17+
/**
 * Format a number for use in compact axis/bin labels.
 *
 * - Magnitudes >= 100,000 or <= 0.0001 (excluding 0) use scientific notation
 *   with `precision` fraction digits.
 * - Magnitudes strictly between 0.0001 and 1 use fixed notation with
 *   `precision + 2` fraction digits so small values keep significant digits.
 * - Everything else (including 0) uses fixed notation with `precision`
 *   fraction digits.
 *
 * The notation is re-checked after rounding so that values sitting just below
 * a threshold format the same way as the threshold itself (e.g. 99999.999
 * becomes "1.00e+5" rather than "100000.00").
 *
 * @param value - The number to format.
 * @param precision - Fraction digits for the standard and scientific forms.
 * @returns The formatted string; non-finite inputs are returned as
 *   "NaN", "Infinity" or "-Infinity".
 */
function formatLargeNumber(value: number, precision: number = 2): string {
  const largeThreshold = 100000;
  const smallThreshold = 0.0001;

  // toFixed/toExponential already stringify these as-is; make that explicit.
  if (!Number.isFinite(value)) {
    return String(value);
  }

  const absValue = Math.abs(value);

  // Use scientific notation for values >= 100,000 or <= 0.0001 (but not 0)
  if (absValue >= largeThreshold || (absValue > 0 && absValue <= smallThreshold)) {
    return value.toExponential(precision);
  }

  // Sub-unit values get two extra digits of fixed notation
  if (absValue > 0 && absValue < 1) {
    const fine = value.toFixed(precision + 2);
    // Rounding may carry the value up to 1; format it like any other value >= 1.
    return Math.abs(Number(fine)) >= 1 ? value.toFixed(precision) : fine;
  }

  const fixed = value.toFixed(precision);
  // Rounding may carry the value up to the large threshold; switch notation so
  // the label matches how the threshold itself is rendered.
  return Math.abs(Number(fixed)) >= largeThreshold ? value.toExponential(precision) : fixed;
}
33+
1634
interface DistributionOverlayProps {
1735
data: ImputationDataPoint[];
1836
}
@@ -72,14 +90,16 @@ export default function DistributionOverlay({
7290
const info = JSON.parse(d.additional_info);
7391

7492
if (d.metric_name === 'histogram_distribution') {
75-
// Numerical variable
93+
// Numerical variable - use scientific notation for large values
94+
const binStartFormatted = formatLargeNumber(info.bin_start);
95+
const binEndFormatted = formatLargeNumber(info.bin_end);
7696
(distributions[variable].data as BinData[]).push({
7797
binIndex: info.bin_index,
7898
binStart: info.bin_start,
7999
binEnd: info.bin_end,
80100
donorHeight: info.donor_height,
81101
receiverHeight: info.receiver_height,
82-
binLabel: `${info.bin_start.toFixed(2)}-${info.bin_end.toFixed(2)}`,
102+
binLabel: `${binStartFormatted}-${binEndFormatted}`,
83103
});
84104
distributions[variable].nSamplesDonor = info.n_samples_donor;
85105
distributions[variable].nSamplesReceiver = info.n_samples_receiver;
@@ -130,7 +150,7 @@ export default function DistributionOverlay({
130150

131151
return (
132152
<div>
133-
<ResponsiveContainer width="100%" height={400}>
153+
<ResponsiveContainer width="100%" height={580}>
134154
<BarChart
135155
data={chartData}
136156
margin={{ top: 20, right: 30, left: 20, bottom: 60 }}
@@ -161,11 +181,19 @@ export default function DistributionOverlay({
161181
/>
162182
<Tooltip
163183
formatter={(value: number) => [`${value.toFixed(2)}%`, '']}
164-
labelFormatter={(label) => `Bin: ${label}`}
184+
labelFormatter={(_label, payload) => {
185+
if (payload && payload.length > 0 && payload[0].payload) {
186+
const { binStart, binEnd } = payload[0].payload;
187+
// Show full values with commas in tooltip for readability
188+
const startStr = binStart.toLocaleString(undefined, { maximumFractionDigits: 2 });
189+
const endStr = binEnd.toLocaleString(undefined, { maximumFractionDigits: 2 });
190+
return `Bin: ${startStr} - ${endStr}`;
191+
}
192+
return `Bin: ${_label}`;
193+
}}
165194
contentStyle={{ color: '#000000' }}
166195
labelStyle={{ color: '#000000' }}
167196
/>
168-
<Legend wrapperStyle={{ color: '#000000', paddingTop: '10px' }} />
169197
<Bar
170198
dataKey="Donor"
171199
fill="#3b82f6"
@@ -178,10 +206,21 @@ export default function DistributionOverlay({
178206
fillOpacity={0.7}
179207
name={`Receiver (n=${dist.nSamplesReceiver})`}
180208
/>
209+
<Brush
210+
dataKey="name"
211+
height={30}
212+
stroke="#8884d8"
213+
fill="#f3f4f6"
214+
tickFormatter={() => ''}
215+
/>
216+
<Legend
217+
verticalAlign="bottom"
218+
wrapperStyle={{ color: '#000000', paddingTop: '45px' }}
219+
/>
181220
</BarChart>
182221
</ResponsiveContainer>
183222
<p className="text-xs text-gray-600 mt-2 text-center">
184-
Histogram with {(dist.data as BinData[]).length} bins. Each bin shows the percentage of values falling within that range.
223+
Histogram with {(dist.data as BinData[]).length} bins. Drag the handles below to zoom into a specific range.
185224
Overlapping bars indicate similar distributions.
186225
</p>
187226
</div>

microimputation-dashboard/components/ImputationResults.tsx

Lines changed: 78 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ interface DistributionMetric {
1414
method: string;
1515
metricName: string;
1616
value: number;
17+
normalizedValue?: number; // Wasserstein distance as percentage of variable range
18+
variableRange?: number; // Range of the variable for context
1719
}
1820

1921
export default function ImputationResults({ data }: ImputationResultsProps) {
@@ -22,6 +24,34 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
2224
return data.filter(d => d.type === 'distribution_distance');
2325
}, [data]);
2426

27+
// Extract variable ranges from distribution_bins data
28+
const variableRanges = useMemo(() => {
29+
const ranges: Record<string, { min: number; max: number }> = {};
30+
const distributionBins = data.filter(d => d.type === 'distribution_bins' && d.metric_name === 'histogram_distribution');
31+
32+
distributionBins.forEach(d => {
33+
try {
34+
const info = JSON.parse(d.additional_info);
35+
const variable = d.variable;
36+
37+
if (!ranges[variable]) {
38+
ranges[variable] = { min: Infinity, max: -Infinity };
39+
}
40+
41+
if (info.bin_start !== undefined) {
42+
ranges[variable].min = Math.min(ranges[variable].min, info.bin_start);
43+
}
44+
if (info.bin_end !== undefined) {
45+
ranges[variable].max = Math.max(ranges[variable].max, info.bin_end);
46+
}
47+
} catch (e) {
48+
// Ignore parsing errors
49+
}
50+
});
51+
52+
return ranges;
53+
}, [data]);
54+
2555
// Group by metric type
2656
const { wassersteinData, klDivergenceData } = useMemo(() => {
2757
const wasserstein: DistributionMetric[] = [];
@@ -36,21 +66,28 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
3666
};
3767

3868
if (d.metric_name === 'wasserstein_distance') {
69+
// Calculate normalized value as percentage of variable range
70+
const range = variableRanges[d.variable];
71+
if (range && range.max > range.min) {
72+
const variableRange = range.max - range.min;
73+
metric.variableRange = variableRange;
74+
metric.normalizedValue = (metric.value / variableRange) * 100;
75+
}
3976
wasserstein.push(metric);
4077
} else if (d.metric_name === 'kl_divergence') {
4178
klDiv.push(metric);
4279
}
4380
});
4481

45-
// Sort by value (ascending - lower is better)
46-
wasserstein.sort((a, b) => a.value - b.value);
82+
// Sort by normalized value if available, otherwise by raw value (ascending - lower is better)
83+
wasserstein.sort((a, b) => (a.normalizedValue ?? a.value) - (b.normalizedValue ?? b.value));
4784
klDiv.sort((a, b) => a.value - b.value);
4885

4986
return {
5087
wassersteinData: wasserstein,
5188
klDivergenceData: klDiv
5289
};
53-
}, [distributionData]);
90+
}, [distributionData, variableRanges]);
5491

5592
const hasWasserstein = wassersteinData.length > 0;
5693
const hasKLDivergence = klDivergenceData.length > 0;
@@ -59,13 +96,17 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
5996
return null;
6097
}
6198

62-
// Color function based on value quality (lower is better)
63-
const getWassersteinColor = (value: number): string => {
64-
if (value < 0.01) return '#16a34a'; // Dark green - excellent
65-
if (value < 0.05) return '#22c55e'; // Green - good
66-
if (value < 0.1) return '#eab308'; // Yellow - moderate
67-
if (value < 0.2) return '#f97316'; // Orange - fair
68-
return '#ef4444'; // Red - poor
99+
// Color function based on normalized value (percentage of range) - lower is better
100+
const getWassersteinColor = (normalizedValue: number | undefined, rawValue: number): string => {
101+
// Use normalized value if available, otherwise fall back to raw thresholds
102+
const value = normalizedValue ?? (rawValue * 100); // Assume raw is already a fraction if no range
103+
104+
// Thresholds as percentage of variable range
105+
if (value < 1) return '#16a34a'; // Dark green - excellent (<1% of range)
106+
if (value < 3) return '#22c55e'; // Green - good (<3% of range)
107+
if (value < 5) return '#eab308'; // Yellow - moderate (<5% of range)
108+
if (value < 10) return '#f97316'; // Orange - fair (<10% of range)
109+
return '#ef4444'; // Red - poor (>=10% of range)
69110
};
70111

71112
const getKLColor = (value: number): string => {
@@ -112,9 +153,9 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
112153
greater differences between imputed and true distributions.
113154
</p>
114155
<p className="text-sm text-gray-700">
115-
<strong>Interpretation:</strong> Values closer to 0 are better. Generally, values below
116-
0.05 indicate good imputation quality, while values above 0.2 suggest significant
117-
distributional differences.
156+
<strong>Interpretation:</strong> Since Wasserstein distance is scale-dependent, quality is assessed
157+
relative to each variable&apos;s range. A distance of &lt;1% of the variable range is excellent,
158+
&lt;3% is good, &lt;5% is moderate, &lt;10% is fair, and &ge;10% suggests poor distributional match.
118159
</p>
119160
</div>
120161

@@ -130,14 +171,21 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
130171
<XAxis type="number" tick={{ fill: '#000000' }} />
131172
<YAxis type="category" dataKey="variable" width={90} tick={{ fill: '#000000' }} />
132173
<Tooltip
133-
formatter={(value: number) => [value.toFixed(6), 'Wasserstein Distance']}
174+
formatter={(value: number, _name: string, props: { payload?: DistributionMetric }) => {
175+
const normalizedValue = props.payload?.normalizedValue;
176+
const distanceStr = value.toFixed(6);
177+
const pctStr = normalizedValue !== undefined ? ` (${normalizedValue.toFixed(2)}% of range)` : '';
178+
return [`${distanceStr}${pctStr}`, 'Wasserstein Distance'];
179+
}}
180+
contentStyle={{ color: '#000000' }}
181+
labelStyle={{ color: '#000000' }}
134182
/>
135183
<Legend wrapperStyle={{ color: '#000000' }} />
136184
<Bar dataKey="value" name="Wasserstein Distance">
137185
{wassersteinData.map((entry, index) => (
138186
<Cell
139187
key={`cell-${index}`}
140-
fill={getWassersteinColor(entry.value)}
188+
fill={getWassersteinColor(entry.normalizedValue, entry.value)}
141189
/>
142190
))}
143191
</Bar>
@@ -156,6 +204,9 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
156204
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
157205
Wasserstein Distance
158206
</th>
207+
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
208+
% of Range
209+
</th>
159210
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
160211
Quality Assessment
161212
</th>
@@ -166,16 +217,19 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
166217
let assessment = '';
167218
let assessmentColor = '';
168219

169-
if (item.value < 0.01) {
220+
// Use normalized value (percentage of range) for assessment
221+
const normalizedValue = item.normalizedValue ?? (item.value * 100);
222+
223+
if (normalizedValue < 1) {
170224
assessment = 'Excellent';
171225
assessmentColor = 'text-green-700 font-semibold';
172-
} else if (item.value < 0.05) {
226+
} else if (normalizedValue < 3) {
173227
assessment = 'Good';
174228
assessmentColor = 'text-green-600';
175-
} else if (item.value < 0.1) {
229+
} else if (normalizedValue < 5) {
176230
assessment = 'Moderate';
177231
assessmentColor = 'text-yellow-600';
178-
} else if (item.value < 0.2) {
232+
} else if (normalizedValue < 10) {
179233
assessment = 'Fair';
180234
assessmentColor = 'text-orange-600';
181235
} else {
@@ -191,6 +245,9 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
191245
<td className="px-4 py-3 whitespace-nowrap text-sm text-gray-700">
192246
{item.value.toFixed(6)}
193247
</td>
248+
<td className="px-4 py-3 whitespace-nowrap text-sm text-gray-700">
249+
{item.normalizedValue !== undefined ? `${item.normalizedValue.toFixed(2)}%` : 'N/A'}
250+
</td>
194251
<td className={`px-4 py-3 whitespace-nowrap text-sm ${assessmentColor}`}>
195252
{assessment}
196253
</td>
@@ -243,6 +300,8 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
243300
<YAxis type="category" dataKey="variable" width={90} tick={{ fill: '#000000' }} />
244301
<Tooltip
245302
formatter={(value: number) => [value.toFixed(6), 'KL-Divergence']}
303+
contentStyle={{ color: '#000000' }}
304+
labelStyle={{ color: '#000000' }}
246305
/>
247306
<Legend wrapperStyle={{ color: '#000000' }} />
248307
<Bar dataKey="value" name="KL-Divergence">

0 commit comments

Comments
 (0)