PolicyEngine
diff --git a/‎changelog_entry.yaml‎
Lines changed: 6 additions & 0 deletions b/‎changelog_entry.yaml‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎microimputation-dashboard/app/page.tsx‎
Lines changed: 59 additions & 0 deletions b/‎microimputation-dashboard/app/page.tsx‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎microimputation-dashboard/components/BenchmarkLossCharts.tsx‎
Lines changed: 1 addition & 1 deletion b/‎microimputation-dashboard/components/BenchmarkLossCharts.tsx‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎microimputation-dashboard/components/DistributionOverlay.tsx‎
Lines changed: 45 additions & 6 deletions b/‎microimputation-dashboard/components/DistributionOverlay.tsx‎
Lines changed: 45 additions & 6 deletions
diff --git a/‎microimputation-dashboard/components/ImputationResults.tsx‎
Lines changed: 78 additions & 19 deletions b/‎microimputation-dashboard/components/ImputationResults.tsx‎
Lines changed: 78 additions & 19 deletions
@@ -0,0 +1,6 @@
+- bump: minor
+  changes:
+    added:
+    - Benchmarking experiments for wealth imputation paper draft.
+    - MDN model to experiments run in imputing-from-scf-to-cps.ipynb.
+    - Privacy & Terms modal to microimputation-dashboard.
@@ -8,12 +8,61 @@ import { parseImputationCSV } from '@/utils/csvParser';
 import { ImputationDataPoint } from '@/types/imputation';
 import { parseDeeplinkParams, GitHubArtifactInfo } from '@/utils/deeplinks';
 
+/**
+ * Modal overlay presenting the dashboard's data-privacy notice, disclaimer
+ * and user-responsibility terms.
+ *
+ * Renders nothing while `isOpen` is false. Open/closed state is owned by the
+ * caller; this component only reports the Close click through `onClose`.
+ *
+ * The panel is exposed to assistive technology as a modal dialog named by its
+ * heading, and the Close button declares `type="button"` so it can never act
+ * as a submit control if the modal is rendered inside a form.
+ *
+ * @param isOpen - Whether the modal should be rendered.
+ * @param onClose - Called when the Close button is clicked.
+ */
+function PrivacyModal({ isOpen, onClose }: { isOpen: boolean; onClose: () => void }) {
+  if (!isOpen) return null;
+
+  return (
+    <div className="fixed inset-0 bg-black bg-opacity-50 z-50 flex items-center justify-center p-4">
+      <div
+        role="dialog"
+        aria-modal="true"
+        aria-labelledby="privacy-modal-title"
+        className="bg-white rounded-lg max-w-lg w-full p-6 shadow-xl"
+      >
+        <h2 id="privacy-modal-title" className="text-xl font-bold text-gray-900 mb-4">Privacy & Terms of Use</h2>
+
+        <div className="space-y-4 text-sm text-gray-700">
+          <div>
+            <h3 className="font-semibold text-gray-900 mb-1">Data Privacy</h3>
+            <p>
+              All data uploaded to this dashboard is processed entirely within your browser.
+              No data is transmitted to or stored on PolicyEngine servers. When you close or
+              refresh this page, all loaded data is cleared from memory.
+            </p>
+          </div>
+
+          <div>
+            <h3 className="font-semibold text-gray-900 mb-1">Disclaimer</h3>
+            <p>
+              This tool is provided &quot;as is&quot; without warranty of any kind, express or implied.
+              PolicyEngine assumes no responsibility for the security, accuracy, or confidentiality
+              of any data you choose to load into this application.
+            </p>
+          </div>
+
+          <div>
+            <h3 className="font-semibold text-gray-900 mb-1">User Responsibility</h3>
+            <p>
+              Users are solely responsible for ensuring they have appropriate rights to use any
+              data loaded into this dashboard and for compliance with applicable data protection
+              regulations.
+            </p>
+          </div>
+        </div>
+
+        <button
+          type="button"
+          onClick={onClose}
+          className="mt-6 w-full bg-blue-600 hover:bg-blue-700 text-white font-medium py-2 px-4 rounded-md transition-colors"
+        >
+          Close
+        </button>
+      </div>
+    </div>
+  );
+}
+
 function HomeContent() {
   const [data, setData] = useState<ImputationDataPoint[]>([]);
   const [fileName, setFileName] = useState<string>('');
   const [showDashboard, setShowDashboard] = useState(false);
   const [isLoadingFromDeeplink, setIsLoadingFromDeeplink] = useState(false);
   const [githubArtifactInfo, setGithubArtifactInfo] = useState<GitHubArtifactInfo | null>(null);
+  const [showPrivacyModal, setShowPrivacyModal] = useState(false);
 
   const searchParams = useSearchParams();
   const deeplinkParams = parseDeeplinkParams(searchParams);
@@ -109,10 +158,20 @@ function HomeContent() {
               >
                 PolicyEngine.org
               </a>
+              {' • '}
+              <button
+                onClick={() => setShowPrivacyModal(true)}
+                className="text-blue-600 hover:text-blue-800"
+              >
+                Privacy & Terms
+              </button>
             </p>
           </div>
         </div>
       </footer>
+
+      {/* Privacy Modal */}
+      <PrivacyModal isOpen={showPrivacyModal} onClose={() => setShowPrivacyModal(false)} />
     </main>
   );
 }
 
@@ -372,7 +372,7 @@ export default function BenchmarkLossCharts({ data }: BenchmarkLossChartsProps)
                   <span className="font-semibold text-gray-900">{bestModel.quantileLoss.toFixed(6)}</span>
                 </div>
                 {bestModel.quantileTrainTestRatio !== undefined && (
-                  <span className={`text-xs ${bestModel.quantileTrainTestRatio > 1.1 ? 'text-amber-600' : 'text-gray-700'}`}>
+                  <span className={`text-xs ${bestModel.quantileTrainTestRatio > 1.1 ? 'text-gray-700' : 'text-gray-900'}`}>
                     Train/test ratio: {bestModel.quantileTrainTestRatio.toFixed(3)}
                   </span>
                 )}
 
@@ -11,8 +11,26 @@ import {
   Tooltip,
   Legend,
   ResponsiveContainer,
+  Brush,
 } from 'recharts';
 
+/**
+ * Render a number as a compact display string.
+ *
+ * Magnitudes of at least 100,000, and non-zero magnitudes of at most 0.0001,
+ * are written in exponential notation with `precision` fraction digits.
+ * Other non-zero magnitudes below 1 are written in fixed notation with
+ * `precision + 2` decimals; every remaining value (including 0) is written
+ * in fixed notation with `precision` decimals.
+ *
+ * @param value - Number to format.
+ * @param precision - Base number of fraction digits (defaults to 2).
+ * @returns The formatted string.
+ */
+function formatLargeNumber(value: number, precision: number = 2): string {
+  const magnitude = Math.abs(value);
+  const isNonZero = magnitude > 0;
+  const needsExponent = magnitude >= 100000 || (isNonZero && magnitude <= 0.0001);
+  if (needsExponent) {
+    return value.toExponential(precision);
+  }
+  // Non-zero values below 1 in magnitude are shown with two extra decimals
+  const decimals = isNonZero && magnitude < 1 ? precision + 2 : precision;
+  return value.toFixed(decimals);
+}
+
 interface DistributionOverlayProps {
   data: ImputationDataPoint[];
 }
@@ -72,14 +90,16 @@ export default function DistributionOverlay({
         const info = JSON.parse(d.additional_info);
 
         if (d.metric_name === 'histogram_distribution') {
-          // Numerical variable
+          // Numerical variable - use scientific notation for large values
+          const binStartFormatted = formatLargeNumber(info.bin_start);
+          const binEndFormatted = formatLargeNumber(info.bin_end);
           (distributions[variable].data as BinData[]).push({
             binIndex: info.bin_index,
             binStart: info.bin_start,
             binEnd: info.bin_end,
             donorHeight: info.donor_height,
             receiverHeight: info.receiver_height,
-            binLabel: `${info.bin_start.toFixed(2)}-${info.bin_end.toFixed(2)}`,
+            binLabel: `${binStartFormatted}-${binEndFormatted}`,
           });
           distributions[variable].nSamplesDonor = info.n_samples_donor;
           distributions[variable].nSamplesReceiver = info.n_samples_receiver;
@@ -130,7 +150,7 @@ export default function DistributionOverlay({
 
     return (
       <div>
-        <ResponsiveContainer width="100%" height={400}>
+        <ResponsiveContainer width="100%" height={580}>
           <BarChart
             data={chartData}
             margin={{ top: 20, right: 30, left: 20, bottom: 60 }}
@@ -161,11 +181,19 @@ export default function DistributionOverlay({
             />
             <Tooltip
               formatter={(value: number) => [`${value.toFixed(2)}%`, '']}
-              labelFormatter={(label) => `Bin: ${label}`}
+              labelFormatter={(_label, payload) => {
+                if (payload && payload.length > 0 && payload[0].payload) {
+                  const { binStart, binEnd } = payload[0].payload;
+                  // Show full values with commas in tooltip for readability
+                  const startStr = binStart.toLocaleString(undefined, { maximumFractionDigits: 2 });
+                  const endStr = binEnd.toLocaleString(undefined, { maximumFractionDigits: 2 });
+                  return `Bin: ${startStr} - ${endStr}`;
+                }
+                return `Bin: ${_label}`;
+              }}
               contentStyle={{ color: '#000000' }}
               labelStyle={{ color: '#000000' }}
             />
-            <Legend wrapperStyle={{ color: '#000000', paddingTop: '10px' }} />
             <Bar
               dataKey="Donor"
               fill="#3b82f6"
@@ -178,10 +206,21 @@ export default function DistributionOverlay({
               fillOpacity={0.7}
               name={`Receiver (n=${dist.nSamplesReceiver})`}
             />
+            <Brush
+              dataKey="name"
+              height={30}
+              stroke="#8884d8"
+              fill="#f3f4f6"
+              tickFormatter={() => ''}
+            />
+            <Legend
+              verticalAlign="bottom"
+              wrapperStyle={{ color: '#000000', paddingTop: '45px' }}
+            />
           </BarChart>
         </ResponsiveContainer>
         <p className="text-xs text-gray-600 mt-2 text-center">
-          Histogram with {(dist.data as BinData[]).length} bins. Each bin shows the percentage of values falling within that range.
+          Histogram with {(dist.data as BinData[]).length} bins. Drag the handles below to zoom into a specific range.
           Overlapping bars indicate similar distributions.
         </p>
       </div>
 
@@ -14,6 +14,8 @@ interface DistributionMetric {
   method: string;
   metricName: string;
   value: number;
+  normalizedValue?: number;  // Wasserstein distance as percentage of variable range
+  variableRange?: number;    // Range of the variable for context
 }
 
 export default function ImputationResults({ data }: ImputationResultsProps) {
@@ -22,6 +24,34 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
     return data.filter(d => d.type === 'distribution_distance');
   }, [data]);
 
+  // Extract variable ranges from distribution_bins data
+  const variableRanges = useMemo(() => {
+    const ranges: Record<string, { min: number; max: number }> = {};
+    const distributionBins = data.filter(d => d.type === 'distribution_bins' && d.metric_name === 'histogram_distribution');
+
+    distributionBins.forEach(d => {
+      try {
+        const info = JSON.parse(d.additional_info);
+        const variable = d.variable;
+
+        if (!ranges[variable]) {
+          ranges[variable] = { min: Infinity, max: -Infinity };
+        }
+
+        if (info.bin_start !== undefined) {
+          ranges[variable].min = Math.min(ranges[variable].min, info.bin_start);
+        }
+        if (info.bin_end !== undefined) {
+          ranges[variable].max = Math.max(ranges[variable].max, info.bin_end);
+        }
+      } catch (e) {
+        // Ignore parsing errors
+      }
+    });
+
+    return ranges;
+  }, [data]);
+
   // Group by metric type
   const { wassersteinData, klDivergenceData } = useMemo(() => {
     const wasserstein: DistributionMetric[] = [];
@@ -36,21 +66,28 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
       };
 
       if (d.metric_name === 'wasserstein_distance') {
+        // Calculate normalized value as percentage of variable range
+        const range = variableRanges[d.variable];
+        if (range && range.max > range.min) {
+          const variableRange = range.max - range.min;
+          metric.variableRange = variableRange;
+          metric.normalizedValue = (metric.value / variableRange) * 100;
+        }
         wasserstein.push(metric);
       } else if (d.metric_name === 'kl_divergence') {
         klDiv.push(metric);
       }
     });
 
-    // Sort by value (ascending - lower is better)
-    wasserstein.sort((a, b) => a.value - b.value);
+    // Sort by normalized value if available, otherwise by raw value (ascending - lower is better)
+    wasserstein.sort((a, b) => (a.normalizedValue ?? a.value) - (b.normalizedValue ?? b.value));
     klDiv.sort((a, b) => a.value - b.value);
 
     return {
       wassersteinData: wasserstein,
       klDivergenceData: klDiv
     };
-  }, [distributionData]);
+  }, [distributionData, variableRanges]);
 
   const hasWasserstein = wassersteinData.length > 0;
   const hasKLDivergence = klDivergenceData.length > 0;
@@ -59,13 +96,17 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
     return null;
   }
 
-  // Color function based on value quality (lower is better)
-  const getWassersteinColor = (value: number): string => {
-    if (value < 0.01) return '#16a34a'; // Dark green - excellent
-    if (value < 0.05) return '#22c55e'; // Green - good
-    if (value < 0.1) return '#eab308'; // Yellow - moderate
-    if (value < 0.2) return '#f97316'; // Orange - fair
-    return '#ef4444'; // Red - poor
+  // Color function based on normalized value (percentage of range) - lower is better
+  const getWassersteinColor = (normalizedValue: number | undefined, rawValue: number): string => {
+    // Use the normalized value if available; otherwise scale the raw value by 100 and apply the same percentage thresholds
+    const value = normalizedValue ?? (rawValue * 100);  // Assume raw is already a fraction if no range
+
+    // Thresholds as percentage of variable range
+    if (value < 1) return '#16a34a';   // Dark green - excellent (<1% of range)
+    if (value < 3) return '#22c55e';   // Green - good (<3% of range)
+    if (value < 5) return '#eab308';   // Yellow - moderate (<5% of range)
+    if (value < 10) return '#f97316';  // Orange - fair (<10% of range)
+    return '#ef4444';                   // Red - poor (>=10% of range)
   };
 
   const getKLColor = (value: number): string => {
@@ -112,9 +153,9 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
               greater differences between imputed and true distributions.
             </p>
             <p className="text-sm text-gray-700">
-              <strong>Interpretation:</strong> Values closer to 0 are better. Generally, values below
-              0.05 indicate good imputation quality, while values above 0.2 suggest significant
-              distributional differences.
+              <strong>Interpretation:</strong> Since Wasserstein distance is scale-dependent, quality is assessed
+              relative to each variable&apos;s range. A distance of &lt;1% of the variable range is excellent,
+              &lt;3% is good, &lt;5% is moderate, &lt;10% is fair, and &ge;10% suggests poor distributional match.
             </p>
           </div>
 
@@ -130,14 +171,21 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
                 <XAxis type="number" tick={{ fill: '#000000' }} />
                 <YAxis type="category" dataKey="variable" width={90} tick={{ fill: '#000000' }} />
                 <Tooltip
-                  formatter={(value: number) => [value.toFixed(6), 'Wasserstein Distance']}
+                  formatter={(value: number, _name: string, props: { payload?: DistributionMetric }) => {
+                    const normalizedValue = props.payload?.normalizedValue;
+                    const distanceStr = value.toFixed(6);
+                    const pctStr = normalizedValue !== undefined ? ` (${normalizedValue.toFixed(2)}% of range)` : '';
+                    return [`${distanceStr}${pctStr}`, 'Wasserstein Distance'];
+                  }}
+                  contentStyle={{ color: '#000000' }}
+                  labelStyle={{ color: '#000000' }}
                 />
                 <Legend wrapperStyle={{ color: '#000000' }} />
                 <Bar dataKey="value" name="Wasserstein Distance">
                   {wassersteinData.map((entry, index) => (
                     <Cell
                       key={`cell-${index}`}
-                      fill={getWassersteinColor(entry.value)}
+                      fill={getWassersteinColor(entry.normalizedValue, entry.value)}
                     />
                   ))}
                 </Bar>
@@ -156,6 +204,9 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
                   <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
                     Wasserstein Distance
                   </th>
+                  <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
+                    % of Range
+                  </th>
                   <th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
                     Quality Assessment
                   </th>
@@ -166,16 +217,19 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
                   let assessment = '';
                   let assessmentColor = '';
 
-                  if (item.value < 0.01) {
+                  // Use normalized value (percentage of range) for assessment
+                  const normalizedValue = item.normalizedValue ?? (item.value * 100);
+
+                  if (normalizedValue < 1) {
                     assessment = 'Excellent';
                     assessmentColor = 'text-green-700 font-semibold';
-                  } else if (item.value < 0.05) {
+                  } else if (normalizedValue < 3) {
                     assessment = 'Good';
                     assessmentColor = 'text-green-600';
-                  } else if (item.value < 0.1) {
+                  } else if (normalizedValue < 5) {
                     assessment = 'Moderate';
                     assessmentColor = 'text-yellow-600';
-                  } else if (item.value < 0.2) {
+                  } else if (normalizedValue < 10) {
                     assessment = 'Fair';
                     assessmentColor = 'text-orange-600';
                   } else {
@@ -191,6 +245,9 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
                       <td className="px-4 py-3 whitespace-nowrap text-sm text-gray-700">
                         {item.value.toFixed(6)}
                       </td>
+                      <td className="px-4 py-3 whitespace-nowrap text-sm text-gray-700">
+                        {item.normalizedValue !== undefined ? `${item.normalizedValue.toFixed(2)}%` : 'N/A'}
+                      </td>
                       <td className={`px-4 py-3 whitespace-nowrap text-sm ${assessmentColor}`}>
                         {assessment}
                       </td>
@@ -243,6 +300,8 @@ export default function ImputationResults({ data }: ImputationResultsProps) {
                 <YAxis type="category" dataKey="variable" width={90} tick={{ fill: '#000000' }} />
                 <Tooltip
                   formatter={(value: number) => [value.toFixed(6), 'KL-Divergence']}
+                  contentStyle={{ color: '#000000' }}
+                  labelStyle={{ color: '#000000' }}
                 />
                 <Legend wrapperStyle={{ color: '#000000' }} />
                 <Bar dataKey="value" name="KL-Divergence">