diff --git a/schema_memory_usage.sql b/schema_memory_usage.sql new file mode 100644 index 0000000..7e55082 --- /dev/null +++ b/schema_memory_usage.sql @@ -0,0 +1,106 @@ +-- ============================================================================= +-- QUERY: Schema-Level Storage Summary +-- ============================================================================= +-- This query provides a summary of each schema in the database, showing +-- the number of tables, views, total storage size, and row counts. +-- It also flags schemas that are empty or contain only views. +-- +-- USE CASE: Database capacity planning, identifying unused schemas, +-- storage allocation review, schema cleanup decisions. +-- +-- OUTPUT COLUMNS: +-- schema_name - The name of the schema +-- table_count - Number of tables in the schema +-- view_count - Number of views in the schema +-- size_mb - Total size of all tables in MB +-- size_gb - Total size of all tables in GB +-- total_rows - Sum of all rows across all tables +-- schema_status - 'empty', 'views_only', or 'has_tables' +-- +-- NOTE: The schema_status column helps identify schemas that may be +-- candidates for cleanup (empty) or that only contain view definitions. +-- ============================================================================= + +-- ----------------------------------------------------------------------------- +-- CTE 1: all_schemas +-- ----------------------------------------------------------------------------- +-- Get a complete list of all user-created schemas from pg_namespace. +-- This ensures we capture schemas that might be empty or only have views, +-- which wouldn't appear in svv_table_info. +-- ----------------------------------------------------------------------------- +WITH all_schemas AS ( + SELECT nspname AS schema_name + FROM pg_namespace + WHERE nspname NOT IN ( + 'pg_catalog', -- PostgreSQL system catalog + 'information_schema', -- SQL standard metadata schema + 'pg_internal', -- Redshift internal schema + 'pg_auto_copy', -- Redshift auto-copy schema + 'pg_automv', -- Redshift auto materialized views + 'pg_mv', -- Redshift materialized views metadata + 'pg_s3' -- Redshift Spectrum S3 schema + ) + AND nspname NOT LIKE 'pg_temp%' -- Exclude temporary schemas + AND nspname NOT LIKE 'pg_toast%' -- Exclude TOAST storage schemas +), + +-- ----------------------------------------------------------------------------- +-- CTE 2: table_sizes +-- ----------------------------------------------------------------------------- +-- Aggregate table statistics by schema from svv_table_info. +-- This gives us the count of tables, total size, and total rows per schema. +-- Schemas with no tables won't appear here (handled by LEFT JOIN later). +-- ----------------------------------------------------------------------------- +table_sizes AS ( + SELECT + "schema" AS schema_name, + COUNT(DISTINCT "table") AS table_count, + ROUND(SUM(size)::NUMERIC, 2) AS size_mb, + ROUND(SUM(size)::NUMERIC / 1024, 2) AS size_gb, + ROUND(SUM(tbl_rows)::NUMERIC, 0) AS total_rows + FROM svv_table_info + WHERE "schema" NOT IN ('pg_catalog', 'information_schema', 'pg_internal') + GROUP BY "schema" +), + +-- ----------------------------------------------------------------------------- +-- CTE 3: view_counts +-- ----------------------------------------------------------------------------- +-- Count views per schema using pg_class (relkind = 'v' indicates a view). +-- This is joined with pg_namespace to get the schema name. +-- Schemas with no views won't appear here (handled by LEFT JOIN later). +-- ----------------------------------------------------------------------------- +view_counts AS ( + SELECT + n.nspname AS schema_name, + COUNT(*) AS view_count + FROM pg_class c + JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE c.relkind = 'v' -- 'v' = view in pg_class + AND n.nspname NOT IN ('pg_catalog', 'information_schema', 'pg_internal') + GROUP BY n.nspname +) + +-- ----------------------------------------------------------------------------- +-- Main SELECT: Join all CTEs and calculate schema_status +-- ----------------------------------------------------------------------------- +-- LEFT JOINs ensure we get all schemas even if they have no tables or views. +-- COALESCE converts NULLs to 0 for schemas missing from table_sizes or view_counts. +-- The CASE statement determines the schema_status based on object counts. +-- ----------------------------------------------------------------------------- +SELECT + s.schema_name, + COALESCE(t.table_count, 0) AS table_count, + COALESCE(v.view_count, 0) AS view_count, + COALESCE(t.size_mb, 0) AS size_mb, + COALESCE(t.size_gb, 0) AS size_gb, + COALESCE(t.total_rows, 0) AS total_rows, + CASE + WHEN COALESCE(t.table_count, 0) = 0 AND COALESCE(v.view_count, 0) = 0 THEN 'empty' + WHEN COALESCE(t.table_count, 0) = 0 AND COALESCE(v.view_count, 0) > 0 THEN 'views_only' + ELSE 'has_tables' + END AS schema_status +FROM all_schemas s +LEFT JOIN table_sizes t ON s.schema_name = t.schema_name +LEFT JOIN view_counts v ON s.schema_name = v.schema_name +ORDER BY s.schema_name ASC; diff --git a/sql/table_memory_usage.sql b/sql/table_memory_usage.sql new file mode 100644 index 0000000..a7cd2b9 --- /dev/null +++ b/sql/table_memory_usage.sql @@ -0,0 +1,95 @@ +-- ============================================================================= +-- QUERY: Individual Tables and Views Inventory +-- ============================================================================= +-- This query returns a comprehensive list of all tables and views in the +-- Redshift database, along with their size and row count information. +-- +-- USE CASE: Database inventory, storage analysis, identifying large tables, +-- finding unused objects, or auditing database contents. +-- +-- OUTPUT COLUMNS: +-- schema_name - The schema containing the object +-- table_name - The name of the table or view +-- object_type - Either 'table' or 'view' +-- size_mb - Size in megabytes (0 for views since they don't store data) +-- size_gb - Size in gigabytes (0 for views) +-- row_count - Number of rows (0 for views) +-- +-- NOTE: Views show 0 for size and row_count because views are stored queries, +-- not physical data. They don't consume storage space. +-- ============================================================================= + +-- ----------------------------------------------------------------------------- +-- PART 1: Get all tables from svv_table_info +-- ----------------------------------------------------------------------------- +-- svv_table_info is a Redshift system view that contains information about +-- all user-defined tables including their size and row counts. +-- We exclude system schemas that contain internal Redshift objects. +-- ----------------------------------------------------------------------------- +SELECT + schema_name::VARCHAR(256), + table_name::VARCHAR(256), + object_type::VARCHAR(10), + size_mb::NUMERIC(18,2), + size_gb::NUMERIC(18,2), + row_count::BIGINT +FROM ( + SELECT + "schema" AS schema_name, + "table" AS table_name, + 'table' AS object_type, + ROUND(size::NUMERIC, 2) AS size_mb, + ROUND(size::NUMERIC / 1024, 2) AS size_gb, + tbl_rows AS row_count + FROM svv_table_info + WHERE "schema" NOT IN ( + 'pg_catalog', -- PostgreSQL system catalog + 'information_schema', -- SQL standard metadata schema + 'pg_internal', -- Redshift internal schema + 'pg_auto_copy', -- Redshift auto-copy schema + 'pg_automv', -- Redshift auto materialized views + 'pg_mv', -- Redshift materialized views metadata + 'pg_s3' -- Redshift Spectrum S3 schema + ) +) t + +UNION ALL + +-- ----------------------------------------------------------------------------- +-- PART 2: Get all views from pg_views +-- ----------------------------------------------------------------------------- +-- pg_views is a system catalog that lists all views in the database. +-- Views don't have size or row count since they're just stored SQL queries +-- that execute against underlying tables when called. +-- ----------------------------------------------------------------------------- +SELECT + schema_name::VARCHAR(256), + table_name::VARCHAR(256), + object_type::VARCHAR(10), + size_mb::NUMERIC(18,2), + size_gb::NUMERIC(18,2), + row_count::BIGINT +FROM ( + SELECT + schemaname AS schema_name, + viewname AS table_name, + 'view' AS object_type, + 0 AS size_mb, + 0 AS size_gb, + 0 AS row_count + FROM pg_views + WHERE schemaname NOT IN ( + 'pg_catalog', + 'information_schema', + 'pg_internal', + 'pg_auto_copy', + 'pg_automv', + 'pg_mv', + 'pg_s3' + ) +) v + +-- ----------------------------------------------------------------------------- +-- Sort by schema, then object type (tables first), then object name +-- ----------------------------------------------------------------------------- +ORDER BY 1, 3, 2;