-
Notifications
You must be signed in to change notification settings - Fork 1
Add query for schemas, tables, and views inventory in Redshift #47
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,106 @@ | ||
| -- ============================================================================= | ||
| -- QUERY: Schema-Level Storage Summary | ||
| -- ============================================================================= | ||
| -- This query provides a summary of each schema in the database, showing | ||
| -- the number of tables, views, total storage size, and row counts. | ||
| -- It also flags schemas that are empty or contain only views. | ||
| -- | ||
| -- USE CASE: Database capacity planning, identifying unused schemas, | ||
| -- storage allocation review, schema cleanup decisions. | ||
| -- | ||
| -- OUTPUT COLUMNS: | ||
| -- schema_name - The name of the schema | ||
| -- table_count - Number of tables in the schema | ||
| -- view_count - Number of views in the schema | ||
| -- size_mb - Total size of all tables in MB | ||
| -- size_gb - Total size of all tables in GB | ||
| -- total_rows - Sum of all rows across all tables | ||
| -- schema_status - 'empty', 'views_only', or 'has_tables' | ||
| -- | ||
| -- NOTE: The schema_status column helps identify schemas that may be | ||
| -- candidates for cleanup (empty) or that only contain view definitions. | ||
| -- ============================================================================= | ||
|
|
||
| -- ----------------------------------------------------------------------------- | ||
| -- CTE 1: all_schemas | ||
| -- ----------------------------------------------------------------------------- | ||
| -- Get a complete list of all user-created schemas from pg_namespace. | ||
| -- This ensures we capture schemas that might be empty or only have views, | ||
| -- which wouldn't appear in svv_table_info. | ||
| -- ----------------------------------------------------------------------------- | ||
| WITH all_schemas AS ( | ||
| SELECT nspname AS schema_name | ||
| FROM pg_namespace | ||
| WHERE nspname NOT IN ( | ||
| 'pg_catalog', -- PostgreSQL system catalog | ||
| 'information_schema', -- SQL standard metadata schema | ||
| 'pg_internal', -- Redshift internal schema | ||
| 'pg_auto_copy', -- Redshift auto-copy schema | ||
| 'pg_automv', -- Redshift auto materialized views | ||
| 'pg_mv', -- Redshift materialized views metadata | ||
| 'pg_s3' -- Redshift Spectrum S3 schema | ||
| ) | ||
| AND nspname NOT LIKE 'pg_temp%' -- Exclude temporary schemas | ||
| AND nspname NOT LIKE 'pg_toast%' -- Exclude TOAST storage schemas | ||
| ), | ||
|
|
||
| -- ----------------------------------------------------------------------------- | ||
| -- CTE 2: table_sizes | ||
| -- ----------------------------------------------------------------------------- | ||
| -- Aggregate table statistics by schema from svv_table_info. | ||
| -- This gives us the count of tables, total size, and total rows per schema. | ||
| -- Schemas with no tables won't appear here (handled by LEFT JOIN later). | ||
| -- ----------------------------------------------------------------------------- | ||
| table_sizes AS ( | ||
| SELECT | ||
| "schema" AS schema_name, | ||
| COUNT(DISTINCT "table") AS table_count, | ||
| ROUND(SUM(size)::NUMERIC, 2) AS size_mb, | ||
| ROUND(SUM(size)::NUMERIC / 1024, 2) AS size_gb, | ||
| ROUND(SUM(tbl_rows)::NUMERIC, 0) AS total_rows | ||
| FROM svv_table_info | ||
| WHERE "schema" NOT IN ('pg_catalog', 'information_schema', 'pg_internal') | ||
|
||
| GROUP BY "schema" | ||
| ), | ||
|
|
||
| -- ----------------------------------------------------------------------------- | ||
| -- CTE 3: view_counts | ||
| -- ----------------------------------------------------------------------------- | ||
| -- Count views per schema using pg_class (relkind = 'v' indicates a view). | ||
| -- This is joined with pg_namespace to get the schema name. | ||
| -- Schemas with no views won't appear here (handled by LEFT JOIN later). | ||
| -- ----------------------------------------------------------------------------- | ||
| view_counts AS ( | ||
| SELECT | ||
| n.nspname AS schema_name, | ||
| COUNT(*) AS view_count | ||
| FROM pg_class c | ||
| JOIN pg_namespace n ON c.relnamespace = n.oid | ||
| WHERE c.relkind = 'v' -- 'v' = view in pg_class | ||
| AND n.nspname NOT IN ('pg_catalog', 'information_schema', 'pg_internal') | ||
|
||
| GROUP BY n.nspname | ||
| ) | ||
|
|
||
| -- ----------------------------------------------------------------------------- | ||
| -- Main SELECT: Join all CTEs and calculate schema_status | ||
| -- ----------------------------------------------------------------------------- | ||
| -- LEFT JOINs ensure we get all schemas even if they have no tables or views. | ||
| -- COALESCE converts NULLs to 0 for schemas missing from table_sizes or view_counts. | ||
| -- The CASE statement determines the schema_status based on object counts. | ||
| -- ----------------------------------------------------------------------------- | ||
| SELECT | ||
| s.schema_name, | ||
| COALESCE(t.table_count, 0) AS table_count, | ||
| COALESCE(v.view_count, 0) AS view_count, | ||
| COALESCE(t.size_mb, 0) AS size_mb, | ||
| COALESCE(t.size_gb, 0) AS size_gb, | ||
| COALESCE(t.total_rows, 0) AS total_rows, | ||
| CASE | ||
| WHEN COALESCE(t.table_count, 0) = 0 AND COALESCE(v.view_count, 0) = 0 THEN 'empty' | ||
| WHEN COALESCE(t.table_count, 0) = 0 AND COALESCE(v.view_count, 0) > 0 THEN 'views_only' | ||
| ELSE 'has_tables' | ||
| END AS schema_status | ||
| FROM all_schemas s | ||
| LEFT JOIN table_sizes t ON s.schema_name = t.schema_name | ||
| LEFT JOIN view_counts v ON s.schema_name = v.schema_name | ||
| ORDER BY s.schema_name ASC; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| -- ============================================================================= | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we add a note that you need access to the system tables to be able to run this query? |
||
| -- QUERY: Individual Tables and Views Inventory | ||
| -- ============================================================================= | ||
| -- This query returns a comprehensive list of all tables and views in the | ||
| -- Redshift database, along with their size and row count information. | ||
| -- | ||
| -- USE CASE: Database inventory, storage analysis, identifying large tables, | ||
| -- finding unused objects, or auditing database contents. | ||
| -- | ||
| -- OUTPUT COLUMNS: | ||
| -- schema_name - The schema containing the object | ||
| -- table_name - The name of the table or view | ||
| -- object_type - Either 'table' or 'view' | ||
| -- size_mb - Size in megabytes (0 for views since they don't store data) | ||
| -- size_gb - Size in gigabytes (0 for views) | ||
| -- row_count - Number of rows (0 for views) | ||
| -- | ||
| -- NOTE: Views show 0 for size and row_count because views are stored queries, | ||
| -- not physical data. They don't consume storage space. | ||
| -- ============================================================================= | ||
|
|
||
| -- ----------------------------------------------------------------------------- | ||
| -- PART 1: Get all tables from svv_table_info | ||
| -- ----------------------------------------------------------------------------- | ||
| -- svv_table_info is a Redshift system view that contains information about | ||
| -- all user-defined tables including their size and row counts. | ||
| -- We exclude system schemas that contain internal Redshift objects. | ||
| -- ----------------------------------------------------------------------------- | ||
| SELECT | ||
| schema_name::VARCHAR(256), | ||
| table_name::VARCHAR(256), | ||
| object_type::VARCHAR(10), | ||
| size_mb::NUMERIC(18,2), | ||
| size_gb::NUMERIC(18,2), | ||
| row_count::BIGINT | ||
| FROM ( | ||
| SELECT | ||
| "schema" AS schema_name, | ||
| "table" AS table_name, | ||
| 'table' AS object_type, | ||
| ROUND(size::NUMERIC, 2) AS size_mb, | ||
| ROUND(size::NUMERIC / 1024, 2) AS size_gb, | ||
| tbl_rows AS row_count | ||
| FROM svv_table_info | ||
| WHERE "schema" NOT IN ( | ||
| 'pg_catalog', -- PostgreSQL system catalog | ||
| 'information_schema', -- SQL standard metadata schema | ||
| 'pg_internal', -- Redshift internal schema | ||
| 'pg_auto_copy', -- Redshift auto-copy schema | ||
| 'pg_automv', -- Redshift auto materialized views | ||
| 'pg_mv', -- Redshift materialized views metadata | ||
| 'pg_s3' -- Redshift Spectrum S3 schema | ||
| ) | ||
|
Comment on lines
+45
to
+53
|
||
| ) t | ||
|
|
||
| UNION ALL | ||
|
|
||
| -- ----------------------------------------------------------------------------- | ||
| -- PART 2: Get all views from pg_views | ||
| -- ----------------------------------------------------------------------------- | ||
| -- pg_views is a system catalog that lists all views in the database. | ||
| -- Views don't have size or row count since they're just stored SQL queries | ||
| -- that execute against underlying tables when called. | ||
| -- ----------------------------------------------------------------------------- | ||
| SELECT | ||
| schema_name::VARCHAR(256), | ||
| table_name::VARCHAR(256), | ||
| object_type::VARCHAR(10), | ||
| size_mb::NUMERIC(18,2), | ||
| size_gb::NUMERIC(18,2), | ||
| row_count::BIGINT | ||
| FROM ( | ||
| SELECT | ||
| schemaname AS schema_name, | ||
| viewname AS table_name, | ||
| 'view' AS object_type, | ||
| 0 AS size_mb, | ||
| 0 AS size_gb, | ||
| 0 AS row_count | ||
| FROM pg_views | ||
| WHERE schemaname NOT IN ( | ||
| 'pg_catalog', | ||
| 'information_schema', | ||
| 'pg_internal', | ||
| 'pg_auto_copy', | ||
| 'pg_automv', | ||
| 'pg_mv', | ||
| 'pg_s3' | ||
| ) | ||
|
Comment on lines
+81
to
+89
|
||
| ) v | ||
|
|
||
| -- ----------------------------------------------------------------------------- | ||
| -- Sort by schema, then object type (tables first), then object name | ||
| -- ----------------------------------------------------------------------------- | ||
| ORDER BY 1, 3, 2; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
File location inconsistency: This file is located in the root directory while the related file
table_memory_usage.sqlis in thesql/directory. For better organization and consistency, both files should be in the same directory, preferablysql/since it already exists and contains SQL queries.