Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions calculate_largest_expensors.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
Approach:
1. Aggregate data from EXPENSES table to calcualte total expenses per employee
2. Join EMPLOYEE table twice to display info about both employee and their manager
3. Order output as per request

*/

SELECT
em.employee_id
,em.first_name || ' ' || em.last_name AS employee_name
,m.employee_id AS manager_id
,m.first_name || ' ' || m.last_name AS manager_name
,ex.total_expensed_amount
FROM (
SELECT
employee_id
,SUM(unit_price * quantity) AS total_expensed_amount
FROM EXPENSES
GROUP BY employee_id
) ex
LEFT JOIN EMPLOYEE em ON em.employee_id = ex.employee_id
LEFT JOIN EMPLOYEE m ON em.manager_id = m.employee_id
ORDER BY total_expensed_amount DESC
;
32 changes: 32 additions & 0 deletions create_employees.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
Approach:
1. Create DDL for table
2. Truncate target table to avoid duplication when script is run multiple times
3. Insert data to target table as no transformations are needed

*/

CREATE TABLE IF NOT EXISTS EMPLOYEE (
employee_id TINYINT,
first_name VARCHAR,
last_name VARCHAR,
job_title VARCHAR,
manager_id TINYINT
)
;

TRUNCATE TABLE EMPLOYEE
;

INSERT INTO EMPLOYEE (employee_id, first_name, last_name, job_title, manager_id)
VALUES
(1,'Ian','James','CEO',4),
(2,'Umberto','Torrielli','CSO',1),
(3,'Alex','Jacobson','MD EMEA',2),
(4,'Darren','Poynton','CFO',2),
(5,'Tim','Beard','MD APAC',2),
(6,'Gemma','Dodd','COS',1),
(7,'Lisa','Platten','CHR',6),
(8,'Stefano','Camisaca','GM Activation',2),
(9,'Andrea','Ghibaudi','MD NAM',2)
;
46 changes: 46 additions & 0 deletions create_expenses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
Approach:
1. Create DDL for table
2. Create temp table that will contain data extracted from raw files without any transformations
3. Insert data to temp table
4. Truncate target table to avoid duplication when script is run multiple times
5. Transform data from temp table so that it is ready for target table
6. Load data to target table
7. Remove not needed temp table

*/

CREATE TABLE IF NOT EXISTS EXPENSES (
employee_id TINYINT,
unit_price DECIMAL(8, 2),
quantity TINYINT
);

CREATE TABLE IF NOT EXISTS EXPENSES_tmp (
employee_name VARCHAR,
unit_price DECIMAL(8, 2),
quantity TINYINT
);

INSERT INTO EXPENSES_tmp (employee_name, unit_price, quantity)
VALUES
('Alex Jacobson',6.50,14),
('Alex Jacobson',11.00,20),
('Alex Jacobson',22.00,18),
('Alex Jacobson',13.00,75),
('Andrea Ghibaudi',300,1),
('Darren Poynton',40.00,9),
('Umberto Torrielli',17.50,4);

TRUNCATE TABLE EXPENSES;

INSERT INTO EXPENSES
SELECT
em.employee_id
,ex.unit_price
,ex.quantity
FROM EXPENSES_tmp ex
INNER JOIN EMPLOYEE em ON ex.employee_name = em.first_name || ' ' || em.last_name
;

DROP TABLE EXPENSES_tmp;
65 changes: 65 additions & 0 deletions create_invoices.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
Approach:
1. Create DDL for tables
2. Create temp table that will contain data extracted from raw files without any transformations
3. Insert data to temp table
4. Truncate target tables to avoid duplication when script is run multiple times
5. Transform data from temp table so that it is ready for target tables
6. Load data to target table
7. Remove not needed temp table

*/

CREATE TABLE IF NOT EXISTS SUPPLIER (
supplier_id TINYINT,
NAME VARCHAR
);

CREATE TABLE IF NOT EXISTS INVOICE (
supplier_id TINYINT,
invoice_ammount DECIMAL(8, 2),
due_date date
);

CREATE TABLE IF NOT EXISTS INVOICE_tmp (
supplier VARCHAR,
invoice_ammount DECIMAL(8, 2),
months_due TINYINT
);

INSERT INTO INVOICE_tmp (supplier,invoice_ammount,months_due)
VALUES
('Party Animals',6000,3),
('Catering Plus',2000,2),
('Catering Plus',1500,3),
('Dave''s Discos',500,1),
('Entertainment tonight',6000,3),
('Ice Ice Baby',4000,6);

TRUNCATE TABLE INVOICE;
TRUNCATE TABLE SUPPLIER;

INSERT INTO SUPPLIER
SELECT
ROW_NUMBER() OVER (ORDER BY name) AS supplier_id
,name
FROM
(SELECT
supplier AS name
FROM INVOICE_tmp
GROUP BY supplier
) a
;


INSERT INTO INVOICE
SELECT
s.supplier_id
,tmp.invoice_ammount
,last_day_of_month(date_add('month', tmp.months_due-1, now())) AS due_date
FROM INVOICE_tmp tmp
LEFT JOIN SUPPLIER s ON s.name = tmp.supplier
;

DROP TABLE INVOICE_tmp;

41 changes: 41 additions & 0 deletions find_manager_cycles.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
Approach:
1. Define multiple joins to EMPLOYEE table. Each subsequent join point to manager of employee in previous step.
2. Collect path into array
3. Filter only those paths that are cycles
4. Ensure path ends in original employee

Theoretical explaination:
If employee is part of managerial cycle then after some number of steps cycle would go back to initial state, in this case to original employee.
After that the cycle will only repeat itself so there is no need to continue the search, hence the additional condition halting the process
before chain reverts itself.
In the most extreme case it will be a cycle of all 9 employees and it will go back to initial state at 10th step.
Thus, for all employees that are in managerial cycle e0.employee_id is NULL.
For employees that are not part of managerial cycle it's impossible to go back to original employee_id by following subsequent managers
Thus, for these employees e0.employee_id is not NULL.
*/

SELECT
employee_id
,concat(array_except(manager_cycle, array[NULL]), array[employee_id]) manager_cycle
FROM
(
SELECT
e1.employee_id
,array[e1.employee_id, e2.employee_id, e3.employee_id, e4.employee_id, e5.employee_id, e6.employee_id, e7.employee_id, e8.employee_id, e9.employee_id] manager_cycle

FROM EMPLOYEE e1
LEFT JOIN EMPLOYEE e2 on e1.manager_id = e2.employee_id AND e1.employee_id <> e2.employee_id
LEFT JOIN EMPLOYEE e3 on e2.manager_id = e3.employee_id AND e1.employee_id <> e3.employee_id
LEFT JOIN EMPLOYEE e4 on e3.manager_id = e4.employee_id AND e1.employee_id <> e4.employee_id
LEFT JOIN EMPLOYEE e5 on e4.manager_id = e5.employee_id AND e1.employee_id <> e5.employee_id
LEFT JOIN EMPLOYEE e6 on e5.manager_id = e6.employee_id AND e1.employee_id <> e6.employee_id
LEFT JOIN EMPLOYEE e7 on e6.manager_id = e7.employee_id AND e1.employee_id <> e7.employee_id
LEFT JOIN EMPLOYEE e8 on e7.manager_id = e8.employee_id AND e1.employee_id <> e8.employee_id
LEFT JOIN EMPLOYEE e9 on e8.manager_id = e9.employee_id AND e1.employee_id <> e9.employee_id
LEFT JOIN EMPLOYEE e0 on e8.manager_id = e0.employee_id AND e1.employee_id <> e0.employee_id
WHERE e0.employee_id is NULL
)
;


85 changes: 85 additions & 0 deletions generate_insert_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#Aid used to extract data from files and to prepare SQL insert values queries.

import os

def check_column(msg, column_mapping, key, delimiter = ":"):
'''
Given a string (msg) verify that it contains data (key - value separated by delimiter)
and if it starts with desired value (column_mapping[key])
Output value that is stored in that string
'''
if delimiter in msg:
if msg[0:len(column_mapping[key])] == column_mapping[key]:
return msg[len(column_mapping[key])+2:].replace('\n', '')
return False

def gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm):
'''
Function that goes through all files in location given by relative path (file_path) and compiles from each file row to be inserted into desired table
'''
wd = os.path.abspath(os.getcwd())
wd += file_path

sql_query = f"""INSERT INTO {table_name} ({','.join(column_mapping.keys())})
VALUES"""
vals = []
for file in os.listdir(wd):
f = open(f"{wd}/{file}", "r")
val = {}
for line in f:
for k in column_mapping.keys():
if check_column(line, column_mapping, k):
val[k] = check_column(line, column_mapping, k)
vals.append(val)
f.close()
print(vals)
for val in vals:
sql_query += f"""
("""
for k in column_mapping.keys():
column_value = val[k]

if k in column_with_qm:
column_value = column_value.replace("'", "''")
sql_query += "'"


if k in column_trim:
column_value = val[k].split(" ")[0]
sql_query += column_value

if k in column_with_qm:
sql_query += "'"

sql_query += ","

sql_query = sql_query[:-1]
sql_query += f"),"
sql_query = sql_query[:-1] + ';'
return sql_query



file_path = "/finance/receipts_from_last_night"
table_name = "EXPENSES_tmp"
column_mapping = { "employee_name": "Employee"
,"unit_price": "Unit Price"
,"quantity": "Quantity"
}
column_trim = []
column_with_qm = ["employee_name"]

print(gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm))



file_path = "/finance/invoices_due"
table_name = "INVOICE_tmp"
column_mapping = { "supplier": "Company Name"
,"invoice_ammount": "Invoice Amount"
,"months_due": "Due Date"
}
column_trim = ["months_due"]
column_with_qm = ["supplier"]

print(gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm))
53 changes: 53 additions & 0 deletions generate_supplier_payment_plans.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
Approach:
1. For each invoice create array that will contain all dates at which payments will be done
2. Explode this array so that there is a seperate row for each payment x invoice
3. Calcualte the amount that should be paid out per this invoice
4. Wrap up this part in view for readibility
5. Aggregate all payments that are to be done on each payment date (in case there are multiple invoices to be paid out)
6. Use window function to calculate all remaining payments to be done for a given supplier
7. Add info about supplier

*/

CREATE OR REPLACE VIEW payment_dates AS
SELECT
supplier_id
,payment_ammount
,payment_date
FROM (
SELECT
supplier_id
,invoice_ammount/cardinality(payment_dates) AS payment_ammount
,payment_dates
FROM
(
SELECT
supplier_id
,due_date
,invoice_ammount
,sequence(last_day_of_month(date(now())), due_date, interval '1' month) AS payment_dates
FROM INVOICE
) invoices_with_payment_days
) AS invoices (supplier_id,payment_ammount,payment_dates)
CROSS JOIN UNNEST(payment_dates) AS t(payment_date)
;

SELECT
p.supplier_id
,s.name AS supplier_name
,p.payment_ammount
,SUM(p.payment_ammount) OVER (PARTITION BY p.supplier_id ORDER BY p.payment_date DESC) - p.payment_ammount AS balance_outstanding
,p.payment_date

FROM (
SELECT
supplier_id
,payment_date
,sum(payment_ammount) AS payment_ammount
FROM payment_dates
GROUP BY supplier_id, payment_date
) p
LEFT JOIN SUPPLIER s ON p.supplier_id = s.supplier_id
ORDER BY p.supplier_id, p.payment_date
;