-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathorganize.py
More file actions
42 lines (32 loc) · 1.68 KB
/
organize.py
File metadata and controls
42 lines (32 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pandas as pd
import sqlite3
# I want to demonstrate proficiency with SQL.
# To accomplish this I will be using the sqlite3 library
# and the data we have previously generated and manipulated.
# Sqlite3 allows me to create a database in memory and run queries on it.
def main():
# :memory: is a special name that creates a database in memory
conn = sqlite3.connect(':memory:')
# Create a cursor object to interact with the database
cur = conn.cursor()
# Create Pandas DataFrame Objects to store our data
data = pd.read_csv('fixedData.csv')
data.dropna(inplace = True)
# Use that object to create our new in-memory database
data.to_sql('data', conn, if_exists='replace', index=False)
# Example basic query using our cursor (cur) object
cursorQuery = cur.execute('SELECT * FROM data LIMIT 10')
print("Basic query utilizing in-memory database:", "\n", cursorQuery.fetchall())
# More complex query using Pandas to store the data as a Pandas DataFrame Object
query = "SELECT GENDER, COUNT(*) AS Number FROM data d WHERE d.Age > 50 GROUP BY d.Gender"
queryDF = pd.read_sql(query, conn)
conn.close()
# Let's look at some simpler ways to achieve the same results, using Pandas, without an in-memory database
# Refer to line 15 where we read in the .csv file
womenOver50 = data[(data['Age'] > 50) & (data['Gender'] == 'F')]
menOver50 = data[(data['Age'] > 50) & (data['Gender'] == 'M')]
print("Query using Pandas.read_sql() method:", "\n", queryDF)
print("Count of women over 50 using simpler method:", len(womenOver50))
print("Count of men over 50 using simpler method:", len(menOver50))
if __name__ == '__main__':
main()