-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhelpers.py
More file actions
76 lines (60 loc) · 2.36 KB
/
helpers.py
File metadata and controls
76 lines (60 loc) · 2.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import pandas as pd
import numpy as np
import folium
from sklearn import preprocessing
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.colors as colors
import re
import itertools
import plotly.express as px
import plotly.graph_objects as go
# Features Selection
def features(df, Area=False, Item=False, Element=False, plot=False):
    '''Select FAO series and reshape them to one column per series.

    Inputs:
        df: wide dataframe from FAO holding the columns "Area", "Item",
            "Element", "Area Code", "Item Code", "Element Code" and "Unit";
            every remaining column is melted into one "Year" entry.
        Area, Item, Element: lists of the values we want to keep. A falsy
            value (the default) keeps every value of that attribute.
        plot: when true, show a plotly line chart (with range slider) of
            the selected series.
    Output: dataframe with a "Year" column followed by one column per
        (Area, Item, Element) combination present in the data. Rows that
        share the same combination and year are averaged (pivot_table
        default aggregation).
    '''
    # Preparation
    if Area:
        df = df[df["Area"].isin(Area)]
    if Item:
        df = df[df["Item"].isin(Item)]
    if Element:
        df = df[df["Element"].isin(Element)]
    df = df.drop(columns=["Area Code", "Item Code", "Element Code", "Unit"])
    # Parenthesised chain instead of backslashes: a dangling trailing
    # backslash silently glues the following statement onto the expression.
    df = (
        df.melt(id_vars=["Area", "Item", "Element"])
        .rename(columns={"variable": "Year"})
        .pivot_table(index="Year", columns=["Area", "Item", "Element"],
                     values="value")
    )
    # Remember the series columns before "Year" is added as a column.
    series_columns = list(df.columns)
    df = df.reset_index()
    # Plot
    if plot:
        present = set(series_columns)
        # Keep the caller's ordering for the filters that were given; for a
        # filter left at its default fall back to the values found in the
        # data, so product() never receives a bool.
        levels = [
            list(chosen) if chosen
            else list(dict.fromkeys(col[pos] for col in series_columns))
            for pos, chosen in enumerate((Area, Item, Element))
        ]
        fig = go.Figure()
        for comb in itertools.product(*levels):
            if comb not in present:
                # Combination requested but absent from the data: skip it
                # rather than failing with a KeyError.
                continue
            fig.add_trace(go.Scatter(x=df["Year"], y=df[comb], name=str(comb)))
        fig.update_layout(title_text='Time Series with Rangeslider',
                          xaxis_rangeslider_visible=True)
        fig.show()
    return df
def compare_different_country(df, country=False, feature=False, plot=False):
    '''Reshape a long dataframe to one column per (Area, feature) pair.

    IMPORTANT! The input must contain columns named "Year" and "Area";
    check the name of the columns and adapt the code otherwise.

    Inputs:
        df: dataframe with one row per (Year, Area) and one column per
            feature.
        country: list of "Area" values to keep; a falsy value (the default)
            keeps every area.
        feature: list of feature columns to compare; a falsy value (the
            default) uses every column other than "Year" and "Area".
        plot: when true, show a plotly time series plot (with range slider)
            of the selected series.
    Output: dataframe with a "Year" column followed by one column per
        (Area, feature) pair present in the data. When several rows share
        the same Year and Area the first value is kept.
    '''
    id_columns = ["Year", "Area"]
    if country:
        df = df[df["Area"].isin(country)]
    if feature:
        feature = list(feature)
        df = df[id_columns + feature]
        value_columns = feature
    else:
        # No explicit selection: let melt take every non-id column instead
        # of handing it the boolean default.
        value_columns = None
    df = (
        df.melt(id_vars=id_columns, value_vars=value_columns)
        .pivot_table(index="Year", columns=["Area", "variable"],
                     values="value", aggfunc="first")
    )
    # Remember the series columns before "Year" is added as a column.
    series_columns = list(df.columns)
    df = df.reset_index()
    # Plot
    if plot:
        present = set(series_columns)
        # Keep the caller's ordering where a selection was given; otherwise
        # use the values found in the data so product() never gets a bool.
        levels = [
            list(chosen) if chosen
            else list(dict.fromkeys(col[pos] for col in series_columns))
            for pos, chosen in enumerate((country, feature))
        ]
        fig = go.Figure()
        for comb in itertools.product(*levels):
            if comb not in present:
                # Pair requested but absent from the data: skip it rather
                # than failing with a KeyError.
                continue
            fig.add_trace(go.Scatter(x=df["Year"], y=df[comb], name=str(comb)))
        fig.update_layout(title_text='Time Series with Rangeslider',
                          xaxis_rangeslider_visible=True)
        fig.show()
    return df