Skip to content

Commit df731e6

Browse files
authored
Merge pull request #18 from codegreen-framework/main
0.0.5
2 parents 0284b8b + dff3aec commit df731e6

File tree

11 files changed

+197
-131
lines changed

11 files changed

+197
-131
lines changed

.github/workflows/workflow.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Publish Python 🐍 distribution 📦 to PyPI
1+
name: Publish to PyPI
22

33
on:
44
push:
@@ -7,7 +7,7 @@ on:
77

88
jobs:
99
build:
10-
name: Build distribution 📦
10+
name: Build distribution
1111
runs-on: ubuntu-latest
1212

1313
steps:
@@ -30,7 +30,7 @@ jobs:
3030
path: dist/
3131

3232
publish-to-pypi:
33-
name: Publish Python 🐍 distribution 📦 to PyPI
33+
name: Publish to PyPI
3434
needs:
3535
- build
3636
runs-on: ubuntu-latest

codegreen_core/data/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
from .main import *
2+
from .offline import *
3+
# from . import main
24

3-
__all__ = ["energy"]
5+
__all__ = ["info","energy","sync_offline_data",'get_offline_data']

codegreen_core/data/entsoe.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@
4747
"Biomass": ["Biomass"],
4848
}
4949

50+
51+
5052
# helper methods
5153

5254

@@ -287,6 +289,7 @@ def get_actual_production_percentage(country, start, end, interval60=False) -> d
287289
- `data_available`: A boolean indicating if data was successfully retrieved.
288290
- `data`: A pandas DataFrame containing the energy data if available, empty DataFrame if not.
289291
- `time_interval` : the time interval of the DataFrame
292+
- `columns` : a dict of column names grouped under the keys `renewable`, `nonRenewable` and `percentage` (`None` if retrieval failed)
290293
:rtype: dict
291294
"""
292295
try:
@@ -371,6 +374,7 @@ def get_actual_production_percentage(country, start, end, interval60=False) -> d
371374
"data": _format_energy_data(table),
372375
"data_available": True,
373376
"time_interval": duration,
377+
"columns":gen_cols_from_data(table)
374378
}
375379
except Exception as e:
376380
# print(e)
@@ -380,8 +384,38 @@ def get_actual_production_percentage(country, start, end, interval60=False) -> d
380384
"data_available": False,
381385
"error": e,
382386
"time_interval": 0,
387+
"columns":None
383388
}
384389

390+
def gen_cols_from_data(df):
391+
"""Generates a dict of column names (renewable, non-renewable and percentage) for the given energy generation dataframe"""
392+
allAddkeys = [
393+
"Wind",
394+
"Solar",
395+
"Nuclear",
396+
"Hydroelectricity",
397+
"Geothermal",
398+
"Natural Gas",
399+
"Petroleum",
400+
"Coal",
401+
"Biomass",
402+
]
403+
404+
allCols = df.columns.tolist()
405+
# find out which columns are present in the data out of all the possible columns in both the categories
406+
renPresent = list(set(allCols).intersection(renewableSources))
407+
nonRenPresent = list(set(allCols).intersection(nonRenewableSources))
408+
409+
cols = {
410+
"renewable" : renPresent,
411+
"nonRenewable": nonRenPresent,
412+
"percentage":[]
413+
}
414+
for ky in allAddkeys:
415+
fieldName = ky + "_per"
416+
cols["percentage"].append(fieldName)
417+
return cols
418+
385419

386420
def get_forecast_percent_renewable(
387421
country: str, start: datetime, end: datetime

codegreen_core/data/main.py

Lines changed: 94 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -10,54 +10,76 @@
1010

1111
def energy(country, start_time, end_time, type="generation") -> dict:
1212
"""
13-
Returns hourly time series of energy production mix for a specified country and time range.
14-
15-
This method fetches the energy data for the specified country between the specified duration.
16-
It checks if a valid energy data source is available. If not, None is returned. Otherwise, the
17-
energy data is returned as a pandas DataFrame. The structure of data depends on the energy source.
18-
19-
For example, if the source is ENTSOE, the data contains:
20-
21-
========================== ========== ================================================================
22-
Column type Description
23-
========================== ========== ================================================================
24-
startTimeUTC object Start date in UTC (format YYYYMMDDhhmm)
25-
startTime datetime Start time in local timezone
26-
Biomass float64
27-
Fossil Hard coal float64
28-
Geothermal float64
29-
....more energy sources float64
30-
**renewableTotal** float64 The total based on all renewable sources
31-
renewableTotalWS float64 The total production using only Wind and Solar energy sources
32-
nonRenewableTotal float64
33-
total float64 Total using all energy sources
34-
percentRenewable int64
35-
percentRenewableWS int64 Percentage of energy produced using only wind and solar energy
36-
Wind_per int64 Percentages of individual energy sources
37-
Solar_per int64
38-
Nuclear_per int64
39-
Hydroelectricity_per int64
40-
Geothermal_per int64
41-
Natural Gas_per int64
42-
Petroleum_per int64
43-
Coal_per int64
44-
Biomass_per int64
45-
========================== ========== ================================================================
46-
47-
Note : fields marked bold are calculated based on the data fetched.
48-
49-
:param str country: The 2 alphabet country code.
50-
:param datetime start_time: The start date for data retrieval. A Datetime object. Note that this date will be rounded to the nearest hour.
51-
:param datetime end_time: The end date for data retrieval. A datetime object. This date is also rounded to the nearest hour.
52-
:param str type: The type of data to retrieve; either 'generation' or 'forecast'. Defaults to 'generation'.
53-
:param boolean interval60: To fix the time interval of data to 60 minutes. True by default. Only applicable for generation data
54-
55-
:return: A dictionary containing:
56-
- `error`: A string with an error message, empty if no errors.
57-
- `data_available`: A boolean indicating if data was successfully retrieved.
58-
- `data`: A pandas DataFrame containing the energy data if available, empty DataFrame if not.
59-
- `time_interval` : the time interval of the DataFrame
13+
Returns an hourly time series of the energy production mix for a specified country and time range,
14+
if a valid energy data source is available.
15+
16+
The data is returned as a pandas DataFrame along with additional metadata.
17+
The columns vary depending on the data source. For example, if the source is ENTSOE,
18+
the data includes fields such as "Biomass", "Geothermal", "Hydro Pumped Storage",
19+
"Hydro Run-of-river and Poundage", "Hydro Water Reservoir", etc.
20+
21+
However, some fields remain consistent across data sources:
22+
23+
========================= ========== ================================================================
24+
Column Type Description
25+
========================= ========== ================================================================
26+
startTimeUTC object Start time in UTC (format: YYYYMMDDhhmm)
27+
startTime datetime Start time in local timezone
28+
renewableTotal float64 The total production from all renewable sources
29+
renewableTotalWS float64 Total production using only Wind and Solar energy sources
30+
nonRenewableTotal float64 Total production from non-renewable sources
31+
total float64 Total energy production from all sources
32+
percentRenewable int64 Percentage of total energy from renewable sources
33+
percentRenewableWS int64 Percentage of energy from Wind and Solar only
34+
Wind_per int64 Percentage contribution from Wind energy
35+
Solar_per int64 Percentage contribution from Solar energy
36+
Nuclear_per int64 Percentage contribution from Nuclear energy
37+
Hydroelectricity_per int64 Percentage contribution from Hydroelectricity
38+
Geothermal_per int64 Percentage contribution from Geothermal energy
39+
Natural Gas_per int64 Percentage contribution from Natural Gas
40+
Petroleum_per int64 Percentage contribution from Petroleum
41+
Coal_per int64 Percentage contribution from Coal
42+
Biomass_per int64 Percentage contribution from Biomass
43+
========================= ========== ================================================================
44+
45+
:param str country:
46+
The 2-letter country code (e.g., "DE" for Germany, "FR" for France, etc.).
47+
:param datetime start_time:
48+
The start date for data retrieval (rounded to the nearest hour).
49+
:param datetime end_time:
50+
The end date for data retrieval (rounded to the nearest hour).
51+
:param str type:
52+
The type of data to retrieve; either 'generation' or 'forecast'. Defaults to 'generation'.
53+
54+
:return: A dictionary containing the following keys:
55+
56+
- **error** (*str*): An error message, empty if no errors occurred.
57+
- **data_available** (*bool*): Indicates whether data was successfully retrieved.
58+
- **data** (*pandas.DataFrame*): The retrieved energy data if available; an empty DataFrame otherwise.
59+
- **time_interval** (*int*): The time interval of the DataFrame (constant value: ``60``).
60+
- **source** (*str*): Specifies the origin of the retrieved data. Defaults to ``'public_data'``, indicating it was fetched from an external source. If the offline storage feature is enabled, this value may change if the data is available locally.
61+
- **columns** (*dict*): Column names grouped under the keys ``renewable``, ``nonRenewable`` and ``percentage``.
62+
6063
:rtype: dict
64+
65+
**Example Usage:**
66+
67+
Get generation data for Germany
68+
69+
.. code-block:: python
70+
71+
from datetime import datetime
72+
from codegreen_core.data import energy
73+
result = energy(country="DE", start_time=datetime(2025, 1, 1), end_time=datetime(2025, 1, 2), type="generation")
74+
75+
Get forecast data for Norway
76+
77+
.. code-block:: python
78+
79+
from datetime import datetime
80+
from codegreen_core.data import energy
81+
result = energy(country="NO", start_time=datetime(2025, 1, 1), end_time=datetime(2025, 1, 2), type="forecast")
82+
6183
"""
6284
if not isinstance(country, str):
6385
raise ValueError("Invalid country")
@@ -75,27 +97,41 @@ def energy(country, start_time, end_time, type="generation") -> dict:
7597
e_source = meta.get_country_energy_source(country)
7698
if e_source == "ENTSOE":
7799
if type == "generation":
78-
"""
79-
let local_found= false
80-
see if caching is enabled, if yes, first check in the cache
81-
if not,
82-
check if offline data is enabled
83-
if yes, check is data is available locally
84-
if no, go online
85-
"""
86100
offline_data = off.get_offline_data(country,start_time,end_time)
87101
if offline_data["available"] is True and offline_data["partial"] is False and offline_data["data"] is not None:
88102
# todo fix this if partial get remaining data and merge instead of fetching the complete data
89-
return {"data":offline_data["data"],"data_available":True,"error":"None","time_interval":60,"source":offline_data["source"]}
103+
return {"data":offline_data["data"],"data_available":True,"error":"None","time_interval":60,"source":offline_data["source"],"columns":et.gen_cols_from_data(offline_data["data"])}
90104
else:
91105
energy_data = et.get_actual_production_percentage(country, start_time, end_time, interval60=True)
92-
energy_data["data"] = energy_data["data"]
106+
#energy_data["data"] = energy_data["data"]
93107
energy_data["source"] = "public_data"
108+
#energy_data["columns"] =
94109
return energy_data
95110
elif type == "forecast":
96111
energy_data = et.get_forecast_percent_renewable(country, start_time, end_time)
97-
energy_data["data"] = energy_data["data"]
112+
# energy_data["data"] = energy_data["data"]
98113
return energy_data
99114
else:
100115
raise CodegreenDataError(Message.NO_ENERGY_SOURCE)
101116
return None
117+
118+
def info()-> list:
119+
"""
120+
Returns a list of countries (in two-letter codes) and energy sources for which data can be fetched using the package.
121+
122+
:return: A list of dictionary containing:
123+
124+
- name of the country
125+
- `energy_source` : the publicly available energy data source
126+
- `carbon_intensity_method` : the methodology used to calculate carbon intensity
127+
- `code` : the 2 letter country code
128+
129+
:rtype: list
130+
"""
131+
data = meta.get_country_metadata()
132+
data_list = []
133+
for key , value in data.items():
134+
c = value
135+
c["code"] = key
136+
data_list.append(c)
137+
return data_list

codegreen_core/data/offline.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -243,16 +243,27 @@ def _get_offline_cache_data(country,start,end):
243243
return False,None
244244

245245

246-
def get_offline_data(country,start,end,sync_first=False):
246+
def get_offline_data(country,start,end,sync_first=False)->dict:
247247
"""
248-
This method returns locally stored energy data.
249-
Data is stored in 2 sources : one. Redis cache and second : csv files.
250-
Redis cache contains data only for the last 72 hours from when it was last synced
251-
Offline data files can contain data for longer durations.
252-
Both these options can be configured in the config file
253-
returns {available:True/False, data:dataframe}
254-
Note that this method assumes that syncing of the sources is being handled separately
248+
This method returns locally stored energy data.
249+
250+
Data is stored in two sources:
251+
252+
1. **Redis cache**: Contains data for a limited number of hours from the last sync.
253+
2. **CSV files**: Contain data for longer durations.
254+
255+
Both storage options can be configured in the configuration file.
256+
257+
**Note**: Unless you specify the ``sync_first`` flag, the method assumes that syncing of the data sources is handled separately. If ``sync_first`` is set to ``True`` and data files are not initialized in advance, the method may take longer to complete.
258+
259+
:return: A dictionary with the following keys:
260+
- **available** (*bool*): Indicates if the data is available.
261+
- **data** (*pandas.DataFrame*): The energy data, if available. Otherwise, ``None``.
262+
263+
:rtype: dict
264+
255265
"""
266+
256267
output = {"available":False,"data":None, "partial":False,"source":""}
257268
offline = Config.get("enable_offline_energy_generation")
258269
cache = Config.get("enable_energy_caching")
@@ -264,7 +275,7 @@ def get_offline_data(country,start,end,sync_first=False):
264275
if cache :
265276
# first look in the cache
266277
if(sync_first):
267-
print("will first sync the cache to get the latest data")
278+
#print("will first sync the cache to get the latest data")
268279
_sync_offline_cache(country)
269280
partial,data = _get_offline_cache_data(country,start,end)
270281
if data is not None and partial is False:
@@ -278,37 +289,39 @@ def get_offline_data(country,start,end,sync_first=False):
278289
if offline:
279290
# first look if data files are available, if yes, return data
280291
if(sync_first):
281-
print("will first sync the offline files to get the latest data")
292+
#print("will first sync the offline files to get the latest data")
282293
_sync_offline_file(country)
283294
partial,data = _get_offline_file_data(country,start,end)
284295
output["partial"] = partial
285296
output["data"] = data
286297
output["available"] = True
287298
output["source"] = "offline_file"
288-
print("just got the data from offline file")
299+
#print("just got the data from offline file")
289300

290301
return output
291302

292303

293304
def sync_offline_data(file=False,cache=False):
294305
"""
295-
This method syncs offline data for offline sources enabled in the cache.
296-
Data is synced for all available countries
297-
You need to run this before getting offline data. you can even setup a CRON job to call this method on regular intervals
306+
This method syncs offline data for offline sources enabled in the configuration file. The data is synced for all available countries.
307+
308+
You need to run this method before retrieving offline data. It is also possible to set up a CRON job to call this method at regular intervals to keep data synchronized.
309+
310+
The sync operation can take some time, depending on the data size and the selected sync options (file, cache, or both).
311+
312+
:param bool file: If ``True``, sync data in offline files. Defaults to ``False``.
313+
:param bool cache: If ``True``, sync data in the cache. Defaults to ``False``.
298314
"""
299315
c_keys = meta.get_country_metadata()
300316
if Config.get("enable_offline_energy_generation") == True and file == True:
301317
for key in c_keys:
302318
try:
303319
_sync_offline_file(key)
304320
except Exception as e:
305-
# print(e)
306321
log_stuff("Error in syncing offline file for "+key+". Message"+ str(e))
307322
if Config.get("enable_energy_caching") == True and cache == True :
308323
for key in c_keys:
309324
try:
310325
_sync_offline_cache(key)
311326
except Exception as e:
312-
# print(e)
313327
log_stuff("Error in syncing offline file for "+key+". Message: "+ str(e))
314-

codegreen_core/tools/loadshift_time.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,31 @@ def predict_now(
7070
:type criteria: str
7171
:return: Tuple[timestamp, message, average_percent_renewable]
7272
:rtype: tuple
73+
74+
**Example usage**:
75+
76+
.. code-block:: python
77+
78+
from datetime import datetime,timedelta
79+
from codegreen_core.tools.loadshift_time import predict_now
80+
81+
country_code = "DK"
82+
est_runtime_hour = 10
83+
est_runtime_min = 0
84+
now = datetime.now()
85+
hard_finish_date = now + timedelta(days=1)
86+
criteria = "percent_renewable"
87+
per_renewable = 50
88+
89+
time = predict_now(country_code,
90+
est_runtime_hour,
91+
est_runtime_min,
92+
hard_finish_date,
93+
criteria,
94+
per_renewable)
95+
# (1728640800.0, <Message.OPTIMAL_TIME: 'OPTIMAL_TIME'>, 76.9090909090909)
96+
97+
7398
"""
7499
if criteria == "percent_renewable":
75100
try:

docs/_static/modules.png

-11.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)