Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ITR/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class ColumnsConfig:
OWNED_EMISSIONS = "owned_emissions"
COUNTRY = 'country'
SECTOR = 'sector'
PRODUCTION = 'production'
GHG_SCOPE12 = 'ghg_s1s2'
GHG_SCOPE3 = 'ghg_s3'
COMPANY_REVENUE = 'company_revenue'
Expand All @@ -44,6 +45,7 @@ class ColumnsConfig:
BENCHMARK_TEMP = 'benchmark_temperature'
BENCHMARK_GLOBAL_BUDGET = 'benchmark_global_budget'
BASE_EI = 'emission_intensity_at_base_year'
PROJECTED_PRODUCTION = 'projected_production_units'
PROJECTED_EI = 'projected_intensities'
PROJECTED_TARGETS = 'projected_targets'
TRAJECTORY_SCORE = 'trajectory_score'
Expand Down
41 changes: 26 additions & 15 deletions ITR/data/base_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List, Type
from ITR.configs import ColumnsConfig, TemperatureScoreConfig
from ITR.data.data_providers import CompanyDataProvider, ProductionBenchmarkDataProvider, IntensityBenchmarkDataProvider
from ITR.interfaces import ICompanyData, EScope, IProductionBenchmarkScopes, IEmissionIntensityBenchmarkScopes, \
from ITR.interfaces import ICompanyData, PScope, IProductionBenchmarkScopes, IEmissionIntensityBenchmarkScopes, \
IBenchmark


Expand All @@ -28,7 +28,7 @@ def __init__(self,
self.temp_config = tempscore_config

def _convert_projections_to_series(self, company: ICompanyData, feature: str,
scope: EScope = EScope.S1S2) -> pd.Series:
scope: PScope = PScope.S1S2) -> pd.Series:
"""
extracts the company projected intensities or targets for a given scope
:param feature: PROJECTED_EI or PROJECTED_TARGETS
Expand Down Expand Up @@ -78,15 +78,17 @@ def get_company_intensity_and_production_at_base_year(self, company_ids: List[st
overrides subclass method
:param: company_ids: list of company ids
:return: DataFrame the following columns :
ColumnsConfig.COMPANY_ID, ColumnsConfig.GHG_S1S2, ColumnsConfig.BASE_EI, ColumnsConfig.SECTOR and
ColumnsConfig.REGION
ColumnsConfig.COMPANY_ID, ColumnsConfig.PRODUCTION, ColumnsConfig.GHG_S1S2, ColumnsConfig.BASE_EI,
ColumnsConfig.SECTOR and ColumnsConfig.REGION
"""
df_fundamentals = self.get_company_fundamentals(company_ids)
base_year = self.temp_config.CONTROLS_CONFIG.base_year
company_info = df_fundamentals.loc[
company_ids, [self.column_config.SECTOR, self.column_config.REGION,
self.column_config.PRODUCTION,
self.column_config.GHG_SCOPE12]]
ei_at_base = self._get_company_intensity_at_year(base_year, company_ids).rename(self.column_config.BASE_EI)
# print(f"BA: company_info.loc[] = {company_info.loc['US0185223007']}")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove commented-out code.

return company_info.merge(ei_at_base, left_index=True, right_index=True)

def get_company_fundamentals(self, company_ids: List[str]) -> pd.DataFrame:
Expand All @@ -96,7 +98,7 @@ def get_company_fundamentals(self, company_ids: List[str]) -> pd.DataFrame:
"""
return pd.DataFrame.from_records(
[ICompanyData.parse_obj(c).dict() for c in self.get_company_data(company_ids)],
exclude=['projected_targets', 'projected_intensities']).set_index(self.column_config.COMPANY_ID)
exclude=['projected_production_units', 'projected_targets', 'projected_intensities']).set_index(self.column_config.COMPANY_ID)

def get_company_projected_intensities(self, company_ids: List[str]) -> pd.DataFrame:
"""
Expand All @@ -116,6 +118,15 @@ def get_company_projected_targets(self, company_ids: List[str]) -> pd.DataFrame:
[self._convert_projections_to_series(c, self.column_config.PROJECTED_TARGETS) for c in
self.get_company_data(company_ids)])

def get_company_projected_production(self, company_ids: List[str]) -> pd.DataFrame:
"""
Build a DataFrame of projected production with one row per company.

Every company returned by get_company_data(company_ids) is converted with
_convert_projections_to_series using the PROJECTED_PRODUCTION feature, and the
resulting Series are assembled into a single DataFrame.

NOTE(review): no scope is passed to _convert_projections_to_series, so its default
scope applies -- confirm that default matches the scope under which the production
projections are stored.

:param company_ids: A list of company IDs
:return: A pandas DataFrame with projected production per company
"""
return pd.DataFrame(
[self._convert_projections_to_series(c, self.column_config.PROJECTED_PRODUCTION) for c in
self.get_company_data(company_ids)])


class BaseProviderProductionBenchmark(ProductionBenchmarkDataProvider):

Expand All @@ -142,7 +153,7 @@ def _convert_benchmark_to_series(self, benchmark: IBenchmark) -> pd.Series:
"""
return pd.Series({r.year: r.value for r in benchmark.projections}, name=(benchmark.region, benchmark.sector))

def _get_projected_production(self, scope: EScope = EScope.S1S2) -> pd.DataFrame:
def _get_projected_production(self, scope: PScope = PScope.PRODUCTION) -> pd.DataFrame:
"""
Converts IBenchmarkScopes into dataframe for a scope
:param scope: a scope
Expand All @@ -156,19 +167,19 @@ def _get_projected_production(self, scope: EScope = EScope.S1S2) -> pd.DataFrame

return df_bm

def get_company_projected_production(self, ghg_scope12: pd.DataFrame) -> pd.DataFrame:
def get_company_projected_production(self, production: pd.DataFrame) -> pd.DataFrame:
"""
get the projected productions for list of companies in ghg_scope12
:param ghg_scope12: DataFrame with at least the following columns :
ColumnsConfig.COMPANY_ID,ColumnsConfig.GHG_SCOPE12, ColumnsConfig.SECTOR and ColumnsConfig.REGION
get the projected productions for list of companies (PRODUCTIONS not S1S2)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"(PRODUCTIONS not S1S2)" might be superfluous

:param production: DataFrame with at least the following columns :
ColumnsConfig.COMPANY_ID,ColumnsConfig.PRODUCTION, ColumnsConfig.SECTOR and ColumnsConfig.REGION
:return: DataFrame of projected productions for [base_year - base_year + 50]
"""
benchmark_production_projections = self.get_benchmark_projections(ghg_scope12)
benchmark_production_projections = self.get_benchmark_projections(production)
return benchmark_production_projections.add(1).cumprod(axis=1).mul(
ghg_scope12[self.column_config.GHG_SCOPE12].values, axis=0)
production[self.column_config.PRODUCTION].values, axis=0)

def get_benchmark_projections(self, company_sector_region_info: pd.DataFrame,
scope: EScope = EScope.S1S2) -> pd.DataFrame:
scope: PScope = PScope.S1S2) -> pd.DataFrame:
"""
Overrides subclass method
returns a Dataframe with production benchmarks per company_id given a region and sector.
Expand Down Expand Up @@ -246,7 +257,7 @@ def _convert_benchmark_to_series(self, benchmark: IBenchmark) -> pd.Series:
"""
return pd.Series({r.year: r.value for r in benchmark.projections}, name=(benchmark.region, benchmark.sector))

def _get_projected_intensities(self, scope: EScope = EScope.S1S2) -> pd.Series:
def _get_projected_intensities(self, scope: PScope = PScope.S1S2) -> pd.Series:
"""
Converts IBenchmarkScopes into dataframe for a scope
:param scope: a scope
Expand All @@ -261,7 +272,7 @@ def _get_projected_intensities(self, scope: EScope = EScope.S1S2) -> pd.Series:
return df_bm

def _get_intensity_benchmarks(self, company_sector_region_info: pd.DataFrame,
scope: EScope = EScope.S1S2) -> pd.DataFrame:
scope: PScope = PScope.S1S2) -> pd.DataFrame:
"""
Overrides subclass method
returns a Dataframe with production benchmarks per company_id given a region and sector.
Expand Down
4 changes: 2 additions & 2 deletions ITR/data/data_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ def get_company_intensity_and_production_at_base_year(self, company_ids: List[st
Get the emission intensity and the production for a list of companies at the base year.
:param: company_ids: list of company ids
:return: DataFrame the following columns :
ColumnsConfig.COMPANY_ID, ColumnsConfig.GHG_S1S2, ColumnsConfig.BASE_EI, ColumnsConfig.SECTOR and
ColumnsConfig.REGION
ColumnsConfig.COMPANY_ID, ColumnsConfig.PRODUCTION, ColumnsConfig.GHG_S1S2, ColumnsConfig.BASE_EI,
ColumnsConfig.SECTOR and ColumnsConfig.REGION
"""
raise NotImplementedError

Expand Down
48 changes: 34 additions & 14 deletions ITR/data/data_warehouse.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from abc import ABC
from abc import ABC # _project
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider removing the stray "# _project" comment.

from typing import List
import pandas as pd
from pydantic import ValidationError
Expand Down Expand Up @@ -43,27 +43,40 @@ def get_preprocessed_company_data(self, company_ids: List[str]) -> List[ICompany
"""
company_data = self.company_data.get_company_data(company_ids)
df_company_data = pd.DataFrame.from_records([c.dict() for c in company_data])

assert pd.Series(company_ids).isin(df_company_data.loc[:, self.column_config.COMPANY_ID]).all(), \
"some of the company ids are not included in the fundamental data"

company_info_at_base_year = self.company_data.get_company_intensity_and_production_at_base_year(company_ids)
# print(f"DW: company_info_at_base_year.loc[] = {company_info_at_base_year.loc['US0185223007']}")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove commented-out code.

projected_production = self.benchmark_projected_production.get_company_projected_production(
company_info_at_base_year)
company_info_at_base_year).sort_index()

df_company_data.loc[:, self.column_config.CUMULATIVE_TRAJECTORY] = self._get_cumulative_emission(
df_new = self._get_cumulative_emission(
projected_emission_intensity=self.company_data.get_company_projected_intensities(company_ids),
projected_production=projected_production).to_numpy()
projected_production=projected_production)
df_new.rename(columns={"cumulative_value":self.column_config.CUMULATIVE_TRAJECTORY}, inplace=True)
df_company_data = df_company_data.merge(df_new, on='company_id', how='right')

df_company_data.loc[:, self.column_config.CUMULATIVE_TARGET] = self._get_cumulative_emission(
df_new = self._get_cumulative_emission(
projected_emission_intensity=self.company_data.get_company_projected_targets(company_ids),
projected_production=projected_production).to_numpy()
projected_production=projected_production)
df_new.rename(columns={"cumulative_value":self.column_config.CUMULATIVE_TARGET}, inplace=True)
df_company_data = df_company_data.merge(df_new, on='company_id', how='right')

df_company_data.loc[:, self.column_config.CUMULATIVE_BUDGET] = self._get_cumulative_emission(
df_new = self._get_cumulative_emission(
projected_emission_intensity=self.benchmarks_projected_emission_intensity.get_SDA_intensity_benchmarks(
company_info_at_base_year),
projected_production=projected_production).to_numpy()

projected_production=projected_production)
df_new.rename(columns={"cumulative_value":self.column_config.CUMULATIVE_BUDGET}, inplace=True)
df_company_data = df_company_data.merge(df_new, on='company_id', how='right')

# 'US00130H1059', 'US0185223007', 'US0188021085'
# print(f"df_company_data.columns = {df_company_data.columns}")
# print(f"BUDG:\n{df_company_data.loc[df_company_data.index<40,['company_id',self.column_config.CUMULATIVE_BUDGET]]}\n\n")
# print(f"CIABY:\n{company_info_at_base_year.loc[df_company_data.index<40,:]}\n\n")
# print(f"""SDA:\n{self.benchmarks_projected_emission_intensity.get_SDA_intensity_benchmarks(
# company_info_at_base_year).loc[df_company_data.index<40,:]}\n\n""")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove commented-out code.

df_company_data.loc[:,
self.column_config.BENCHMARK_GLOBAL_BUDGET] = self.benchmarks_projected_emission_intensity.benchmark_global_budget
df_company_data.loc[:,
Expand Down Expand Up @@ -94,7 +107,7 @@ def _convert_df_to_model(self, df_company_data: pd.DataFrame) -> List[ICompanyAg
model_companies.append(ICompanyAggregates.parse_obj(company_data))
except ValidationError as e:
logger.warning(
"(one of) the input(s) of company %s is invalid and will be skipped" % company_data[
"DW: (one of) the input(s) of company %s is invalid and will be skipped" % company_data[
self.column_config.COMPANY_NAME])
pass
return model_companies
Expand All @@ -107,6 +120,13 @@ def _get_cumulative_emission(self, projected_emission_intensity: pd.DataFrame, p
:param projected_production: series of projected production series
:return: weighted sum of production and emission
"""

return projected_emission_intensity.reset_index(drop=True).multiply(projected_production.reset_index(
drop=True)).sum(axis=1)
# print(f"DW: projected_emission_intensity['US0185223007'] = {projected_emission_intensity.loc['US0185223007']}")
# print(f"DW: projected_production['US0185223007'] = {projected_production.loc['US0185223007']}")
# print(projected_emission_intensity.index[0:3])
# print(projected_emission_intensity.iloc[0:3])
# print(projected_production.index[0:3])
# print(projected_production.iloc[0:3])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove commented-out code.

df = projected_emission_intensity.multiply(projected_production).sum(axis=1)
df = pd.DataFrame(data=df, index=df.index).reset_index()
df.rename(columns={'index':'company_id', 0:'cumulative_value'},inplace=True)
return df
28 changes: 16 additions & 12 deletions ITR/data/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from ITR.data.base_providers import BaseCompanyDataProvider, BaseProviderProductionBenchmark, \
BaseProviderIntensityBenchmark
from ITR.configs import ColumnsConfig, TemperatureScoreConfig, SectorsConfig
from ITR.interfaces import ICompanyData, ICompanyProjection, EScope, IEmissionIntensityBenchmarkScopes, \
from ITR.interfaces import ICompanyData, ICompanyProjection, PScope, IEmissionIntensityBenchmarkScopes, \
IProductionBenchmarkScopes, IBenchmark, IBenchmarks, IBenchmarkProjection
import logging

Expand All @@ -30,7 +30,7 @@ def convert_benchmark_excel_to_model(df_excel: pd.DataFrame, sheetname: str, col
result.append(bm)
return IBenchmarks(benchmarks=result)


# ??? This duplicates info from
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it duplicates anything: it refers to the tabs of an Excel file rather than to columns. We could consider moving it to configs.py.
I think the comment should be removed.

class TabsConfig:
FUNDAMENTAL = "fundamental_data"
PROJECTED_EI = "projected_ei_in_Wh"
Expand All @@ -53,7 +53,7 @@ def __init__(self, excel_path: str, column_config: Type[ColumnsConfig] = Columns
production_bms = self._convert_excel_to_model(self.benchmark_excel, TabsConfig.PROJECTED_PRODUCTION,
column_config.REGION, column_config.SECTOR)
super().__init__(
IProductionBenchmarkScopes(S1S2=production_bms), column_config,
IProductionBenchmarkScopes(PRODUCTION=production_bms), column_config,
tempscore_config)

def _check_sector_data(self) -> None:
Expand All @@ -65,7 +65,7 @@ def _check_sector_data(self) -> None:
assert pd.Series([TabsConfig.PROJECTED_PRODUCTION, TabsConfig.PROJECTED_EI]).isin(
self.benchmark_excel.keys()).all(), "some tabs are missing in the sector data excel"

def _get_projected_production(self, scope: EScope = EScope.S1S2) -> pd.DataFrame:
def _get_projected_production(self, scope: PScope = PScope.PRODUCTION) -> pd.DataFrame:
"""
interface from excel file and internally used DataFrame
:param scope:
Expand Down Expand Up @@ -137,9 +137,10 @@ def _convert_excel_data_to_ICompanyData(self, excel_path: str) -> List[ICompanyD

df_fundamentals = df_company_data[TabsConfig.FUNDAMENTAL]
company_ids = df_fundamentals[self.column_config.COMPANY_ID].unique()
df_production = self._get_projection(company_ids, df_company_data[TabsConfig.PROJECTED_PRODUCTION])
df_targets = self._get_projection(company_ids, df_company_data[TabsConfig.PROJECTED_TARGET])
df_ei = self._get_projection(company_ids, df_company_data[TabsConfig.PROJECTED_EI])
return self._company_df_to_model(df_fundamentals, df_targets, df_ei)
return self._company_df_to_model(df_fundamentals, df_production, df_targets, df_ei)

def _convert_series_to_projections(self, projections: pd.Series, convert_unit: bool = False) -> List[
ICompanyProjection]:
Expand All @@ -152,12 +153,14 @@ def _convert_series_to_projections(self, projections: pd.Series, convert_unit: b
projections = projections * self.ENERGY_UNIT_CONVERSION_FACTOR if convert_unit else projections
return [ICompanyProjection(year=y, value=v) for y, v in projections.items()]

def _company_df_to_model(self, df_fundamentals: pd.DataFrame, df_targets: pd.DataFrame, df_ei: pd.DataFrame) -> \
def _company_df_to_model(self, df_fundamentals: pd.DataFrame,
df_production: pd.DataFrame, df_targets: pd.DataFrame, df_ei: pd.DataFrame) -> \
List[ICompanyData]:
"""
transforms target Dataframe into list of IDataProviderTarget instances

:param df_fundamentals: pandas Dataframe with fundamental data
:param df_production: pandas Dataframe with production
:param df_targets: pandas Dataframe with targets
:param df_ei: pandas Dataframe with emission intensities
:return: A list containing the ICompanyData objects
Expand All @@ -171,20 +174,21 @@ def _company_df_to_model(self, df_fundamentals: pd.DataFrame, df_targets: pd.Dat
for company_data in companies_data_dict:
try:
convert_unit_of_measure = company_data[self.column_config.SECTOR] in self.CORRECTION_SECTORS
company_targets = self._convert_series_to_projections(
df_targets.loc[company_data[self.column_config.COMPANY_ID], :], convert_unit_of_measure)
company_production = self._convert_series_to_projections(
df_production.loc[company_data[self.column_config.COMPANY_ID], :], convert_unit_of_measure)
company_ei = self._convert_series_to_projections(
df_ei.loc[company_data[self.column_config.COMPANY_ID], :],
convert_unit_of_measure)

df_ei.loc[company_data[self.column_config.COMPANY_ID], :], convert_unit_of_measure)
company_targets = self._convert_series_to_projections(
df_targets.loc[company_data[self.column_config.COMPANY_ID], :], False)
company_data.update({self.column_config.PROJECTED_PRODUCTION: {'PRODUCTION': {'projections': company_production}}})
company_data.update({self.column_config.PROJECTED_TARGETS: {'S1S2': {'projections': company_targets}}})
company_data.update({self.column_config.PROJECTED_EI: {'S1S2': {'projections': company_ei}}})

model_companies.append(ICompanyData.parse_obj(company_data))

except ValidationError as e:
logger.warning(
"(one of) the input(s) of company %s is invalid and will be skipped" % company_data[
"EX: (one of) the input(s) of company %s is invalid and will be skipped" % company_data[
self.column_config.COMPANY_NAME])
pass
return model_companies
Expand Down
Loading