-
Notifications
You must be signed in to change notification settings - Fork 8
Create credible temp score using RMI data #16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
3d1a26e
03006c4
ae9c44d
3e42e0e
6329dcb
d2da6c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,7 @@ | |
| from typing import List, Type | ||
| from ITR.configs import ColumnsConfig, TemperatureScoreConfig | ||
| from ITR.data.data_providers import CompanyDataProvider, ProductionBenchmarkDataProvider, IntensityBenchmarkDataProvider | ||
| from ITR.interfaces import ICompanyData, EScope, IProductionBenchmarkScopes, IEmissionIntensityBenchmarkScopes, \ | ||
| from ITR.interfaces import ICompanyData, PScope, IProductionBenchmarkScopes, IEmissionIntensityBenchmarkScopes, \ | ||
| IBenchmark | ||
|
|
||
|
|
||
|
|
@@ -28,7 +28,7 @@ def __init__(self, | |
| self.temp_config = tempscore_config | ||
|
|
||
| def _convert_projections_to_series(self, company: ICompanyData, feature: str, | ||
| scope: EScope = EScope.S1S2) -> pd.Series: | ||
| scope: PScope = PScope.S1S2) -> pd.Series: | ||
| """ | ||
| extracts the company projected intensities or targets for a given scope | ||
| :param feature: PROJECTED_EI or PROJECTED_TARGETS | ||
|
|
@@ -78,15 +78,17 @@ def get_company_intensity_and_production_at_base_year(self, company_ids: List[st | |
| overrides subclass method | ||
| :param: company_ids: list of company ids | ||
| :return: DataFrame the following columns : | ||
| ColumnsConfig.COMPANY_ID, ColumnsConfig.GHG_S1S2, ColumnsConfig.BASE_EI, ColumnsConfig.SECTOR and | ||
| ColumnsConfig.REGION | ||
| ColumnsConfig.COMPANY_ID, ColumnsConfig.PRODUCTION, ColumnsConfig.GHG_S1S2, ColumnsConfig.BASE_EI, | ||
| ColumnsConfig.SECTOR and ColumnsConfig.REGION | ||
| """ | ||
| df_fundamentals = self.get_company_fundamentals(company_ids) | ||
| base_year = self.temp_config.CONTROLS_CONFIG.base_year | ||
| company_info = df_fundamentals.loc[ | ||
| company_ids, [self.column_config.SECTOR, self.column_config.REGION, | ||
| self.column_config.PRODUCTION, | ||
| self.column_config.GHG_SCOPE12]] | ||
| ei_at_base = self._get_company_intensity_at_year(base_year, company_ids).rename(self.column_config.BASE_EI) | ||
| # print(f"BA: company_info.loc[] = {company_info.loc['US0185223007']}") | ||
| return company_info.merge(ei_at_base, left_index=True, right_index=True) | ||
|
|
||
| def get_company_fundamentals(self, company_ids: List[str]) -> pd.DataFrame: | ||
|
|
@@ -96,7 +98,7 @@ def get_company_fundamentals(self, company_ids: List[str]) -> pd.DataFrame: | |
| """ | ||
| return pd.DataFrame.from_records( | ||
| [ICompanyData.parse_obj(c).dict() for c in self.get_company_data(company_ids)], | ||
| exclude=['projected_targets', 'projected_intensities']).set_index(self.column_config.COMPANY_ID) | ||
| exclude=['projected_production_units', 'projected_targets', 'projected_intensities']).set_index(self.column_config.COMPANY_ID) | ||
|
|
||
| def get_company_projected_intensities(self, company_ids: List[str]) -> pd.DataFrame: | ||
| """ | ||
|
|
@@ -116,6 +118,15 @@ def get_company_projected_targets(self, company_ids: List[str]) -> pd.DataFrame: | |
| [self._convert_projections_to_series(c, self.column_config.PROJECTED_TARGETS) for c in | ||
| self.get_company_data(company_ids)]) | ||
|
|
||
| def get_company_projected_production(self, company_ids: List[str]) -> pd.DataFrame: | ||
| """ | ||
| :param company_ids: A list of company IDs | ||
| :return: A pandas DataFrame with projected production per company | ||
| """ | ||
| return pd.DataFrame( | ||
| [self._convert_projections_to_series(c, self.column_config.PROJECTED_PRODUCTION) for c in | ||
| self.get_company_data(company_ids)]) | ||
|
|
||
|
|
||
| class BaseProviderProductionBenchmark(ProductionBenchmarkDataProvider): | ||
|
|
||
|
|
@@ -142,7 +153,7 @@ def _convert_benchmark_to_series(self, benchmark: IBenchmark) -> pd.Series: | |
| """ | ||
| return pd.Series({r.year: r.value for r in benchmark.projections}, name=(benchmark.region, benchmark.sector)) | ||
|
|
||
| def _get_projected_production(self, scope: EScope = EScope.S1S2) -> pd.DataFrame: | ||
| def _get_projected_production(self, scope: PScope = PScope.PRODUCTION) -> pd.DataFrame: | ||
| """ | ||
| Converts IBenchmarkScopes into dataframe for a scope | ||
| :param scope: a scope | ||
|
|
@@ -156,19 +167,19 @@ def _get_projected_production(self, scope: EScope = EScope.S1S2) -> pd.DataFrame | |
|
|
||
| return df_bm | ||
|
|
||
| def get_company_projected_production(self, ghg_scope12: pd.DataFrame) -> pd.DataFrame: | ||
| def get_company_projected_production(self, production: pd.DataFrame) -> pd.DataFrame: | ||
| """ | ||
| get the projected productions for list of companies in ghg_scope12 | ||
| :param ghg_scope12: DataFrame with at least the following columns : | ||
| ColumnsConfig.COMPANY_ID,ColumnsConfig.GHG_SCOPE12, ColumnsConfig.SECTOR and ColumnsConfig.REGION | ||
| get the projected productions for list of companies (PRODUCTIONS not S1S2) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "(PRODUCTIONS not S1S2)" might be superfluous |
||
| :param production: DataFrame with at least the following columns : | ||
| ColumnsConfig.COMPANY_ID,ColumnsConfig.PRODUCTION, ColumnsConfig.SECTOR and ColumnsConfig.REGION | ||
| :return: DataFrame of projected productions for [base_year - base_year + 50] | ||
| """ | ||
| benchmark_production_projections = self.get_benchmark_projections(ghg_scope12) | ||
| benchmark_production_projections = self.get_benchmark_projections(production) | ||
| return benchmark_production_projections.add(1).cumprod(axis=1).mul( | ||
| ghg_scope12[self.column_config.GHG_SCOPE12].values, axis=0) | ||
| production[self.column_config.PRODUCTION].values, axis=0) | ||
|
|
||
| def get_benchmark_projections(self, company_sector_region_info: pd.DataFrame, | ||
| scope: EScope = EScope.S1S2) -> pd.DataFrame: | ||
| scope: PScope = PScope.S1S2) -> pd.DataFrame: | ||
| """ | ||
| Overrides subclass method | ||
| returns a Dataframe with production benchmarks per company_id given a region and sector. | ||
|
|
@@ -246,7 +257,7 @@ def _convert_benchmark_to_series(self, benchmark: IBenchmark) -> pd.Series: | |
| """ | ||
| return pd.Series({r.year: r.value for r in benchmark.projections}, name=(benchmark.region, benchmark.sector)) | ||
|
|
||
| def _get_projected_intensities(self, scope: EScope = EScope.S1S2) -> pd.Series: | ||
| def _get_projected_intensities(self, scope: PScope = PScope.S1S2) -> pd.Series: | ||
| """ | ||
| Converts IBenchmarkScopes into dataframe for a scope | ||
| :param scope: a scope | ||
|
|
@@ -261,7 +272,7 @@ def _get_projected_intensities(self, scope: EScope = EScope.S1S2) -> pd.Series: | |
| return df_bm | ||
|
|
||
| def _get_intensity_benchmarks(self, company_sector_region_info: pd.DataFrame, | ||
| scope: EScope = EScope.S1S2) -> pd.DataFrame: | ||
| scope: PScope = PScope.S1S2) -> pd.DataFrame: | ||
| """ | ||
| Overrides subclass method | ||
| returns a Dataframe with production benchmarks per company_id given a region and sector. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| from abc import ABC | ||
| from abc import ABC # _project | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Consider removing "# _project" |
||
| from typing import List | ||
| import pandas as pd | ||
| from pydantic import ValidationError | ||
|
|
@@ -43,27 +43,40 @@ def get_preprocessed_company_data(self, company_ids: List[str]) -> List[ICompany | |
| """ | ||
| company_data = self.company_data.get_company_data(company_ids) | ||
| df_company_data = pd.DataFrame.from_records([c.dict() for c in company_data]) | ||
|
|
||
| assert pd.Series(company_ids).isin(df_company_data.loc[:, self.column_config.COMPANY_ID]).all(), \ | ||
| "some of the company ids are not included in the fundamental data" | ||
|
|
||
| company_info_at_base_year = self.company_data.get_company_intensity_and_production_at_base_year(company_ids) | ||
| # print(f"DW: company_info_at_base_year.loc[] = {company_info_at_base_year.loc['US0185223007']}") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove commented code |
||
| projected_production = self.benchmark_projected_production.get_company_projected_production( | ||
| company_info_at_base_year) | ||
| company_info_at_base_year).sort_index() | ||
|
|
||
| df_company_data.loc[:, self.column_config.CUMULATIVE_TRAJECTORY] = self._get_cumulative_emission( | ||
| df_new = self._get_cumulative_emission( | ||
| projected_emission_intensity=self.company_data.get_company_projected_intensities(company_ids), | ||
| projected_production=projected_production).to_numpy() | ||
| projected_production=projected_production) | ||
| df_new.rename(columns={"cumulative_value":self.column_config.CUMULATIVE_TRAJECTORY}, inplace=True) | ||
| df_company_data = df_company_data.merge(df_new, on='company_id', how='right') | ||
|
|
||
| df_company_data.loc[:, self.column_config.CUMULATIVE_TARGET] = self._get_cumulative_emission( | ||
| df_new = self._get_cumulative_emission( | ||
| projected_emission_intensity=self.company_data.get_company_projected_targets(company_ids), | ||
| projected_production=projected_production).to_numpy() | ||
| projected_production=projected_production) | ||
| df_new.rename(columns={"cumulative_value":self.column_config.CUMULATIVE_TARGET}, inplace=True) | ||
| df_company_data = df_company_data.merge(df_new, on='company_id', how='right') | ||
|
|
||
| df_company_data.loc[:, self.column_config.CUMULATIVE_BUDGET] = self._get_cumulative_emission( | ||
| df_new = self._get_cumulative_emission( | ||
| projected_emission_intensity=self.benchmarks_projected_emission_intensity.get_SDA_intensity_benchmarks( | ||
| company_info_at_base_year), | ||
| projected_production=projected_production).to_numpy() | ||
|
|
||
| projected_production=projected_production) | ||
| df_new.rename(columns={"cumulative_value":self.column_config.CUMULATIVE_BUDGET}, inplace=True) | ||
| df_company_data = df_company_data.merge(df_new, on='company_id', how='right') | ||
|
|
||
| # 'US00130H1059', 'US0185223007', 'US0188021085' | ||
| # print(f"df_company_data.columns = {df_company_data.columns}") | ||
| # print(f"BUDG:\n{df_company_data.loc[df_company_data.index<40,['company_id',self.column_config.CUMULATIVE_BUDGET]]}\n\n") | ||
| # print(f"CIABY:\n{company_info_at_base_year.loc[df_company_data.index<40,:]}\n\n") | ||
| # print(f"""SDA:\n{self.benchmarks_projected_emission_intensity.get_SDA_intensity_benchmarks( | ||
| # company_info_at_base_year).loc[df_company_data.index<40,:]}\n\n""") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove commented code |
||
| df_company_data.loc[:, | ||
| self.column_config.BENCHMARK_GLOBAL_BUDGET] = self.benchmarks_projected_emission_intensity.benchmark_global_budget | ||
| df_company_data.loc[:, | ||
|
|
@@ -94,7 +107,7 @@ def _convert_df_to_model(self, df_company_data: pd.DataFrame) -> List[ICompanyAg | |
| model_companies.append(ICompanyAggregates.parse_obj(company_data)) | ||
| except ValidationError as e: | ||
| logger.warning( | ||
| "(one of) the input(s) of company %s is invalid and will be skipped" % company_data[ | ||
| "DW: (one of) the input(s) of company %s is invalid and will be skipped" % company_data[ | ||
| self.column_config.COMPANY_NAME]) | ||
| pass | ||
| return model_companies | ||
|
|
@@ -107,6 +120,13 @@ def _get_cumulative_emission(self, projected_emission_intensity: pd.DataFrame, p | |
| :param projected_production: series of projected production series | ||
| :return: weighted sum of production and emission | ||
| """ | ||
|
|
||
| return projected_emission_intensity.reset_index(drop=True).multiply(projected_production.reset_index( | ||
| drop=True)).sum(axis=1) | ||
| # print(f"DW: projected_emission_intensity['US0185223007'] = {projected_emission_intensity.loc['US0185223007']}") | ||
| # print(f"DW: projected_production['US0185223007'] = {projected_production.loc['US0185223007']}") | ||
| # print(projected_emission_intensity.index[0:3]) | ||
| # print(projected_emission_intensity.iloc[0:3]) | ||
| # print(projected_production.index[0:3]) | ||
| # print(projected_production.iloc[0:3]) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove commented code |
||
| df = projected_emission_intensity.multiply(projected_production).sum(axis=1) | ||
| df = pd.DataFrame(data=df, index=df.index).reset_index() | ||
| df.rename(columns={'index':'company_id', 0:'cumulative_value'},inplace=True) | ||
| return df | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,7 @@ | |
| from ITR.data.base_providers import BaseCompanyDataProvider, BaseProviderProductionBenchmark, \ | ||
| BaseProviderIntensityBenchmark | ||
| from ITR.configs import ColumnsConfig, TemperatureScoreConfig, SectorsConfig | ||
| from ITR.interfaces import ICompanyData, ICompanyProjection, EScope, IEmissionIntensityBenchmarkScopes, \ | ||
| from ITR.interfaces import ICompanyData, ICompanyProjection, PScope, IEmissionIntensityBenchmarkScopes, \ | ||
| IProductionBenchmarkScopes, IBenchmark, IBenchmarks, IBenchmarkProjection | ||
| import logging | ||
|
|
||
|
|
@@ -30,7 +30,7 @@ def convert_benchmark_excel_to_model(df_excel: pd.DataFrame, sheetname: str, col | |
| result.append(bm) | ||
| return IBenchmarks(benchmarks=result) | ||
|
|
||
|
|
||
| # ??? This duplicates info from | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think it duplicates: it refers to tabs of an Excel file rather than columns. We could consider moving it to configs.py. |
||
| class TabsConfig: | ||
| FUNDAMENTAL = "fundamental_data" | ||
| PROJECTED_EI = "projected_ei_in_Wh" | ||
|
|
@@ -53,7 +53,7 @@ def __init__(self, excel_path: str, column_config: Type[ColumnsConfig] = Columns | |
| production_bms = self._convert_excel_to_model(self.benchmark_excel, TabsConfig.PROJECTED_PRODUCTION, | ||
| column_config.REGION, column_config.SECTOR) | ||
| super().__init__( | ||
| IProductionBenchmarkScopes(S1S2=production_bms), column_config, | ||
| IProductionBenchmarkScopes(PRODUCTION=production_bms), column_config, | ||
| tempscore_config) | ||
|
|
||
| def _check_sector_data(self) -> None: | ||
|
|
@@ -65,7 +65,7 @@ def _check_sector_data(self) -> None: | |
| assert pd.Series([TabsConfig.PROJECTED_PRODUCTION, TabsConfig.PROJECTED_EI]).isin( | ||
| self.benchmark_excel.keys()).all(), "some tabs are missing in the sector data excel" | ||
|
|
||
| def _get_projected_production(self, scope: EScope = EScope.S1S2) -> pd.DataFrame: | ||
| def _get_projected_production(self, scope: PScope = PScope.PRODUCTION) -> pd.DataFrame: | ||
| """ | ||
| interface from excel file and internally used DataFrame | ||
| :param scope: | ||
|
|
@@ -137,9 +137,10 @@ def _convert_excel_data_to_ICompanyData(self, excel_path: str) -> List[ICompanyD | |
|
|
||
| df_fundamentals = df_company_data[TabsConfig.FUNDAMENTAL] | ||
| company_ids = df_fundamentals[self.column_config.COMPANY_ID].unique() | ||
| df_production = self._get_projection(company_ids, df_company_data[TabsConfig.PROJECTED_PRODUCTION]) | ||
| df_targets = self._get_projection(company_ids, df_company_data[TabsConfig.PROJECTED_TARGET]) | ||
| df_ei = self._get_projection(company_ids, df_company_data[TabsConfig.PROJECTED_EI]) | ||
| return self._company_df_to_model(df_fundamentals, df_targets, df_ei) | ||
| return self._company_df_to_model(df_fundamentals, df_production, df_targets, df_ei) | ||
|
|
||
| def _convert_series_to_projections(self, projections: pd.Series, convert_unit: bool = False) -> List[ | ||
| ICompanyProjection]: | ||
|
|
@@ -152,12 +153,14 @@ def _convert_series_to_projections(self, projections: pd.Series, convert_unit: b | |
| projections = projections * self.ENERGY_UNIT_CONVERSION_FACTOR if convert_unit else projections | ||
| return [ICompanyProjection(year=y, value=v) for y, v in projections.items()] | ||
|
|
||
| def _company_df_to_model(self, df_fundamentals: pd.DataFrame, df_targets: pd.DataFrame, df_ei: pd.DataFrame) -> \ | ||
| def _company_df_to_model(self, df_fundamentals: pd.DataFrame, | ||
| df_production: pd.DataFrame, df_targets: pd.DataFrame, df_ei: pd.DataFrame) -> \ | ||
| List[ICompanyData]: | ||
| """ | ||
| transforms target Dataframe into list of IDataProviderTarget instances | ||
|
|
||
| :param df_fundamentals: pandas Dataframe with fundamental data | ||
| :param df_production: pandas Dataframe with production | ||
| :param df_targets: pandas Dataframe with targets | ||
| :param df_ei: pandas Dataframe with emission intensities | ||
| :return: A list containing the ICompanyData objects | ||
|
|
@@ -171,20 +174,21 @@ def _company_df_to_model(self, df_fundamentals: pd.DataFrame, df_targets: pd.Dat | |
| for company_data in companies_data_dict: | ||
| try: | ||
| convert_unit_of_measure = company_data[self.column_config.SECTOR] in self.CORRECTION_SECTORS | ||
| company_targets = self._convert_series_to_projections( | ||
| df_targets.loc[company_data[self.column_config.COMPANY_ID], :], convert_unit_of_measure) | ||
| company_production = self._convert_series_to_projections( | ||
| df_production.loc[company_data[self.column_config.COMPANY_ID], :], convert_unit_of_measure) | ||
| company_ei = self._convert_series_to_projections( | ||
| df_ei.loc[company_data[self.column_config.COMPANY_ID], :], | ||
| convert_unit_of_measure) | ||
|
|
||
| df_ei.loc[company_data[self.column_config.COMPANY_ID], :], convert_unit_of_measure) | ||
| company_targets = self._convert_series_to_projections( | ||
| df_targets.loc[company_data[self.column_config.COMPANY_ID], :], False) | ||
| company_data.update({self.column_config.PROJECTED_PRODUCTION: {'PRODUCTION': {'projections': company_production}}}) | ||
| company_data.update({self.column_config.PROJECTED_TARGETS: {'S1S2': {'projections': company_targets}}}) | ||
| company_data.update({self.column_config.PROJECTED_EI: {'S1S2': {'projections': company_ei}}}) | ||
|
|
||
| model_companies.append(ICompanyData.parse_obj(company_data)) | ||
|
|
||
| except ValidationError as e: | ||
| logger.warning( | ||
| "(one of) the input(s) of company %s is invalid and will be skipped" % company_data[ | ||
| "EX: (one of) the input(s) of company %s is invalid and will be skipped" % company_data[ | ||
| self.column_config.COMPANY_NAME]) | ||
| pass | ||
| return model_companies | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove commented code