Custom EPDx Parser in Python
EPDx is an exchange format for Environmental Product Declarations (EPDs). It is based on JSON and is designed to be easy to read and write by humans and machines alike. EPDx is an open standard that can be used by anyone without restriction.
For EPDx to work as an exchange format, it must be possible to convert data from other formats into EPDx. This write-up shows how to create a custom parser that converts a CSV file into EPDx.
To do that, we will override the `from_dict` class method of the `epdx.pydantic.EPD` class.
This method is used to convert a dictionary into an instance of `epdx.pydantic.EPD`.
Basically what we need to do is:
from epdx.pydantic import EPD
class EPDx(EPD):
    @classmethod
    def from_dict(cls, table7_object: dict):
        """Build an EPDx object from one row of the table 7 csv."""
        # Placeholder: the actual field mapping still has to be written here.
        return cls(...)
Convert CSV to Dict
However, the first thing we need to do is to convert the data from the CSV file into a dictionary.
We can use the `csv` module for that:
def main(path: Path, out_path: Path):
    """Convert every row of the CSV file at ``path`` via ``parse_row``.

    Args:
        path: Location of the CSV file to read. The first line is used as
            the header, so each row is handed on as a dict keyed by column
            name.
        out_path: Directory handed to ``parse_row`` for every row. It is
            created (including missing parents) if it does not exist yet.
    """
    # Without this the first write into a fresh output folder would fail.
    out_path.mkdir(parents=True, exist_ok=True)
    # The csv module asks for files opened with newline="" so that line
    # breaks inside quoted fields reach the parser unmodified.
    with path.open(newline="") as csv_file:
        for row in csv.DictReader(csv_file):
            parse_row(row, out_path)
if __name__ == "__main__":
    # The input CSV is resolved against the current working directory, while
    # the output goes to a "table7" folder beside this script's directory.
    source_csv = Path("tabel7.csv")
    target_dir = Path(__file__).parent.parent / "table7"
    main(source_csv, target_dir)
CSV to EPDx
The next thing we need to do is to convert each row of the CSV file into an EPDx object and write it to disk as a JSON file.
def parse_row(row: dict, out_path: Path):
    """Convert one CSV row to EPDx and write it to ``<out_path>/<id>.json``.

    Rows whose "Sorterings ID" starts with "#S" are skipped and nothing is
    written for them (presumably these are section/heading rows rather than
    products; confirm against the CSV).

    Args:
        row: A single CSV row as a dict keyed by column name.
        out_path: Existing directory that receives the JSON file.
    """
    if row.get("Sorterings ID").startswith("#S"):
        return
    epd = EPDx.from_dict(row)
    target = out_path / f"{epd.id}.json"
    # ensure_ascii=False leaves non-ASCII characters unescaped in the output,
    # so the file must be written as UTF-8 explicitly instead of relying on
    # the platform's default encoding.
    target.write_text(epd.json(ensure_ascii=False, indent=2), encoding="utf-8")
EPDx Class
Next, we just need to populate the `from_dict` method and "map" the right fields from the CSV to the fields in the `EPDx` class.
class EPDx(EPD):
    @classmethod
    def from_dict(cls, table7_object: dict):
        """Build an EPDx object from one row of the table 7 csv.

        Args:
            table7_object: One csv row as a dict keyed by the csv column
                headers.

        Returns:
            An instance of ``cls`` populated from the row.
        """
        row = table7_object
        factor = float(row.get("Deklareret faktor (FU)"))
        sort_id = row.get("Sorterings ID")

        epd_id = cls.convert_lcabyg_id(sort_id)
        epdx_version = importlib.metadata.version("epdx")
        unit = cls.convert_unit(row.get("Deklareret enhed (FU)"))
        subtype = cls.convert_subtype(row.get("Data type"))
        # The mass factor is multiplied by the declared factor, whereas the
        # GWP values below are divided by it.
        mass_conversion = {
            "to": Unit.KG,
            "value": float(row.get("Masse faktor")) * factor,
        }

        # Every module starts out as None; only A1-A3, C3, C4 and D are read
        # from the row.
        gwp = dict.fromkeys(
            (
                "a1a3", "a4", "a5",
                "b1", "b2", "b3", "b4", "b5", "b6", "b7",
                "c1", "c2", "c3", "c4",
                "d",
            )
        )
        gwp["a1a3"] = cls.convert_gwp(row.get("Global Opvarmning, modul A1-A3"), factor)
        gwp["c3"] = cls.convert_gwp(row.get("Global Opvarmning, modul C3"), factor)
        gwp["c4"] = cls.convert_gwp(row.get("Global Opvarmning, modul C4"), factor)
        gwp["d"] = cls.convert_gwp(row.get("Global Opvarmning, modul D"), factor)

        return cls(
            id=epd_id,
            format_version=epdx_version,
            name=row.get("Navn DK"),
            version="version 2 - 201222",
            declared_unit=unit,
            valid_until=datetime(year=2025, month=12, day=22),
            published_date=datetime(year=2020, month=12, day=22),
            source="BR18 - Tabel 7",
            standard=Standard.EN15804A1,
            subtype=subtype,
            # The original table 7 ID is kept in the comment field.
            comment=sort_id,
            reference_service_life=None,
            location="DK",
            conversions=[mass_conversion],
            gwp=gwp,
            meta_fields={"data_source": row.get("Url (link)")},
        )
Complete Code
The full file can be seen below and is also available in the Table7 repository.
import csv
import functools
import importlib.metadata
import io
import json
import uuid
from datetime import datetime
from pathlib import Path

from epdx.pydantic import EPD, Standard, SubType, Unit
@functools.cache
def _load_lcabyg_map() -> dict:
    """Read ``lcabyg_tabel7_map.json`` once and reuse the parsed result."""
    return json.loads(Path("lcabyg_tabel7_map.json").read_text())


class EPDx(EPD):
    """EPD model that can be built from a row of the table 7 csv."""

    @classmethod
    def from_dict(cls, table7_object: dict):
        """Build an EPDx object from one row of the table 7 csv.

        Args:
            table7_object: One csv row as a dict keyed by the csv column
                headers.

        Returns:
            An instance of ``cls`` populated from the row.
        """
        row = table7_object
        factor = float(row.get("Deklareret faktor (FU)"))
        sort_id = row.get("Sorterings ID")

        epd_id = cls.convert_lcabyg_id(sort_id)
        epdx_version = importlib.metadata.version("epdx")
        unit = cls.convert_unit(row.get("Deklareret enhed (FU)"))
        subtype = cls.convert_subtype(row.get("Data type"))
        # The mass factor is multiplied by the declared factor, whereas the
        # GWP values below are divided by it.
        mass_conversion = {
            "to": Unit.KG,
            "value": float(row.get("Masse faktor")) * factor,
        }

        # Every module starts out as None; only A1-A3, C3, C4 and D are read
        # from the row.
        gwp = dict.fromkeys(
            (
                "a1a3", "a4", "a5",
                "b1", "b2", "b3", "b4", "b5", "b6", "b7",
                "c1", "c2", "c3", "c4",
                "d",
            )
        )
        gwp["a1a3"] = cls.convert_gwp(row.get("Global Opvarmning, modul A1-A3"), factor)
        gwp["c3"] = cls.convert_gwp(row.get("Global Opvarmning, modul C3"), factor)
        gwp["c4"] = cls.convert_gwp(row.get("Global Opvarmning, modul C4"), factor)
        gwp["d"] = cls.convert_gwp(row.get("Global Opvarmning, modul D"), factor)

        return cls(
            id=epd_id,
            format_version=epdx_version,
            name=row.get("Navn DK"),
            version="version 2 - 201222",
            declared_unit=unit,
            valid_until=datetime(year=2025, month=12, day=22),
            published_date=datetime(year=2020, month=12, day=22),
            source="BR18 - Tabel 7",
            standard=Standard.EN15804A1,
            subtype=subtype,
            # The original table 7 ID is kept in the comment field.
            comment=sort_id,
            reference_service_life=None,
            location="DK",
            conversions=[mass_conversion],
            gwp=gwp,
            meta_fields={"data_source": row.get("Url (link)")},
        )

    @staticmethod
    def convert_lcabyg_id(bpst_id: str) -> str:
        """Return the ID stored for ``bpst_id`` in ``lcabyg_tabel7_map.json``.

        IDs that are not in the map get a freshly generated UUID4 string.
        The map file is parsed once and cached rather than re-read for every
        row.
        """
        return _load_lcabyg_map().get(bpst_id, str(uuid.uuid4()))

    @staticmethod
    def convert_unit(unit: str) -> Unit:
        """Translate a table 7 unit code into a ``Unit``.

        Codes that are not listed map to ``Unit.UNKNOWN``.
        """
        known_units = {
            "STK": Unit.PCS,
            "M": Unit.M,
            "M2": Unit.M2,
            "M3": Unit.M3,
            "KG": Unit.KG,
            "L": Unit.L,
        }
        return known_units.get(unit, Unit.UNKNOWN)

    @staticmethod
    def convert_subtype(subtype: str) -> SubType | None:
        """Translate the "Data type" column into a ``SubType``.

        Returns ``None`` for any value other than the two listed ones.
        """
        _map = {
            "Generisk data": SubType.Generic,
            "Branche data": SubType.Industry,
        }
        return _map.get(subtype)

    @staticmethod
    def convert_gwp(gwp: str | None, declared_factor: float) -> float | None:
        """Parse a GWP cell and divide it by the declared factor.

        Args:
            gwp: Raw cell text. ``"-"``, a blank cell and a missing cell
                (``None``) all mean that no value is available.
            declared_factor: Divisor applied to the parsed value.

        Returns:
            The parsed value divided by ``declared_factor``, or ``None`` when
            no value is available.

        Raises:
            ValueError: If the cell holds text that is not a number.
        """
        if gwp is None or gwp.strip() in {"", "-"}:
            return None
        return float(gwp) / declared_factor
def main(path: Path, out_path: Path):
    """Convert every row of the CSV file at ``path`` via ``parse_row``.

    Args:
        path: Location of the CSV file to read. The first line is used as
            the header, so each row is handed on as a dict keyed by column
            name.
        out_path: Directory handed to ``parse_row`` for every row. It is
            created (including missing parents) if it does not exist yet.
    """
    # Without this the first write into a fresh output folder would fail.
    out_path.mkdir(parents=True, exist_ok=True)
    # The csv module asks for files opened with newline="" so that line
    # breaks inside quoted fields reach the parser unmodified.
    with path.open(newline="") as csv_file:
        for row in csv.DictReader(csv_file):
            parse_row(row, out_path)
def parse_row(row: dict, out_path: Path):
    """Convert one CSV row to EPDx and write it to ``<out_path>/<id>.json``.

    Rows whose "Sorterings ID" starts with "#S" are skipped and nothing is
    written for them (presumably these are section/heading rows rather than
    products; confirm against the CSV).

    Args:
        row: A single CSV row as a dict keyed by column name.
        out_path: Existing directory that receives the JSON file.
    """
    if row.get("Sorterings ID").startswith("#S"):
        return
    epd = EPDx.from_dict(row)
    target = out_path / f"{epd.id}.json"
    # ensure_ascii=False leaves non-ASCII characters unescaped in the output,
    # so the file must be written as UTF-8 explicitly instead of relying on
    # the platform's default encoding.
    target.write_text(epd.json(ensure_ascii=False, indent=2), encoding="utf-8")
if __name__ == "__main__":
    # The input CSV is resolved against the current working directory, while
    # the output goes to a "table7" folder beside this script's directory.
    source_csv = Path("tabel7.csv")
    target_dir = Path(__file__).parent.parent / "table7"
    main(source_csv, target_dir)