{
  "@context": {
    "@language": "en",
    "@vocab": "https://schema.org/",
    "citeAs": "cr:citeAs",
    "column": "cr:column",
    "conformsTo": "dct:conformsTo",
    "cr": "http://mlcommons.org/croissant/",
    "data": {
      "@id": "cr:data",
      "@type": "@json"
    },
    "dataType": {
      "@id": "cr:dataType",
      "@type": "@vocab"
    },
    "dct": "http://purl.org/dc/terms/",
    "extract": "cr:extract",
    "field": "cr:field",
    "fileObject": "cr:fileObject",
    "fileProperty": "cr:fileProperty",
    "format": "cr:format",
    "includes": "cr:includes",
    "isLiveDataset": "cr:isLiveDataset",
    "key": "cr:key",
    "md5": "cr:md5",
    "parentField": "cr:parentField",
    "path": "cr:path",
    "recordSet": "cr:recordSet",
    "references": "cr:references",
    "repeated": "cr:repeated",
    "sc": "https://schema.org/",
    "sha256": "sha256",
    "source": "cr:source",
    "subField": "cr:subField"
  },
  "@type": "sc:Dataset",
  "citeAs": null,
  "conformsTo": "http://mlcommons.org/croissant/1.0",
  "creator": [],
  "description": "ChEMBL IR Raman multiblock. v2.0 standardized NIRS package: 2 spectral sources (IR and Raman), 14 declared targets. Auto-generated from dataset_card.json (verify before publication).\n\nOpen tier: freely usable and redistributable under the stated license.",
  "distribution": [
    {
      "@id": "file/X1.parquet",
      "@type": "cr:FileObject",
      "encodingFormat": "application/vnd.apache.parquet",
      "name": "IR",
      "sha256": "3cd569b572569248321169f75f6cdf1cdee7da83bce9d092f1b751a8c9f589a8"
    },
    {
      "@id": "file/X2.parquet",
      "@type": "cr:FileObject",
      "encodingFormat": "application/vnd.apache.parquet",
      "name": "Raman",
      "sha256": "3241d988fb4871984fd90d6d55dcfbcc2020415121372cbb6d64525a3af9b72a"
    },
    {
      "@id": "file/variables.parquet",
      "@type": "cr:FileObject",
      "encodingFormat": "application/vnd.apache.parquet",
      "name": "variables",
      "sha256": "558a7414c21ed57910d34349bb6c294bfe2ffcb4d6624df5b466add9e99e82de"
    }
  ],
  "keywords": [
    "nir",
    "v2",
    "chembl"
  ],
  "license": "CC-BY-4.0",
  "name": "chembl_ir_raman_multiblock",
  "recordSet": [
    {
      "@id": "X1",
      "@type": "cr:RecordSet",
      "description": "Spectra from source 'IR' (50000 observations x 512 wavenumber points).",
      "field": [
        {
          "@id": "X1/sample_id",
          "@type": "cr:Field",
          "dataType": "sc:Text",
          "description": "Sample identity (join key across sources and variables).",
          "name": "sample_id",
          "source": {
            "extract": {
              "column": "sample_id"
            },
            "fileObject": {
              "@id": "file/X1.parquet"
            }
          }
        },
        {
          "@id": "X1/spectrum",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Spectral intensities across the cm-1 axis.",
          "name": "spectrum",
          "repeated": true,
          "source": {
            "fileObject": {
              "@id": "file/X1.parquet"
            }
          }
        }
      ],
      "key": {
        "@id": "X1/sample_id"
      },
      "name": "X1"
    },
    {
      "@id": "X2",
      "@type": "cr:RecordSet",
      "description": "Spectra from source 'Raman' (50000 observations x 512 wavenumber points).",
      "field": [
        {
          "@id": "X2/sample_id",
          "@type": "cr:Field",
          "dataType": "sc:Text",
          "description": "Sample identity (join key across sources and variables).",
          "name": "sample_id",
          "source": {
            "extract": {
              "column": "sample_id"
            },
            "fileObject": {
              "@id": "file/X2.parquet"
            }
          }
        },
        {
          "@id": "X2/spectrum",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Spectral intensities across the cm-1 axis.",
          "name": "spectrum",
          "repeated": true,
          "source": {
            "fileObject": {
              "@id": "file/X2.parquet"
            }
          }
        }
      ],
      "key": {
        "@id": "X2/sample_id"
      },
      "name": "X2"
    },
    {
      "@id": "variables",
      "@type": "cr:RecordSet",
      "description": "Per-sample target and metadata variables.",
      "field": [
        {
          "@id": "variables/sample_id",
          "@type": "cr:Field",
          "dataType": "sc:Text",
          "description": "Sample identity (join key).",
          "name": "sample_id",
          "source": {
            "extract": {
              "column": "sample_id"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/chembl_id",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'chembl_id' (role target, type categorical).",
          "name": "chembl_id",
          "source": {
            "extract": {
              "column": "chembl_id"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/compound_name",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'compound_name' (role target, type categorical).",
          "name": "compound_name",
          "source": {
            "extract": {
              "column": "compound_name"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/molecular_formula",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'molecular_formula' (role target, type categorical).",
          "name": "molecular_formula",
          "source": {
            "extract": {
              "column": "molecular_formula"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/smiles",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'smiles' (role target, type categorical).",
          "name": "smiles",
          "source": {
            "extract": {
              "column": "smiles"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/inchi",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'inchi' (role target, type categorical).",
          "name": "inchi",
          "source": {
            "extract": {
              "column": "inchi"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/inchikey",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'inchikey' (role target, type categorical).",
          "name": "inchikey",
          "source": {
            "extract": {
              "column": "inchikey"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/compound_class",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'compound_class' (role target, type categorical).",
          "name": "compound_class",
          "source": {
            "extract": {
              "column": "compound_class"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/band_gap",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'band_gap' (role target, type numeric).",
          "name": "band_gap",
          "source": {
            "extract": {
              "column": "band_gap"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/dipole_moment_total",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'dipole_moment_total' (role target, type numeric).",
          "name": "dipole_moment_total",
          "source": {
            "extract": {
              "column": "dipole_moment_total"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/isotropic_polarizability",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'isotropic_polarizability' (role target, type numeric).",
          "name": "isotropic_polarizability",
          "source": {
            "extract": {
              "column": "isotropic_polarizability"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/homo",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'homo' (role target, type numeric).",
          "name": "homo",
          "source": {
            "extract": {
              "column": "homo"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/lumo",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'lumo' (role target, type numeric).",
          "name": "lumo",
          "source": {
            "extract": {
              "column": "lumo"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/electronic_ext",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'electronic_ext' (role target, type numeric).",
          "name": "electronic_ext",
          "source": {
            "extract": {
              "column": "electronic_ext"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/heat_capacity",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'heat_capacity' (role target, type numeric).",
          "name": "heat_capacity",
          "source": {
            "extract": {
              "column": "heat_capacity"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/molecule_id",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'molecule_id' (role metadata, type categorical).",
          "name": "molecule_id",
          "source": {
            "extract": {
              "column": "molecule_id"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/spectroscopy_type",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'spectroscopy_type' (role metadata, type categorical).",
          "name": "spectroscopy_type",
          "source": {
            "extract": {
              "column": "spectroscopy_type"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/signal_type",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'signal_type' (role metadata, type categorical).",
          "name": "signal_type",
          "source": {
            "extract": {
              "column": "signal_type"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/axis_unit",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'axis_unit' (role metadata, type categorical).",
          "name": "axis_unit",
          "source": {
            "extract": {
              "column": "axis_unit"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/axis_min",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'axis_min' (role metadata, type numeric).",
          "name": "axis_min",
          "source": {
            "extract": {
              "column": "axis_min"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/axis_max",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'axis_max' (role metadata, type numeric).",
          "name": "axis_max",
          "source": {
            "extract": {
              "column": "axis_max"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/n_points_original",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'n_points_original' (role metadata, type numeric).",
          "name": "n_points_original",
          "source": {
            "extract": {
              "column": "n_points_original"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/preprocessing_original",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'preprocessing_original' (role metadata, type categorical).",
          "name": "preprocessing_original",
          "source": {
            "extract": {
              "column": "preprocessing_original"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/data_source",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'data_source' (role metadata, type categorical).",
          "name": "data_source",
          "source": {
            "extract": {
              "column": "data_source"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/publication_doi",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'publication_doi' (role metadata, type categorical).",
          "name": "publication_doi",
          "source": {
            "extract": {
              "column": "publication_doi"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/citation",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'citation' (role metadata, type categorical).",
          "name": "citation",
          "source": {
            "extract": {
              "column": "citation"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/license",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'license' (role metadata, type categorical).",
          "name": "license",
          "source": {
            "extract": {
              "column": "license"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/rights_status",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'rights_status' (role metadata, type categorical).",
          "name": "rights_status",
          "source": {
            "extract": {
              "column": "rights_status"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/usage_scope",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'usage_scope' (role metadata, type categorical).",
          "name": "usage_scope",
          "source": {
            "extract": {
              "column": "usage_scope"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/notes",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'notes' (role metadata, type categorical).",
          "name": "notes",
          "source": {
            "extract": {
              "column": "notes"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/sdf_name",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'sdf_name' (role metadata, type categorical).",
          "name": "sdf_name",
          "source": {
            "extract": {
              "column": "sdf_name"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/n_atoms",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'n_atoms' (role metadata, type numeric).",
          "name": "n_atoms",
          "source": {
            "extract": {
              "column": "n_atoms"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/n_modes_total",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'n_modes_total' (role metadata, type numeric).",
          "name": "n_modes_total",
          "source": {
            "extract": {
              "column": "n_modes_total"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/n_modes_positive",
          "@type": "cr:Field",
          "dataType": "sc:Float",
          "description": "Variable 'n_modes_positive' (role metadata, type numeric).",
          "name": "n_modes_positive",
          "source": {
            "extract": {
              "column": "n_modes_positive"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        },
        {
          "@id": "variables/has_negative_freq",
          "@type": "cr:Field",
          "dataType": "sc:Integer",
          "description": "Variable 'has_negative_freq' (role metadata, type categorical).",
          "name": "has_negative_freq",
          "source": {
            "extract": {
              "column": "has_negative_freq"
            },
            "fileObject": {
              "@id": "file/variables.parquet"
            }
          }
        }
      ],
      "key": {
        "@id": "variables/sample_id"
      },
      "name": "variables"
    }
  ],
  "sameAs": [
    "https://figshare.com/articles/dataset/Raman-ChEMBL-part1/28593698",
    "https://doi.org/10.6084/m9.figshare.28593698.v3",
    "https://doi.org/10.6084/m9.figshare.28594295.v3",
    "source_to_standard.py"
  ],
  "url": "https://entrepot.recherche.data.gouv.fr",
  "version": "1.0.0"
}