Trilegal priors to HATS#

Objective: Convert directory of numpy files into a HATS map catalog

Step 1 - .npy to .parquet

We start with

unpacking the numpy files into parquet tables
adding HATS-specfic columns
saving with name consistent with HATS expected directories

[ ]:

import numpy as np
from pathlib import Path


prior_np_dir = Path("/mnt/beegfs/scratch/data/priors/TRILEGAL/S82/5")
healpix_order = 5

[15]:

destination_dir = Path("/home/delucchi/git/goofing/priors")
(destination_dir / "dataset" / "Norder=5" / "Dir=0").mkdir(parents=True, exist_ok=True)
(destination_dir / "dataset" / "Norder=5" / "Dir=10000").mkdir(
    parents=True, exist_ok=True
)

[ ]:

pixel_list = [
    0,
    1,
    2,
    3072,
    3073,
    3074,
    4349,
    4350,
    4351,
    4413,
    4414,
    4415,
    4429,
    4430,
    4431,
    4433,
    4434,
    4435,
    4436,
    4437,
    4438,
    4439,
    4440,
    4441,
    4442,
    4443,
    4444,
    4445,
    4446,
    4449,
    4450,
    4451,
    4452,
    4453,
    4454,
    4455,
    4456,
    4457,
    4458,
    4459,
    4460,
    4461,
    4462,
    4464,
    4465,
    4466,
    4493,
    4494,
    4495,
    4497,
    4498,
    4499,
    4500,
    4501,
    4502,
    4503,
    4504,
    4505,
    4506,
    4507,
    4508,
    4509,
    4510,
    4513,
    4514,
    4515,
    4516,
    4517,
    4518,
    4519,
    4520,
    4521,
    4522,
    4523,
    4524,
    4525,
    4526,
    4528,
    4529,
    4530,
    4544,
    4545,
    4546,
    4669,
    4670,
    4671,
    4685,
    4686,
    4687,
    4689,
    4690,
    4691,
    4692,
    4693,
    4694,
    4695,
    4696,
    4697,
    4698,
    4699,
    4700,
    4701,
    4702,
    4705,
    4706,
    4707,
    4708,
    4709,
    4710,
    4711,
    4712,
    4713,
    4714,
    4715,
    4716,
    4717,
    4718,
    4720,
    4721,
    4722,
    4749,
    4750,
    4751,
    4753,
    4754,
    4755,
    4756,
    4757,
    4758,
    4759,
    4760,
    4761,
    4762,
    4763,
    4764,
    4765,
    4766,
    4769,
    4770,
    4771,
    4772,
    4773,
    4774,
    4775,
    4776,
    4777,
    4778,
    4779,
    4780,
    4781,
    4782,
    4784,
    4785,
    4786,
    4800,
    4801,
    4802,
    4864,
    4865,
    4866,
    5773,
    5774,
    5775,
    5778,
    5779,
    5784,
    5785,
    5786,
    5787,
    5788,
    5790,
    5793,
    5794,
    5795,
    5796,
    5797,
    5798,
    5799,
    5800,
    5801,
    5802,
    5803,
    5804,
    5805,
    5806,
    5808,
    5809,
    5810,
    7505,
    7506,
    7507,
    7508,
    7509,
    7510,
    7511,
    7512,
    7513,
    7515,
    7516,
    7517,
    7518,
    9213,
    9214,
    9215,
    12285,
    12286,
    12287,
]

[ ]:

import pyarrow as pa
import pyarrow.parquet as pq
from hats.pixel_math import HealpixPixel


rGrid = np.linspace(14, 27, 27)
for pix in pixel_list:
    kdes = []
    xGrids = []
    yGrids = []

    for rind, r in enumerate(rGrid):
        file_name = prior_np_dir / f"{pix}-{rind:02d}.npz"
        prior_contents = np.load(file_name)
        kdes.append(prior_contents["kde"].flatten())
        xGrids.append(prior_contents["xGrid"].flatten())
        yGrids.append(prior_contents["yGrid"].flatten())

    healpixel = HealpixPixel(healpix_order, pix)

    table = pa.Table.from_arrays(
        [
            rGrid,
            kdes,
            xGrids,
            yGrids,
            np.full(len(rGrid), fill_value=healpixel.order, dtype=np.int8),
            np.full(len(rGrid), fill_value=healpixel.dir, dtype=np.int64),
            np.full(len(rGrid), fill_value=healpixel.pixel, dtype=np.int64),
            np.full(
                len(rGrid),
                fill_value=healpixel.pixel * (4 ** (29 - healpixel.order)),
                dtype=np.int64,
            ),
        ],
        names=["rmag", "kde", "xGrid", "yGrid", "Norder", "Dir", "Npix", "_healpix_29"],
    )

    hats_file = (
        destination_dir
        / "dataset"
        / f"Norder={healpixel.order}"
        / f"Dir={healpixel.dir}"
        / f"Npix={healpixel.pixel}.parquet"
    )
    pq.write_table(table, hats_file)

Step 2 - add HATS metadata files

[21]:

import hats.io.file_io as io
from hats.catalog import PartitionInfo
from hats.io import paths
from hats.io.parquet_metadata import write_parquet_metadata

from hats.catalog import TableProperties
from pathlib import Path

from datetime import datetime, timezone
import numpy as np

parquet_rows = write_parquet_metadata(destination_dir)

# Read partition info from _metadata and write to partition_info.csv
partition_info = PartitionInfo.read_from_dir(destination_dir)
partition_info_file = paths.get_partition_info_pointer(destination_dir)
partition_info.write_to_file(partition_info_file)

now = datetime.now(tz=timezone.utc)

catalog_info = TableProperties(
    catalog_name="s82_priors",
    total_rows=parquet_rows,
    hats_order=5,
    hats_builder="Melissa DeLucchi",
    hats_creation_date=now.strftime("%Y-%m-%dT%H:%M%Z"),
    catalog_type="map",
    ra_column=None,
    dec_column=None,
)
catalog_info.to_properties_file(destination_dir)

/home/delucchi/git/hats/src/hats/catalog/partition_info.py:136: UserWarning: Reading partitions from parquet metadata. This is typically slow.
  warnings.warn("Reading partitions from parquet metadata. This is typically slow.")

Step 3 - make sure it’s “ok”

[22]:

from hats import read_hats

catalog = read_hats(destination_dir)
catalog.plot_pixels()

[22]:

(<Figure size 1000x500 with 2 Axes>,
 <WCSAxes: title={'center': 'Catalog pixel density map - s82_priors'}>)

../../_images/pre_executed_data_generation_priors_to_hats_9_1.png

[ ]: