Import Gaia_x_SDSS to HATS

Import Gaia_x_SDSS to HATS

[9]:
from astroquery.gaia import Gaia
import numpy as np
# Authenticate with the Gaia archive. Called without arguments, this prompts
# interactively for the user name and password (see the prompts in the output).
Gaia.login()
INFO: Login to gaia TAP server [astroquery.gaia.core]
User:  lpalaver
Password:  ········
INFO: OK [astroquery.utils.tap.core]
INFO: Login to gaia data server [astroquery.gaia.core]
INFO: OK [astroquery.utils.tap.core]
[10]:
cd "/mnt/beegfs/scratch/data/Gaia-SDSS/fits/with_extinction"
/mnt/beegfs/scratch/data/Gaia-SDSS/fits/with_extinction
/nvme/lovro/miniconda3/envs/photod-testing/lib/python3.12/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
Crossmatch Gaia stars with "good" parallaxes to SDSS photometry. N.B. Parallaxes may simply be inverted to obtain distances if parallax_over_error > 10 (this is a rough approximation!). Note that the query below applies the looser cut parallax_over_error > 5.
[8]:
# Download the Gaia DR3 x SDSS DR13 crossmatch in slices of G.random_index,
# launching one asynchronous TAP job per slice and dumping each result to a
# FITS file. The edges 0, 1e8, ..., 1.9e9 yield 19 half-open slices [i, j).
# NOTE(review): assumes every random_index is below 1.9e9 -- confirm.
edges = np.arange(0, 2000000000, 100000000)
for i, j in zip(edges[:-1], edges[1:]):
    # The backslash continuations sit inside the string literal, so each line
    # that is followed by an unindented line must end with a space before the
    # backslash; without it "S.extinction_z" and "FROM" fuse into one token.
    # The f-string already interpolates {i} and {j}, so no .format() is needed.
    query = f"SELECT G.random_index, G.ra, G.dec, G.phot_g_mean_mag, G.parallax, G.parallax_error, D.*,\
    S.objid, S.type, S.psfmag_u, S.psfmag_g, S.psfmag_r, S.psfmag_i, S.psfmag_z, S.psfmagerr_u, S.psfmagerr_g, S.psfmagerr_r, S.psfmagerr_i, S.psfmagerr_z, \
    S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z \
FROM gaiadr3.gaia_source AS G \
JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id \
JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id \
JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid \
WHERE G.parallax_over_error > 5 \
    AND G.visibility_periods_used > 10 \
    AND G.ruwe < 1.4 \
    AND G.in_qso_candidates = 'f' \
    AND G.in_galaxy_candidates = 'f' \
    AND G.random_index >= {i} \
    AND G.random_index <  {j}"
    # Echo the slice bounds and the exact query text for the record.
    print(i, j, "\n", query, "\n")
    # dump_to_file=True saves the result to disk instead of keeping it in memory.
    job = Gaia.launch_job_async(query, dump_to_file=True, output_format='fits')
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[8], line 1
----> 1 for i,j in zip(np.arange(0, 2000000000, 100000000)[:-1], np.arange(0, 2000000000, 100000000)[1:]):
      2     query = f"SELECT G.random_index, G.ra, G.dec, G.phot_g_mean_mag, G.parallax, G.parallax_error, D.*,\
      3  S.objid, S.type, S.psfmag_u, S.psfmag_g, S.psfmag_r, S.psfmag_i, S.psfmag_z, S.psfmagerr_u, S.psfmagerr_g, S.psfmagerr_r, S.psfmagerr_i, S.psfmagerr_z, \
      4     S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z\
   (...)
     14  AND G.random_index >= {i} \
     15  AND G.random_index <  {j}".format(i, j)
     16     print(i, j, "\n", query, "\n")

NameError: name 'np' is not defined
[ ]:
# Add extinction column only
#
# Fetch just source_id plus the five SDSS extinction columns, sliced by
# G.random_index in 19 half-open intervals [i, j) of width 1e8, with one
# asynchronous TAP job per slice dumped to a FITS file.

edges = np.arange(0, 2000000000, 100000000)
for i, j in zip(edges[:-1], edges[1:]):
    # No column of D is selected, but the inner join still restricts the rows to
    # sources present in that table (presumably kept so the selection matches
    # the full crossmatch query -- confirm).
    # The f-string already interpolates {i} and {j}, so no .format() is needed.
    query = f"SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z \
FROM gaiadr3.gaia_source AS G \
JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id \
JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id \
JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid \
WHERE G.parallax_over_error > 5 \
    AND G.visibility_periods_used > 10 \
    AND G.ruwe < 1.4 \
    AND G.in_qso_candidates = 'f' \
    AND G.in_galaxy_candidates = 'f' \
    AND G.random_index >= {i} \
    AND G.random_index <  {j}"
    # Echo the slice bounds and the exact query text for the record.
    print(i, j, "\n", query, "\n")
    # dump_to_file=True saves the result to disk instead of keeping it in memory.
    job = Gaia.launch_job_async(query, dump_to_file=True, output_format='fits')
0 100000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 0      AND G.random_index <  100000000

200000000 300000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 200000000      AND G.random_index <  300000000

300000000 400000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 300000000      AND G.random_index <  400000000

400000000 500000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 400000000      AND G.random_index <  500000000

500000000 600000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 500000000      AND G.random_index <  600000000

600000000 700000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 600000000      AND G.random_index <  700000000

700000000 800000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 700000000      AND G.random_index <  800000000

800000000 900000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 800000000      AND G.random_index <  900000000

900000000 1000000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 900000000      AND G.random_index <  1000000000

1000000000 1100000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 1000000000     AND G.random_index <  1100000000

1100000000 1200000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 1100000000     AND G.random_index <  1200000000

1200000000 1300000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 1200000000     AND G.random_index <  1300000000

1300000000 1400000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 1300000000     AND G.random_index <  1400000000

1400000000 1500000000
 SELECT G.source_id, S.extinction_u, S.extinction_g, S.extinction_r, S.extinction_i, S.extinction_z FROM gaiadr3.gaia_source AS G JOIN external.gaiaedr3_distance AS D ON G.source_id = D.source_id JOIN gaiadr3.sdssdr13_best_neighbour AS BN ON G.source_id = BN.source_id JOIN external.sdssdr13_photoprimary AS S ON BN.original_ext_source_id = S.objid WHERE G.parallax_over_error > 5         AND G.visibility_periods_used > 10   AND G.ruwe < 1.4     AND G.in_qso_candidates = 'f'   AND G.in_galaxy_candidates = 'f'        AND G.random_index >= 1400000000     AND G.random_index <  1500000000

[5]:
from dask.distributed import Client
from hats_import.pipeline import pipeline_with_client
from hats_import.catalog.arguments import ImportArguments
[6]:
# Start a Dask client for the import: 96 single-threaded workers, each capped
# at 15 GiB of memory.
cluster_options = {
    "n_workers": 96,
    "threads_per_worker": 1,
    "memory_limit": "15GiB",
}
client = Client(**cluster_options)
client
/nvme/lovro/miniconda3/envs/photod-testing/lib/python3.12/site-packages/distributed/node.py:187: UserWarning: Port 8787 is already in use.
Perhaps you already have a cluster running?
Hosting the HTTP server on port 46547 instead
  warnings.warn(
[6]:

Client

Client-9cce9fb7-b7d7-11ef-8faf-6cfe548c2c54

Connection method: Cluster object Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:46547/status

Cluster Info

[7]:
# Settings for the HATS import: read the FITS files found under the input
# directory, use the "ra"/"dec" columns as sky coordinates, sort on
# "source_id", and write the catalog named "Gaia-SDSS" under the output path.
import_settings = {
    "input_path": "/mnt/beegfs/scratch/data/Gaia-SDSS/fits/",
    "file_reader": "fits",
    "ra_column": "ra",
    "dec_column": "dec",
    "sort_columns": "source_id",
    "output_artifact_name": "Gaia-SDSS",
    "output_path": "/mnt/beegfs/scratch/data/Gaia-SDSS/hats/",
}
args = ImportArguments(**import_settings)
[8]:
# Run the import described by `args`, using the Dask `client` created above.
pipeline_with_client(args, client)