Here, you can find the code used to compute the 4-point correlation function (4PCF) on the CATS MHD simulations. Below are links to the main code repositories, along with key scripts used for various tasks in the process.
The code we use for computing the 4PCF is SARABANDE, a new Python package designed for efficiently measuring 3-point and 4-point correlation functions on gridded data using Fast Fourier Transforms (FFTs). This FFT-based approach offers a complexity scaling of O(N log N), making it the fastest method available for computing the 4PCF to date.
For a complete description of the method SARABANDE uses and the underlying algorithms, please refer to the original paper:
https://academic.oup.com/rasti/article/2/1/62/7000851
The linked GitHub to SARABANDE provides access to the full set of scripts for running the framework. This repository includes all necessary code, along with documentation and usage instructions for applying SARABANDE to your own data.
Data Acquisition- This script automates the process of accessing and downloading the required simulation files from the CATS MHD database.
import os
from tqdm import tqdm
for i in tqdm(range(500, 901, 50)):
os.system(f'wget https://users.flatironinstitute.org/~bburkhart/data/CATS/MHD/256/b1p.01/t_{i}/dens_t{i}.fits.gz')
os.system(f'mv dens_*.fits.gz Ms_7p0__Ma_0p7')
print("Finished First Round of Data Downloading")
for i in tqdm(range(500, 901, 50)):
os.system(f'wget https://users.flatironinstitute.org/~bburkhart/data/CATS/MHD/256/b1p.32/t_{i}/dens_t{i}.fits.gz')
os.system(f'mv dens_*.fits.gz Ms_1p2__Ma_0p7')
print("Finished Second Round of Data Downloading")
for i in tqdm(range(500, 901, 50)):
os.system(f'wget https://users.flatironinstitute.org/~bburkhart/data/CATS/MHD/256/b.1p.01/t_{i}/dens_t{i}.fits')
os.system(f'mv dens_*.fits.gz Ms_7p0__Ma_2p0')
print("Finished Third Round of Data Downloading")
for i in tqdm(range(500, 901, 50)):
os.system(f'wget https://users.flatironinstitute.org/~bburkhart/data/CATS/MHD/256/b.1p.32/t_{i}/dens_t{i}.fits.gz')
os.system(f'mv dens_*.fits.gz Ms_1p2__Ma_2p0')
print("Finished Fourth Round of Data Downloading")
4PCF Execution- This script is designed to execute the 4PCF calculation on a supercomputer. It allows for the manipulation of parameters such as Mach and Alfvénic numbers, the maximum ℓ value, and computational constraints.
import numpy as np
import astropy.io.fits as pyf
import os
import sarabande
import time
import argparse
########################################
# COMMAND LINE ARGUMENTS
########################################
def get_args():
#Command Line Arguments
parser = argparse.ArgumentParser(description='4pcf script')
parser.add_argument('-n', '--ncpus', type=int, required=False,
help="""Number of CPU cores to use. DEFAULT=${SLURM_CPUS_ON_NODE}""")
parser.add_argument('-ell_max', type=int,
help="""This is the maximum ell value.""")
parser.add_argument('-n_bins', type=int,
help="""This is the number of bins.""")
parser.add_argument('-Ma', type=float,
help="""This is the Alfvenic number.""")
parser.add_argument('-Ms', type=float,
help="""This is the Mach number.""")
parser.add_argument('-t_slice', type=int,
help="""This is the time slice.""")
results = parser.parse_args()
print(f"We will be using ell_max {results.ell_max} in our data.")
print(f"We will be using {results.n_bins} bins in our data.")
print(f"We will be using {results.t_slice} time in our data.")
print(f"We will be using {results.Ma} Alfvenic number in our data.")
print(f"We will be using {results.Ms} Mach number in our data.")
return results
########################################
# FUNCTIONS
########################################
def open_file(Ms, Ma, t_slice):
"""
Arguments:
Ms [float]: The sonic mach number of the density field we’re using
Ma [float]: The Alfvenic mach number of the density field we’re using
t_slice [int]: the time slice of the simulation a value that can be [500, 550, … 900]
returns: data [numpy array] of shape (256, 256, 256) representing the density field of a simulation after it has been reformatted to a log-normal transformation
"""
if Ms == 0.7 and Ma == 0.7:
Simtype = 'Ms_0p7__Ma_0p7/'
elif Ms == 0.7 and Ma == 2.0:
Simtype = 'Ms_0p7__Ma_2p0/'
elif Ms == 1.2 and Ma == 0.7:
Simtype = 'Ms_1p2__Ma_0p7/'
elif Ms == 1.2 and Ma == 2.0:
Simtype = 'Ms_1p2__Ma_2p0/'
elif Ms == 7.0 and Ma == 0.7:
Simtype = 'Ms_7p0__Ma_0p7/'
elif Ms == 7.0 and Ma == 2.0:
Simtype = 'Ms_7p0__Ma_2p0/'
else:
Simtype = None #throw error
Folder = "/blue/zslepian/williamson.v/data/" + Simtype + f"dens_t{t_slice}.fits.gz"
hdulist = pyf.open(Folder)
data = hdulist[0].data.astype(np.float64) #here’s our data
#reformatted the data to a log-normal transformation
data = np.log(data)
data = (data-np.mean(data))/(np.std(data))
return data
def calc_4pcf(data, Ms, Ma, t_slice, nbins, ell_max, ncpus):
"""
Arguments:
data [np.array] : The density field loaded in,
should be a log normal density field
Ms [float] : The sonic mach number of the density
field we’re using
Ma [float] : The Alfvenic mach number of the density
field we’re using
t_slice [int] : the time slice of the simulation a
value that can be [500, 550, … 900]
nbins [int] : number of radial bins we’re using
ell_max [int] : the maximum ell value we’re using
returns → returns nothing, but it saves the full and disconnected 4pcf measurements to numpy files
"""
#string to directory to save temporary calculations into
save_dir = '/blue/zslepian/williamson.v/output/'
#make a unique identifier string
if Ms == 0.7 and Ma == 0.7:
Simtype = 'Ms_0p7__Ma_0p7'
elif Ms == 0.7 and Ma == 2.0:
Simtype = 'Ms_0p7__Ma_2p0'
elif Ms == 1.2 and Ma == 0.7:
Simtype = 'Ms_1p2__Ma_0p7'
elif Ms == 1.2 and Ma == 2.0:
Simtype = 'Ms_1p2__Ma_2p0'
elif Ms == 7.0 and Ma == 0.7:
Simtype = 'Ms_7p0__Ma_0p7'
elif Ms == 7.0 and Ma == 2.0:
Simtype = 'Ms_7p0__Ma_2p0'
else:
Simtype = None #throw error
save_name = f"_{Simtype}_{t_slice}_"
print(f"\nThe save name is {save_name}.\n")
#create measure_obj
_4PCF = sarabande.measure(nPCF=4, projected=False,
density_field_data = data, save_dir=save_dir,
save_name=save_name, nbins=nbins, ell_max=ell_max,
physical_boxsize=256, rmin=1e-14, rmax=128, normalize=True)
print("\nObject has been made, running function now.\n")
sarabande.calc_zeta(_4PCF, verbose_flag=True, parallelized=True,
calc_bin_overlaps=False, calc_odd_modes=True, calc_disconnected=True, store_in_memory=True, max_workers=ncpus)
#save files
np.save(f"zetas/4pcf_full_{save_name}.npy", _4PCF.zeta); np.save(f"zetas/4pcf_disconnected_{save_name}.npy", _4PCF.zeta_disconnected)
return None
##############################
# WE RUN CODE BELOW
##############################
def main():
"""Main function"""
args = get_args()
if not args.ncpus:
try:
ncpus = int(os.getenv("SLURM_CPUS_ON_NODE"))
except ValueError as e:
print(f"No ncpus argument and no SLURM_CPUS_ON_NODE env variable. Bailing out.")
sys.exit(2)
#Loading in the Data
data = open_file(args.Ms, args.Ma, args.t_slice)
print(f"\nThe data has been loaded in.\n")
start = time.time()
# calculate the 4PCF
calc_4pcf(data, args.Ms, args.Ma, args.t_slice, args.n_bins, args.ell_max, ncpus)
end = time.time()
#Results
print(f"\nIt took {end - start} seconds to run and it successfully saved!\n")
if __name__ == "__main__": #for scripts only
main()