1079 lines
35 KiB
Rust
1079 lines
35 KiB
Rust
extern crate flate2;
|
|
extern crate memmap;
|
|
extern crate nalgebra as na;
|
|
extern crate regex;
|
|
|
|
use crate::color;
|
|
use crate::constants;
|
|
use crate::coord;
|
|
use crate::data;
|
|
use crate::mem;
|
|
use crate::parse;
|
|
use crate::util;
|
|
|
|
use memmap::Mmap;
|
|
use regex::Regex;
|
|
use std::cell::RefCell;
|
|
use std::collections::{HashMap, HashSet};
|
|
use std::io;
|
|
use std::io::BufRead;
|
|
use std::path::Path;
|
|
use std::{f32, f64, fs::File};
|
|
|
|
use flate2::read::GzDecoder;
|
|
use glob::glob;
|
|
use na::base::Vector3;
|
|
|
|
use data::{LargeLongMap, Particle};
|
|
|
|
/// Column content type identifier.
///
/// Each variant names a column that may appear in an input catalog.
/// `to_str` yields the canonical column name used for look-ups in the
/// side-loaded additional-column files, and `from_str` maps the many
/// aliases found in real catalogs back to a variant.
#[allow(non_camel_case_types, dead_code)]
#[derive(Copy, Debug, Clone, Eq, PartialEq, Hash)]
pub enum ColId {
    source_id,
    hip,
    names,
    ra,
    dec,
    plx,
    ra_err,
    dec_err,
    plx_err,
    pmra,
    pmdec,
    radvel,
    pmra_err,
    pmdec_err,
    radvel_err,
    gmag,
    bpmag,
    rpmag,
    bp_rp,
    col_idx,
    ref_epoch,
    teff,
    radius,
    ag,
    ebp_min_rp,
    ruwe,
    geodist,
    fidelity,
    phot_dist,
    empty,
}

impl ColId {
    /// Returns the canonical string name of this column.
    ///
    /// Note: `plx` deliberately maps to `"pllx"` (the header name used by
    /// the additional-column files). Variants without a canonical name map
    /// to `"*none*"`; the match is exhaustive so adding a variant forces a
    /// decision here at compile time.
    pub fn to_str(&self) -> &str {
        match self {
            ColId::source_id => "source_id",
            ColId::hip => "hip",
            ColId::names => "names",
            ColId::ra => "ra",
            ColId::dec => "dec",
            ColId::plx => "pllx",
            ColId::ra_err => "ra_err",
            ColId::dec_err => "dec_err",
            ColId::plx_err => "plx_err",
            ColId::pmra => "pmra",
            ColId::pmdec => "pmdec",
            ColId::radvel => "radvel",
            ColId::radvel_err => "radvel_err",
            ColId::gmag => "gmag",
            ColId::bpmag => "bpmag",
            ColId::rpmag => "rpmag",
            ColId::bp_rp => "bp_rp",
            ColId::col_idx => "col_idx",
            ColId::ref_epoch => "ref_epoch",
            ColId::ruwe => "ruwe",
            ColId::teff => "teff",
            ColId::ag => "ag",
            ColId::ebp_min_rp => "ebp_min_rp",
            ColId::geodist => "geodist",
            ColId::fidelity => "fidelity_v1",
            ColId::phot_dist => "phot_dist",
            ColId::empty => "empty",
            // These columns have no canonical name (previously hidden
            // behind a catch-all `_` arm; same output, now explicit).
            ColId::pmra_err | ColId::pmdec_err | ColId::radius => "*none*",
        }
    }

    /// Parses a column name, including the common catalog aliases, into a
    /// `ColId`. Returns `None` for unrecognized names.
    pub fn from_str(input: &str) -> Option<ColId> {
        match input {
            "source_id" | "sourceid" => Some(ColId::source_id),
            "hip" => Some(ColId::hip),
            "names" | "name" => Some(ColId::names),
            "ra" => Some(ColId::ra),
            "dec" | "de" => Some(ColId::dec),
            "plx" | "pllx" | "parallax" => Some(ColId::plx),
            "ra_error" | "ra_err" | "ra_e" => Some(ColId::ra_err),
            "dec_error" | "dec_err" | "dec_e" | "de_error" | "de_err" | "de_e" => {
                Some(ColId::dec_err)
            }
            "plx_error" | "plx_err" | "plx_e" | "pllx_error" | "pllx_err" | "pllx_e" => {
                Some(ColId::plx_err)
            }
            "pmra" => Some(ColId::pmra),
            "pmdec" | "pmde" => Some(ColId::pmdec),
            "radvel" | "rv" => Some(ColId::radvel),
            "radvel_err" | "radvel_e" | "rv_err" | "rv_e" => Some(ColId::radvel_err),
            "phot_g_mean_mag" | "gmag" | "appmag" => Some(ColId::gmag),
            "phot_bp_mean_mag" | "bpmag" | "bp" => Some(ColId::bpmag),
            "phot_rp_mean_mag" | "rpmag" | "rp" => Some(ColId::rpmag),
            "bp_rp" | "bp-rp" => Some(ColId::bp_rp),
            "col_idx" | "b_v" | "b-v" => Some(ColId::col_idx),
            "ref_epoch" => Some(ColId::ref_epoch),
            "teff" | "t_eff" | "T_eff" => Some(ColId::teff),
            "ruwe" => Some(ColId::ruwe),
            "ag" | "ag_gspphot" => Some(ColId::ag),
            "ebp_min_rp" | "ebpminrp_gspphot" => Some(ColId::ebp_min_rp),
            "geodist" => Some(ColId::geodist),
            "fidelity" | "fidelity_v1" | "fidelity_v2" => Some(ColId::fidelity),
            "distance_gspphot" | "dist_phot" | "phot_dist" => Some(ColId::phot_dist),
            "empty" => Some(ColId::empty),
            _ => None,
        }
    }
}
|
|
|
|
/// This holds additional columns for this loader.
/// The columns are in a large map whose keys are
/// source IDs. The column data is a vector of f64.
/// The index in the vector corresponding to each
/// column can be found in the map indices.
pub struct Additional {
    /// Maps a column name (header token) to its position within each
    /// per-source values vector.
    pub indices: HashMap<String, usize>,
    /// Per-source-id column values; element `i` of a vector belongs to the
    /// column whose entry in `indices` is `i`.
    pub values: LargeLongMap<Vec<f64>>,
}
|
|
|
|
impl Additional {
|
|
pub fn empty() -> Self {
|
|
let indices = HashMap::new();
|
|
let values = LargeLongMap::new(50);
|
|
Additional { indices, values }
|
|
}
|
|
|
|
    /// Loads a new batch of additional columns from the given file(s)
    ///
    /// `file` may be a single `.gz` file or a directory; in both cases the
    /// work is delegated to `load_dir`. Returns `None` when the path does
    /// not exist.
    // NOTE(review): the `&&str` parameter looks accidental (`&str` would
    // suffice), but changing it would break callers that pass `&token`.
    pub fn new(file: &&str) -> Option<Self> {
        log::info!("Loading additional columns from {}", file);
        Self::load_dir(file)
    }
|
|
|
|
    /// Loads additional columns from `dir`.
    ///
    /// * If `dir` is an existing `.gz` file, only that file is loaded.
    /// * Otherwise the path is globbed as `dir/*` and every matching entry
    ///   is loaded.
    ///
    /// Returns `None` when the path does not exist.
    fn load_dir(dir: &str) -> Option<Self> {
        let path = Path::new(dir);
        if path.exists() {
            let mut addit: Self = Self::empty();
            if path.is_file() && dir.ends_with(".gz") {
                addit.load_file(dir);
                return Some(addit);
            } else {
                // glob directory
                // NOTE(review): a plain file that does not end in ".gz" also
                // lands here; "file/*" matches nothing and an empty set is
                // returned — confirm this is intended.
                let mut dir_glob: String = String::from(dir);
                dir_glob.push_str("/*");
                let mut count: u32 = 0;
                for entry in glob(&dir_glob).expect("Error reading glob pattern") {
                    match entry {
                        Ok(path) => {
                            addit.load_file(path.to_str().expect("Error: path not valid"));
                            count += 1;
                        }
                        Err(e) => log::error!("Error: {:?}", e),
                    }
                }
                log::info!("Additional directory loaded ({}): {} files", dir, count);
                return Some(addit);
            }
        } else {
            log::error!("Path does not exist: {}", dir);
            return None;
        }
    }
|
|
|
|
fn load_file(&mut self, file: &str) {
|
|
let mut total: u64 = 0;
|
|
// Read csv.gz using GzDecoder
|
|
let f = File::open(file).expect("Error: file not found");
|
|
let mmap = unsafe { Mmap::map(&f).expect(&format!("Error mapping file {}", file)) };
|
|
let gz = GzDecoder::new(&mmap[..]);
|
|
|
|
// Separator, multiple spaces or a comma
|
|
let sep = Regex::new(r"\s+|,").unwrap();
|
|
|
|
for line in io::BufReader::new(gz).lines() {
|
|
if total == 0 {
|
|
// Header
|
|
let line_str = line.expect("Error reading line");
|
|
let tokens: Vec<&str> = sep.split(&line_str).collect();
|
|
let mut i = 0;
|
|
for token in tokens {
|
|
if i == 0 && !token.eq("source_id") && !token.eq("sourceid") {
|
|
log::error!(
|
|
"Error: first column '{}' of additional must be '{}'",
|
|
token,
|
|
ColId::source_id.to_str()
|
|
);
|
|
return;
|
|
}
|
|
if i > 0 && !self.indices.contains_key(token) {
|
|
self.indices.insert(token.to_string(), i - 1);
|
|
}
|
|
i += 1;
|
|
}
|
|
} else {
|
|
// Data
|
|
let line_str = line.expect("Error reading line");
|
|
let tokens: Vec<&str> = sep.split(&line_str).collect();
|
|
let source_id: i64 = parse::parse_i64(tokens.get(0));
|
|
let ncols = tokens.len();
|
|
|
|
// Vector with values
|
|
let mut vals: Vec<f64> = Vec::new();
|
|
|
|
for j in 1..ncols {
|
|
let token = tokens.get(j);
|
|
if token.is_none() {
|
|
vals.push(f64::NAN);
|
|
} else {
|
|
if token.unwrap().trim().is_empty() {
|
|
vals.push(f64::NAN);
|
|
} else {
|
|
// Actual value
|
|
let val = parse::parse_f64(token);
|
|
vals.push(val);
|
|
}
|
|
}
|
|
}
|
|
self.values.insert(source_id, vals);
|
|
}
|
|
total += 1;
|
|
if total % 100000 == 0 {
|
|
log::debug!(" object {}", total);
|
|
}
|
|
}
|
|
}
|
|
|
|
    /// Number of additional columns registered from the file header(s).
    pub fn n_cols(&self) -> usize {
        self.indices.len()
    }
|
|
|
|
    /// Number of records (source ids) held in this set.
    pub fn len(&self) -> usize {
        self.values.size
    }
|
|
    /// Whether this set carries the given column, looked up by its
    /// canonical string name (see `ColId::to_str`).
    pub fn has_col(&self, col_id: ColId) -> bool {
        self.indices.contains_key(col_id.to_str())
    }
|
|
|
|
pub fn get(&self, col_id: ColId, source_id: i64) -> Option<f64> {
|
|
if self.has_col(col_id) {
|
|
let index: usize = *self
|
|
.indices
|
|
.get(col_id.to_str())
|
|
.expect("Error: could not get index");
|
|
let sid = self.values.get(source_id);
|
|
if sid.is_some() {
|
|
let v = sid.unwrap().get(index);
|
|
if v.is_some() {
|
|
Some(v.unwrap().clone())
|
|
} else {
|
|
None
|
|
}
|
|
} else {
|
|
None
|
|
}
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Catalog loader: configuration, side-loaded columns, and running
/// acceptance/rejection statistics.
pub struct Loader {
    /// Regular expression to separate values in file
    pub sep: Regex,
    /// Maximum number of files to load in a directory (negative: no limit)
    pub max_files: i32,
    /// Maximum number of records to load per file (negative: no limit)
    pub max_records: i32,
    /// Zero point for parallaxes, subtracted from catalog parallaxes
    pub plx_zeropoint: f64,
    /// RUWE cap value (NaN: no cap)
    pub ruwe_cap: f32,
    /// Distance cap in parsecs
    pub distpc_cap: f64,
    /// plx_error criteria for faint stars (appmag >= 13.1)
    pub plx_err_faint: f64,
    /// plx_error criteria for bright stars (appmag < 13.1)
    pub plx_err_bright: f64,
    /// Cap on the parallax error (stars with larger plx_err are discarded)
    pub plx_err_cap: f64,
    /// Whether to use photometric distances when available, and ignore the parallax thresholds
    pub use_phot_dist: bool,
    /// Whether to apply mag corrections:
    /// 0 - no corrections;
    /// 1 - only if ag and ebr_min_rp are in the catalog;
    /// 2 - also use analytical alternatives
    pub mag_corrections: u8,
    /// If set to true, negative parallaxes will be transformed to the default 0.04 arcsec value
    pub allow_negative_plx: bool,
    /// Must-load star ids (always accepted regardless of most filters)
    pub must_load: Option<HashSet<i64>>,
    /// Additional columns, searched in order for side-loaded attributes
    pub additional: Vec<Additional>,
    /// Indices: position of each column in the input files
    pub indices: HashMap<ColId, usize>,
    /// Coordinate conversion (equatorial -> galactic)
    pub coord: coord::Coord,
    /// Star counts per (clamped) magnitude bin, 0..=21
    pub counts_per_mag: RefCell<[u32; 22]>,

    // Counts accumulated while loading.
    /// Total records seen
    pub total_processed: u64,
    /// Total records accepted
    pub total_loaded: u64,
    /// Rejected by the distance test (all causes)
    pub rejected_dist: u64,
    /// ... of which: non-finite distance
    pub rejected_dist_inf: u64,
    /// ... of which: null/negative distance
    pub rejected_dist_neg: u64,
    /// Rejected because no geometric distance was present
    pub rejected_geodist: u64,
    /// Rejected by the fidelity criterion
    pub rejected_fidelity: u64,
    /// Rejected by parallax (criteria + negative)
    pub rejected_plx: u64,
    /// ... of which: parallax error criteria
    pub rejected_plx_crit: u64,
    /// ... of which: negative parallax
    pub rejected_plx_neg: u64,
    /// Rejected by the RUWE cap
    pub rejected_ruwe: u64,
    /// Rejected due to non-finite magnitude
    pub rejected_mag: u64,
}
|
|
|
|
#[allow(dead_code)]
|
|
impl Loader {
|
|
pub fn new(
|
|
sep: Regex,
|
|
max_files: i32,
|
|
max_records: i32,
|
|
plx_zeropoint: f64,
|
|
ruwe_cap: f32,
|
|
distpc_cap: f64,
|
|
plx_err_faint: f64,
|
|
plx_err_bright: f64,
|
|
plx_err_cap: f64,
|
|
use_phot_dist: bool,
|
|
mag_corrections: u8,
|
|
allow_negative_plx: bool,
|
|
must_load: Option<HashSet<i64>>,
|
|
additional_str: &str,
|
|
indices_str: &str,
|
|
) -> Self {
|
|
// Additional
|
|
let mut additional = Vec::new();
|
|
if !additional_str.is_empty() {
|
|
let tokens: Vec<&str> = additional_str.split(',').collect();
|
|
for token in tokens {
|
|
let add = Additional::new(&token).expect("Error loading additional");
|
|
log::info!(
|
|
"Loaded {} columns and {} records from {}",
|
|
add.n_cols(),
|
|
add.len(),
|
|
token
|
|
);
|
|
additional.push(add);
|
|
mem::log_mem();
|
|
}
|
|
}
|
|
|
|
// Indices
|
|
let mut indices = HashMap::new();
|
|
let cols: Vec<&str> = indices_str.split(',').collect();
|
|
for i in 0..cols.len() {
|
|
indices.insert(ColId::from_str(cols.get(i).unwrap()).unwrap(), i);
|
|
}
|
|
|
|
Loader {
|
|
sep,
|
|
max_files,
|
|
max_records,
|
|
plx_zeropoint,
|
|
ruwe_cap,
|
|
distpc_cap,
|
|
plx_err_faint,
|
|
plx_err_bright,
|
|
plx_err_cap,
|
|
use_phot_dist,
|
|
mag_corrections,
|
|
allow_negative_plx,
|
|
must_load,
|
|
additional,
|
|
indices,
|
|
coord: coord::Coord::new(),
|
|
counts_per_mag: RefCell::new([0; 22]),
|
|
|
|
total_processed: 0,
|
|
total_loaded: 0,
|
|
rejected_dist: 0,
|
|
rejected_dist_inf: 0,
|
|
rejected_dist_neg: 0,
|
|
rejected_geodist: 0,
|
|
rejected_fidelity: 0,
|
|
rejected_plx: 0,
|
|
rejected_plx_crit: 0,
|
|
rejected_plx_neg: 0,
|
|
rejected_ruwe: 0,
|
|
rejected_mag: 0,
|
|
}
|
|
}
|
|
|
|
    /// Loads `dir` — a single file, or every globbed entry of a directory —
    /// into a vector of particles.
    ///
    /// Honors `max_files` when it is non-negative; note that
    /// `max_files == 0` therefore loads nothing from a directory.
    pub fn load_dir(&mut self, dir: &str) -> Result<Vec<Particle>, &str> {
        let mut list: Vec<Particle> = Vec::new();
        let mut i = 0;

        if Path::new(dir).is_file() {
            self.load_file(dir, &mut list, 1, 1);
        } else {
            // glob directory
            let mut dir_glob: String = String::from(dir);
            dir_glob.push_str("/*");
            let dir_count = glob(&dir_glob).expect("Error reading glob pattern").count();
            // Number of files that will actually be read, for progress logs.
            let count = if self.max_files > 0 {
                usize::min(self.max_files as usize, dir_count)
            } else {
                dir_count
            };
            for entry in glob(&dir_glob).expect("Error reading glob pattern") {
                // Stop early once the file cap is reached.
                if self.max_files >= 0 && i >= self.max_files {
                    return Ok(list);
                }
                match entry {
                    Ok(path) => {
                        self.load_file(
                            path.to_str().expect("Error: path not valid"),
                            &mut list,
                            (i + 1) as usize,
                            count,
                        );
                    }
                    Err(e) => log::error!("Error: {:?}", e),
                }
                i += 1;
                // Periodically report memory usage.
                if i % 10 == 0 {
                    mem::log_mem();
                }
            }
        }
        Ok(list)
    }
|
|
|
|
/// Loads a single file, being it csv.gz or csv
|
|
/// The format is hard-coded for now, with csv.gz being in eDR3 format,
|
|
/// and csv being in hipparcos format.
|
|
pub fn load_file(
|
|
&mut self,
|
|
file: &str,
|
|
list: &mut Vec<Particle>,
|
|
file_num: usize,
|
|
file_count: usize,
|
|
) {
|
|
let mut total: usize = 0;
|
|
let mut loaded: usize = 0;
|
|
let mut skipped: usize = 0;
|
|
// Skip weird files
|
|
if !(file.ends_with(".gz") || file.ends_with(".csv") || file.ends_with(".txt")) {
|
|
return;
|
|
}
|
|
let is_gz = file.ends_with(".gz") || file.ends_with(".gzip");
|
|
let f = File::open(file).expect("Error: file not found");
|
|
let mmap = unsafe { Mmap::map(&f).expect(&format!("Error mapping file {}", file)) };
|
|
|
|
let reader: Box<dyn io::BufRead>;
|
|
if is_gz {
|
|
reader = Box::new(io::BufReader::new(GzDecoder::new(&mmap[..])));
|
|
} else {
|
|
reader = Box::new(&mmap[..]);
|
|
}
|
|
|
|
for line in reader.lines() {
|
|
// Skip header
|
|
if total > 0 {
|
|
match self.parse_line(line.expect("Error reading line")) {
|
|
Some(part) => {
|
|
list.push(part);
|
|
loaded += 1;
|
|
}
|
|
None => skipped += 1,
|
|
}
|
|
}
|
|
total += 1;
|
|
if self.max_records >= 0 && (total - 1) as i32 >= self.max_records {
|
|
break;
|
|
}
|
|
}
|
|
self.log_file(loaded, total, skipped, file, file_num, file_count);
|
|
}
|
|
|
|
fn log_file(
|
|
&self,
|
|
loaded: usize,
|
|
total: usize,
|
|
skipped: usize,
|
|
file: &str,
|
|
file_num: usize,
|
|
file_count: usize,
|
|
) {
|
|
log::info!(
|
|
"{}/{} ({:.3}%): {} -> {}/{} stars ({:.3}%, {} skipped)",
|
|
file_num,
|
|
file_count,
|
|
100.0 * file_num as f32 / file_count as f32,
|
|
Path::new(file).file_name().unwrap().to_str().unwrap(),
|
|
loaded,
|
|
total - 1,
|
|
100.0 * loaded as f32 / (total as f32 - 1.0),
|
|
skipped
|
|
);
|
|
}
|
|
|
|
fn get_index(&self, col_id: &ColId) -> usize {
|
|
match self.indices.get(col_id) {
|
|
Some(value) => *value,
|
|
// Set out of range so that tokens.get() produces None
|
|
None => 50000,
|
|
}
|
|
}
|
|
|
|
    /// Parses a line using self.indices
    ///
    /// The line is split with the configured separator regex; each column
    /// is fetched via its configured index, and columns that are not
    /// configured resolve to `None` (see `get_index`).
    fn parse_line(&mut self, line: String) -> Option<Particle> {
        let tokens: Vec<&str> = self.sep.split(&line).collect();

        self.create_particle(
            tokens.get(self.get_index(&ColId::source_id)),
            tokens.get(self.get_index(&ColId::hip)),
            tokens.get(self.get_index(&ColId::names)),
            tokens.get(self.get_index(&ColId::ra)),
            tokens.get(self.get_index(&ColId::dec)),
            tokens.get(self.get_index(&ColId::plx)),
            tokens.get(self.get_index(&ColId::phot_dist)),
            tokens.get(self.get_index(&ColId::plx_err)),
            tokens.get(self.get_index(&ColId::pmra)),
            tokens.get(self.get_index(&ColId::pmdec)),
            tokens.get(self.get_index(&ColId::radvel)),
            tokens.get(self.get_index(&ColId::gmag)),
            tokens.get(self.get_index(&ColId::bpmag)),
            tokens.get(self.get_index(&ColId::rpmag)),
            tokens.get(self.get_index(&ColId::col_idx)),
            tokens.get(self.get_index(&ColId::ruwe)),
            tokens.get(self.get_index(&ColId::ag)),
            tokens.get(self.get_index(&ColId::ebp_min_rp)),
            tokens.get(self.get_index(&ColId::teff)),
        )
    }
|
|
|
|
fn must_load_particle(&self, id: i64) -> bool {
|
|
match &self.must_load {
|
|
Some(must_load) => must_load.contains(&id),
|
|
None => false,
|
|
}
|
|
}
|
|
|
|
    /// Builds a `Particle` from the raw string tokens of one catalog row.
    ///
    /// Applies, in order: the fidelity test, the magnitude test, the
    /// parallax criteria, the RUWE cap, the geometric-distance presence
    /// test, and the distance test; rejected rows return `None` and bump
    /// the matching `rejected_*` counter. Accepted rows get parallax
    /// zero-point, extinction and reddening corrections applied according
    /// to `mag_corrections`.
    fn create_particle(
        &mut self,
        ssource_id: Option<&&str>,
        ship_id: Option<&&str>,
        snames: Option<&&str>,
        sra: Option<&&str>,
        sdec: Option<&&str>,
        splx: Option<&&str>,
        sphot_dist: Option<&&str>,
        splx_e: Option<&&str>,
        spmra: Option<&&str>,
        spmdec: Option<&&str>,
        srv: Option<&&str>,
        sappmag: Option<&&str>,
        sbp: Option<&&str>,
        srp: Option<&&str>,
        sbv: Option<&&str>,
        sruwe: Option<&&str>,
        sag: Option<&&str>,
        sebp_min_rp: Option<&&str>,
        steff: Option<&&str>,
    ) -> Option<Particle> {
        self.total_processed += 1;
        // Source ID
        let mut source_id: i64 = parse::parse_i64(ssource_id);

        // First, check if we accept it given the current constraints

        // Parallax:
        // If it comes from additional, just take it (already zero point-corrected)
        // Otherwise, apply zero point
        let mut plx: f64 = self.get_attribute_or_else(
            ColId::plx,
            source_id,
            parse::parse_f64(splx) - self.plx_zeropoint,
        );
        let plx_e: f64 = parse::parse_f64(splx_e);

        // Gmag: additional, else column, else use bp and rp
        let mut appmag: f64 =
            self.get_attribute_or_else(ColId::gmag, source_id, parse::parse_f64(sappmag));
        if !appmag.is_finite() {
            if !parse::is_empty(sbp) && !parse::is_empty(srp) {
                let bp: f64 = parse::parse_f64(sbp);
                let rp: f64 = parse::parse_f64(srp);
                appmag = self.gmag_from_xp(bp, rp);
            }
        }

        let has_fidelity = self.has_additional_col(ColId::fidelity);
        let has_geodist = self.has_additional_col(ColId::geodist);

        // Distance: photometric distance is in catalog.
        let phot_dist = if self.use_phot_dist && !parse::is_empty(sphot_dist) {
            parse::parse_f64(sphot_dist)
        } else {
            -1.0
        };

        // Hipparcos rows have no source_id; fall back to the HIP number.
        let hip: i32 = parse::parse_i32(ship_id);
        if source_id == 0 {
            source_id = hip as i64;
        }

        let must_load = self.must_load_particle(source_id);

        // Fidelity test.
        if has_fidelity && !self.accept_fidelity(source_id) {
            self.rejected_fidelity += 1;
            return None;
        }

        if !self.accept_magnitude(appmag) {
            // Stars without magnitude are always rejected.
            self.rejected_mag += 1;
            return None;
        } else if !has_geodist && !self.use_phot_dist {
            // Parallax test, only if there are no geo_distances
            // and we are not using photometric distances (or phot_dist is invalid).
            if plx.is_finite() && plx <= 0.0 {
                // If parallax is negative...
                if self.allow_negative_plx {
                    // If allow negative, just set to default positive value (25 kpc).
                    plx = 0.04;
                } else {
                    // Otherwise, reject.
                    self.rejected_plx += 1;
                    self.rejected_plx_neg += 1;
                    return None;
                }
            } else if !must_load && !self.accept_parallax(appmag, plx, plx_e) {
                // Reject due to parallax criteria.
                self.rejected_plx += 1;
                self.rejected_plx_crit += 1;
                return None;
            }
        }

        // Extra attributes
        let mut extra: HashMap<ColId, f32> = HashMap::with_capacity(2);

        let ruwe_val: f32 = self.get_ruwe(source_id, sruwe);
        // RUWE test
        if !must_load && !self.accept_ruwe(ruwe_val) {
            self.rejected_ruwe += 1;
            return None;
        }
        if ruwe_val.is_finite() {
            extra.insert(ColId::ruwe, ruwe_val);
        }

        // If we have geometric distances, we only accept stars which have one, otherwise
        // we accept all.
        let geodist_pc = self.get_geodistance(source_id);
        let has_geodist_star = geodist_pc > 0.0;
        if !(must_load || !has_geodist || (has_geodist && has_geodist_star)) {
            self.rejected_geodist += 1;
            return None;
        }

        // Distance: photometric, else geometric, else inverse parallax
        // (mas -> pc). NOTE(review): when `use_phot_dist` is set but the
        // row has no valid photometric distance, this is -1.0 (rejected
        // below) rather than falling back to parallax — confirm intended.
        let dist_pc: f64;
        dist_pc = if self.use_phot_dist {
            if phot_dist > 0.0 {
                phot_dist
            } else {
                -1.0
            }
        } else if has_geodist {
            if geodist_pc > 0.0 {
                geodist_pc
            } else {
                -1.0
            }
        } else {
            1000.0 / plx
        };

        // Distance test
        if !must_load && !self.accept_distance(dist_pc) {
            self.rejected_dist += 1;

            if !dist_pc.is_finite() {
                self.rejected_dist_inf += 1;
            }
            if dist_pc <= 0.0 {
                self.rejected_dist_neg += 1;
            }

            return None;
        }
        // Distance in internal units.
        let dist: f64 = dist_pc * constants::PC_TO_U;

        // Parallax error
        let plx_err: f32 = parse::parse_f32(splx_e);
        if plx_err.is_finite() {
            extra.insert(ColId::plx_err, plx_err);
        }

        // Name: pipe-separated list of names.
        let mut name_vec = Vec::new();
        if !parse::is_empty(snames) {
            let name_tokens: Vec<&str> = snames.unwrap().split("|").collect();
            for name_token in name_tokens {
                name_vec.push(String::from(name_token));
            }
        }

        // RA and DEC
        let ra: f64 = parse::parse_f64(sra);
        let dec: f64 = parse::parse_f64(sdec);

        let pos = util::spherical_to_cartesian(
            ra.to_radians(),
            dec.to_radians(),
            dist.max(constants::NEGATIVE_DIST),
        );

        // Proper motions
        let mualphastar: f64 = parse::parse_f64(spmra);
        let mudelta: f64 = parse::parse_f64(spmdec);
        let radvel: f64 = parse::parse_f64(srv);
        // Missing radial velocity is treated as zero for the cartesian PM.
        let mut rv_val: f64 = radvel;
        if rv_val.is_nan() {
            rv_val = 0.0;
        }

        let pm = util::propermotion_to_cartesian(
            mualphastar,
            mudelta,
            rv_val,
            ra.to_radians(),
            dec.to_radians(),
            dist_pc,
        );

        // Apparent magnitudes
        let mut ag: f64 = self.get_attribute_or_else(ColId::ag, source_id, parse::parse_f64(sag));
        // Galactic latitude feeds the analytical extinction/reddening models.
        let pos_eq: Vector3<f64> = Vector3::new(pos.x, pos.y, pos.z);
        let pos_gal: Vector3<f64> = self.coord.eq_gal.transform_vector(&pos_eq);
        let pos_gal_sph = util::cartesian_to_spherical(pos_gal.x, pos_gal.y, pos_gal.z);
        let b = pos_gal_sph.y;
        let magcorraux = f64::min(dist_pc, 150.0 / b.sin().abs());

        // Analytical extinction, cap to 3.2
        if self.mag_corrections == 2 && !ag.is_finite() {
            ag = f64::min(3.2, magcorraux * 5.9e-4);
        }

        // Apply only if mag_corrections > 0
        if self.mag_corrections > 0 && ag.is_finite() {
            appmag -= ag;
        }

        // Absolute magnitude (falls back to 10 pc when distance is unusable,
        // which makes absmag == appmag).
        let absmag = appmag
            - 5.0
                * f64::log10(if !dist_pc.is_finite() || dist_pc <= 0.0 {
                    10.0
                } else {
                    dist_pc
                })
            + 5.0;

        // Size: pseudo-luminosity from the absolute magnitude, clamped.
        let pseudo_l = f64::powf(10.0, -0.4 * absmag);
        let size_fac = constants::PC_TO_M * constants::M_TO_U * 0.15;
        let size: f32 = f64::min(pseudo_l.powf(0.5) * size_fac, 1e10) as f32;

        // Color: reddening E(BP-RP) according to the correction mode.
        let pebr =
            self.get_attribute_or_else(ColId::ebp_min_rp, source_id, parse::parse_f64(sebp_min_rp));
        let ebr: f64 = match self.mag_corrections {
            // No corrections
            0 => 0.0,
            // Only from catalog
            1 => {
                if pebr.is_finite() {
                    pebr
                } else {
                    0.0
                }
            }
            // From catalog, if not, analytical
            2 => {
                if pebr.is_finite() {
                    pebr
                } else {
                    f64::min(1.6, magcorraux * 2.9e-4)
                }
            }
            // Default is zero
            _ => 0.0,
        };

        // Color index and effective temperature: prefer BP-RP, then B-V,
        // then a solar-like default color index of 0.656.
        let col_idx: f64;
        let mut teff: f64 = parse::parse_f64(steff);
        if !parse::is_empty(sbp) && !parse::is_empty(srp) {
            let bp: f64 = parse::parse_f64(sbp);
            let rp: f64 = parse::parse_f64(srp);
            col_idx = bp - rp - ebr;

            if !teff.is_finite() {
                teff = color::xp_to_teff(col_idx);
            }
        } else if !parse::is_empty(sbv) {
            col_idx = parse::parse_f64(sbv);
            teff = color::bv_to_teff_ballesteros(col_idx);
        } else {
            col_idx = 0.656;
            teff = color::bv_to_teff_ballesteros(col_idx);
        }
        let (col_r, col_g, col_b) = color::teff_to_rgb(teff);
        let color_packed: f32 = color::col_to_f32(col_r as f32, col_g as f32, col_b as f32, 1.0);

        // Update counts per mag
        let appmag_clamp = f64::clamp(appmag, 0.0, 21.0) as usize;
        self.counts_per_mag.borrow_mut()[appmag_clamp] += 1;

        self.total_loaded += 1;
        if self.total_loaded % 100000 == 0 {
            log::debug!(" object {}", self.total_loaded);
        }
        Some(Particle {
            x: pos.x,
            y: pos.y,
            z: pos.z,
            pmx: pm.x as f32,
            pmy: pm.y as f32,
            pmz: pm.z as f32,
            mualpha: mualphastar as f32,
            mudelta: mudelta as f32,
            radvel: radvel as f32,
            appmag: appmag as f32,
            absmag: absmag as f32,
            col: color_packed,
            size,
            hip,
            id: source_id,
            names: name_vec,
            extra,
        })
    }
|
|
|
|
    /// Magnitude criterion: only stars with a finite apparent magnitude
    /// are accepted.
    fn accept_magnitude(&self, appmag: f64) -> bool {
        appmag.is_finite()
    }
|
|
|
|
fn accept_parallax(&self, appmag: f64, plx: f64, plx_e: f64) -> bool {
|
|
if !plx.is_finite() {
|
|
return false;
|
|
} else if appmag < 13.1 {
|
|
return plx >= 0.0 && plx_e < plx * self.plx_err_bright && plx_e < self.plx_err_cap;
|
|
} else {
|
|
return plx >= 0.0 && plx_e < plx * self.plx_err_faint && plx_e < self.plx_err_cap;
|
|
}
|
|
}
|
|
|
|
    /// Distance criterion.
    // Accept if it is finite, it is greater than zero.
    // The distance cap check happens after the cross-match with Hipparcos.
    fn accept_distance(&self, dist_pc: f64) -> bool {
        dist_pc.is_finite() && dist_pc > 0.0
    }
|
|
|
|
fn accept_fidelity(&self, source_id: i64) -> bool {
|
|
let fidelity = self.get_additional(ColId::fidelity, source_id);
|
|
match fidelity {
|
|
Some(d) => return d > 0.5,
|
|
None => false,
|
|
}
|
|
}
|
|
|
|
fn get_geodistance(&self, source_id: i64) -> f64 {
|
|
let geodist = self.get_additional(ColId::geodist, source_id);
|
|
match geodist {
|
|
Some(d) => {
|
|
if d.is_finite() {
|
|
d
|
|
} else {
|
|
-1.0
|
|
}
|
|
}
|
|
None => -1.0,
|
|
}
|
|
}
|
|
|
|
    /// RUWE criterion: pass when there is no RUWE value (NaN), no cap is
    /// configured (cap is NaN), or the value is below the cap.
    fn accept_ruwe(&self, ruwe: f32) -> bool {
        ruwe.is_nan() || self.ruwe_cap.is_nan() || ruwe < self.ruwe_cap
    }
|
|
|
|
fn get_ruwe(&self, source_id: i64, sruwe: Option<&&str>) -> f32 {
|
|
if self.has_col(ColId::ruwe) {
|
|
parse::parse_f32(sruwe)
|
|
} else {
|
|
let ruwe = self.get_additional(ColId::ruwe, source_id);
|
|
match ruwe {
|
|
Some(d) => {
|
|
if d.is_finite() {
|
|
d as f32
|
|
} else {
|
|
f32::NAN
|
|
}
|
|
}
|
|
None => f32::NAN,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Get side-loaded attributes, or a default value.
|
|
/// If the column with the given ID is in the additional list, return it.
|
|
/// Otherwise, return the given `orelse` default value.
|
|
///
|
|
/// # Arguments
|
|
/// * `col_id` - The column identifier.
|
|
/// * `source_id` - The source ID to look up.
|
|
/// * `orelse` - The default value to return if the given `col_id` for the `source_id` is not found in additional.
|
|
fn get_attribute_or_else(&self, col_id: ColId, source_id: i64, orelse: f64) -> f64 {
|
|
match self.get_additional(col_id, source_id) {
|
|
Some(val) => {
|
|
if val.is_finite() {
|
|
val
|
|
} else {
|
|
orelse
|
|
}
|
|
}
|
|
None => orelse,
|
|
}
|
|
}
|
|
|
|
fn get_additional(&self, col_id: ColId, source_id: i64) -> Option<f64> {
|
|
if self.additional.is_empty() {
|
|
None
|
|
} else {
|
|
for entry in &self.additional {
|
|
if entry.has_col(col_id) {
|
|
return entry.get(col_id, source_id);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
}
|
|
|
|
fn has_col(&self, col_id: ColId) -> bool {
|
|
!self.indices.is_empty()
|
|
&& self.indices.contains_key(&col_id)
|
|
&& self.indices.get(&col_id).expect("Error getting column") >= &0
|
|
}
|
|
|
|
fn has_additional_col(&self, col_id: ColId) -> bool {
|
|
if self.additional.is_empty() {
|
|
false
|
|
} else {
|
|
for entry in &self.additional {
|
|
if entry.has_col(col_id) {
|
|
return true;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
}
|
|
|
|
fn has_additional(&self, col_id: ColId, source_id: i64) -> bool {
|
|
let val = self.get_additional(col_id, source_id);
|
|
match val {
|
|
Some(v) => v.is_finite(),
|
|
None => false,
|
|
}
|
|
}
|
|
|
|
fn gmag_from_xp(&self, bp: f64, rp: f64) -> f64 {
|
|
if !bp.is_finite() || !rp.is_finite() {
|
|
return f64::NAN;
|
|
} else {
|
|
// jordan email (Thu, 22 Apr 2021 12:35:11 )
|
|
// gmag = -2.5*log10(10.0**((25.3385-phot_bp_mean_mag)/2.5)+10.0**((24.7479-phot_rp_mean_mag)/2.5))+25.6874
|
|
return -2.5
|
|
* f64::log10(
|
|
10.0_f64.powf((25.3385 - bp) / 2.5) + 10.0_f64.powf((24.7479 - rp) / 2.5),
|
|
)
|
|
+ 25.6874;
|
|
}
|
|
}
|
|
|
|
    /// Logs a summary of the load: totals plus every rejection category
    /// accumulated by `create_particle`.
    pub fn report_rejected(&self) {
        log::info!(
            "::: TOTAL LOADED/PROCESSED: {}/{}",
            self.total_loaded,
            self.total_processed
        );
        log::info!(
            " - Rejected due to parallax (criteria/negative): {}",
            self.rejected_plx
        );
        log::info!(" - criteria: {}", self.rejected_plx_crit);
        log::info!(" - negative: {}", self.rejected_plx_neg);
        log::info!(
            " - Rejected due to non-finite magnitude: {}",
            self.rejected_mag
        );
        log::info!(" - Rejected due to distance: {}", self.rejected_dist);
        log::info!(" - infinite: {}", self.rejected_dist_inf);
        log::info!(" - null/negative: {}", self.rejected_dist_neg);
        log::info!(
            " - Rejected due to geo-distance (not present): {}",
            self.rejected_geodist
        );
        log::info!(
            " - Rejected due to fidelity (criteria/): {}",
            self.rejected_fidelity
        );
        log::info!(
            " - Rejected due to ruwe (criteria): {}",
            self.rejected_ruwe
        );
    }
|
|
}
|