简化 python 代码并消除重复

Streamlining python code and removing duplication

提问人:AlbPet 提问时间:9/11/2023 更新时间:9/11/2023 访问量:39

问:

我有编码的光谱,包括一条直线,以连续校正 (b) laze 校正、(N) ormalised 和简单的 (c) osmic 射线修剪数据(bNc 数据)。我认为我的代码变得非常混乱,我想确保当我将其保存到文件夹中名为“Prepare.mat”的文件中时,我可以访问下一步所需的所有变量。我认为有一些重复,但我想确保我已正确保存它和所有变量。我的光谱绘制正确,但是,它仍然加载了糟糕的观测结果。有人可以关注这一点并检查、重复、简化(如果可能的话)。

import os
import numpy as np
import matplotlib.pyplot as plt
import h5py
from scipy.io import loadmat, savemat
from scipy.signal import medfilt
import scipy.io
import scipy.io as sio  # Import scipy.io as sio
%matplotlib inline
%matplotlib widget


# Directory where the MATLAB files are located
output_folder = '/Users/XXX/Desktop/Starx/'

# Define dimensions
row_size = 29     # Define the number of rows
col_size =  2047  # Define the number of columns
num_obs = 23

# Get a list of .mat files in the directory
mat_files = [filename for filename in os.listdir(directory) if filename.startswith('202') and filename.endswith('.mat')]


# Initialize arrays to store data
num_files = len(mat_files)
ALLcalspec1 = np.empty((row_size, col_size, num_obs))
ALLcalspec2 = np.empty((row_size, col_size, num_obs))
ALLfib4spec = np.empty((row_size, col_size, num_obs))
ALLfib4spec0 = np.empty((row_size, col_size, num_obs))
ALLfib4specwave = np.empty((row_size, col_size, num_obs))
ALLfib4specbNc = np.empty((row_size, col_size, num_obs))
ALLfib4specerr = np.empty((row_size, col_size, num_obs))
ALLfib4specS = np.empty((row_size, col_size, num_obs))
ALLJD = np.empty(num_files)

# Load data from .mat files
for s, filename in enumerate(mat_files):
    data = sio.loadmat(os.path.join(directory, filename))
    calspec1 = data.get('calspec1')
    calspec2 = data.get('calspec2')
    fib4_spec = data.get('fib4_spec')
    fib4_spec0 = data.get('fib4_spec0')
    fib4_spec_wave = data.get('fib4_spec_wave')
    fib4_spec_bNc = data.get('fib4_spec_bNc')
    fib4_specerr = data.get('fib4_specerr')
    fib4_specS = data.get('fib4_specS')
    JD = data.get('JD')
    
    ALLcalspec1[:, :, s] = calspec1
    ALLcalspec2[:, :, s] = calspec2
    ALLfib4spec[:, :, s] = fib4_spec
    ALLfib4spec0[:, :, s] = fib4_spec0
    ALLfib4specwave[:, :, s] = fib4_spec_wave
    ALLfib4specbNc[:, :, s] = fib4_spec_bNc
    ALLfib4specerr[:, :, s] = np.real(fib4_specerr)
    ALLfib4specS[:, :, s] = fib4_specS
    ALLJD[s] = JD
    
# Directory where the synthetic spectrum MATLAB file is located
synthetic_spectrum_file = '/Users/XXX/Desktop/StarX/synth_teff4500_logg45.mat'   
    
# Load the synthetic spectrum data using scipy's loadmat function
synthetic_spectrum_data = loadmat(synthetic_spectrum_file)

# Extract the relevant data from the loaded dictionary
synthetic_wave = synthetic_spectrum_data.get('wavelength', np.array([]))

# Load the 'bcorr_fib4_all.mat' file using h5py
hdf5_file_path = '/Users/XXX/Desktop/StarX/bcorr_fib4_all.mat'

with h5py.File(hdf5_file_path, 'r') as hdf5_file:
    # Assuming 'bcorr_fib4_all' is a dataset in the HDF5 file, you can load it like this:
    bcorr_fib4_all = hdf5_file['bcorr_fib4_all'][:]

    
# Calculate 'ti' without transposing, and calculate median along axis 0 (first dimension)
ti = np.median(ALLfib4spec, axis=0)

# Create a boolean mask for 'bad' observations
bad_obs_mask = np.median(ti, axis=1) < 10000

# Apply the filter along the first dimension for all relevant arrays
ALLfib4spec0 = ALLfib4spec0[:, ~bad_obs_mask, :]
ALLfib4specbNc = ALLfib4specbNc[:, ~bad_obs_mask, :]
ALLfib4specerr = ALLfib4specerr[:, ~bad_obs_mask, :]
ALLfib4specwave = ALLfib4specwave[:, ~bad_obs_mask, :]
ALLcalspec1 = ALLcalspec1[:, ~bad_obs_mask]
ALLcalspec2 = ALLcalspec2[:, ~bad_obs_mask]
ALLJD = [jd for i, jd in enumerate(ALLJD) if not bad_obs_mask[i]]
bcorr_fib4_all = [bcorr for i, bcorr in enumerate(bcorr_fib4_all) if not bad_obs_mask[i]]

# Continuum correction

nALLfib4specbNc = np.copy(ALLfib4specbNc)
print('obs: ', end='')

for obs in range(nALLfib4specbNc.shape[2]):
    print(f'{obs + 1} ', end='')
    A = nALLfib4specbNc[:, :, obs]
    nA = np.empty_like(A)

    for ord in range(A.shape[0]):
        x = A[ord, :]
        window_size = 301  # Adjust this as needed
        x_med = medfilt(x, kernel_size=window_size)
        nA[ord, :] = np.divide(x, x_med, out=np.zeros_like(x), where=x_med != 0)

    nALLfib4specbNc[:, :, obs] = nA

print('\n')

# Clip values to remove outliers
nALLfib4specbNc[nALLfib4specbNc > 1.2] = 1.2
nALLfib4specbNc[nALLfib4specbNc < 0.01] = 0.01

# Save 'nALLfib4specbNc' to 'Prepared.mat'
nALLfib4specbNc_variable_name = 'nALLfib4specbNc'  


# Create a dictionary to store the variables you want to save
data_to_save = {}

# Collect all variables that start with 'ALL' from your variables
for var_name in globals().keys():
    if var_name.startswith('ALL'):
        data_to_save[var_name] = globals()[var_name]

# Specify the file name for saving
file_name = os.path.join(output_folder, 'Prepared.mat')

# Save the variables to a .mat file
scipy.io.savemat(file_name, data_to_save)
Python Matlab 简化

评论


答: 暂无答案