# -*- coding: utf-8 -*-
import numpy as np
import os

"""
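Convert the ASCII format of the USGS Spectral Library (splib06a) to a
compressed numpy binary file (splib06a.npz).

The numpy file contains 3 dicts, stored under the names 'labels', 'names'
and 'data'. The keys of all dicts are the directory names under ASCII. The
values are:
    - labels: human-readable name for the directory
    - names: list of filenames in the directory (extension removed)
    - data: list of data arrays (one per file, in the same order as names)
        cols: wavelength (um), reflectance, stddev
        entries equal to -1.23e34 in the ASCII files are stored as NaN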
"""

# Human-readable label for each single-letter directory key.
labels = {'A': 'man-made', 'C': 'coatings', 'L': 'volatiles', 'M': 'minerals',
          'S': 'mixtures', 'V': 'vegetation'}

# Root of the ASCII tree, relative to the current working directory.
# NOTE(review): the leading '.' makes this a hidden directory name; confirm
# that './splib06a/ASCII' was not what was intended.
ASCII_path = '.splib06a/ASCII'

# Number of leading lines skipped in every ASCII file before the numeric
# table (presumably the fixed-length file header -- confirm against the
# library's file format description).
HEADER_ROWS = 16

# Value in the ASCII files that is replaced by NaN after loading (presumably
# the library's marker for missing/deleted points -- confirm).
MISSING_VALUE = -1.23e34


def _load_spectrum(filepath, skiprows=HEADER_ROWS, missing=MISSING_VALUE):
    """Load one ASCII file as a float array, with `missing` mapped to NaN."""
    arr = np.loadtxt(filepath, skiprows=skiprows)
    arr[arr == missing] = np.nan
    return arr


def collect_spectra(ascii_path=ASCII_path, verbose=True):
    """Read every file in every directory below `ascii_path`.

    Directories and files are visited in sorted order so that the result
    (and therefore the saved archive) does not depend on the order in which
    the filesystem happens to list entries.

    Parameters
    ----------
    ascii_path : str
        Root directory to walk. Files directly inside it are ignored; only
        its (possibly nested) subdirectories are read.
    verbose : bool
        If true, print the path of each file as it is loaded.

    Returns
    -------
    names : dict
        Maps each directory path (relative to `ascii_path`) to the list of
        file names in it, extension removed.
    data : dict
        Maps the same keys to the list of loaded arrays, in the same order
        as the corresponding `names` entry.
    """
    import warnings

    if not os.path.isdir(ascii_path):
        # os.walk silently yields nothing for a missing directory, which
        # would otherwise produce an empty archive without any notice.
        warnings.warn('%r is not a directory; no spectra were read'
                      % (ascii_path,))

    names = {}
    data = {}
    for currdir, subdirs, files in os.walk(ascii_path):
        # Sorting in place controls the order in which os.walk descends.
        subdirs.sort()
        if currdir == ascii_path:
            continue
        key = os.path.relpath(currdir, ascii_path)
        namelist = []
        datalist = []
        for filename in sorted(files):
            filepath = os.path.join(currdir, filename)
            if verbose:
                print(filepath)
            namelist.append(os.path.splitext(filename)[0])
            datalist.append(_load_spectrum(filepath))
        names[key] = namelist
        data[key] = datalist
    return names, data


names, data = collect_spectra(ASCII_path)

# The dicts are stored as pickled 0-d object arrays; read them back with
# np.load('splib06a.npz', allow_pickle=True)['names'].item() (likewise for
# 'labels' and 'data').
np.savez_compressed('splib06a.npz', labels=labels, names=names, data=data)
