Coastlines/readWBAN.m

133 lines
5.4 KiB
Matlab

function lcdstations = importfile(filename, startRow, endRow)
%IMPORTFILE Import a fixed-width station listing from a text file as a table.
%   LCDSTATIONS = IMPORTFILE(FILENAME) reads every row of text file
%   FILENAME.
%
%   LCDSTATIONS = IMPORTFILE(FILENAME, STARTROW) reads from row STARTROW
%   to the end of the file.
%
%   LCDSTATIONS = IMPORTFILE(FILENAME, STARTROW, ENDROW) reads rows
%   STARTROW through ENDROW. STARTROW and ENDROW may be vectors; each
%   (STARTROW(k), ENDROW(k)) pair is read as a separate block and the
%   blocks are concatenated in order.
%
%   The result is a table with one row per line read and the variables
%   WBAN, TRANSMITTAL_ID, TRANSMITTAL_ID_TYPE, CALL, FAA_ID, WMO_ID,
%   NWSLI_ID, COOP_ID, GHCND_ID, CITY, LOCATION, LOCATION_AREA,
%   STATE_PROV, FIPS_COUNTRY_CODE, NWS_REGION, LAT, LON, ELEV,
%   ELEV_GROUND_UNIT and UTC. Numeric variables hold NaN wherever the
%   text could not be interpreted as a number.
%
%   An error with identifier 'importfile:fileOpenFailed' is raised when
%   FILENAME cannot be opened for reading.
%
% Example:
%   lcdstations = importfile('lcd-stations.txt', 1, 272);
%
% See also TEXTSCAN, ONCLEANUP.

% Originally auto-generated by MATLAB on 2019/11/25 12:59:26
% NOTE(review): MATLAB dispatches on the file name, not on the name in the
% function line above - confirm which name callers actually use.

%% Initialize variables.
% Default each optional argument independently so that a two-argument call
% keeps the caller's STARTROW instead of silently resetting it to 1.
if nargin < 2
    startRow = 1;
end
if nargin < 3
    endRow = inf(size(startRow));
end

% Positions (within the 20 parsed fields) of numeric and text columns.
numericCols = [1,6,8,9,16,17,18,20];
stringCols = [2,3,4,5,7,10,11,12,13,14,15,19];

%% Read columns of data as text:
% For more information, see the TEXTSCAN documentation.
% Nineteen fixed-width fields, one free-width field, then a final
% catch-all field that swallows whatever remains on the line.
formatSpec = '%5s%5s%11s%20s%11s%6s%4s%12s%12s%52s%61s%26s%2s%3s%9s%11s%11s%9s%4s%s%[^\n\r]';
scanOptions = {'Delimiter', '', 'WhiteSpace', '', 'TextType', 'string', ...
    'ReturnOnError', false, 'EndOfLine', '\r\n'};

%% Open the text file.
[fileID, openMessage] = fopen(filename, 'r');
if fileID == -1
    error('importfile:fileOpenFailed', ...
        'Unable to open file "%s" for reading: %s', filename, openMessage);
end
% Guarantee the handle is released even if TEXTSCAN or the row indexing
% below throws.
closeFile = onCleanup(@() fclose(fileID));

%% Read columns of data according to the format.
% This call is based on the structure of the file used to generate this
% code. If an error occurs for a different file, try regenerating the code
% from the Import Tool.
dataArray = textscan(fileID, formatSpec, endRow(1)-startRow(1)+1, ...
    'HeaderLines', startRow(1)-1, scanOptions{:});
for block = 2:length(startRow)
    % Every block is addressed from the top of the file.
    frewind(fileID);
    dataArrayBlock = textscan(fileID, formatSpec, ...
        endRow(block)-startRow(block)+1, ...
        'HeaderLines', startRow(block)-1, scanOptions{:});
    for col = 1:length(dataArray)
        dataArray{col} = [dataArray{col}; dataArrayBlock{col}];
    end
end

%% Close the text file.
% Clearing the cleanup object runs FCLOSE now rather than at function exit.
clear closeFile

%% Remove white space around the padded text columns.
for col = [2,4,5,7,10,11]
    dataArray{col} = strtrim(dataArray{col});
end

%% Convert the contents of columns containing numeric text to numbers.
% RAW holds one cell per field; the trailing catch-all column is dropped.
raw = repmat({''}, length(dataArray{1}), length(dataArray)-1);
for col = 1:length(dataArray)-1
    raw(1:length(dataArray{col}),col) = mat2cell(dataArray{col}, ones(length(dataArray{col}), 1));
end

% Regular expression that isolates the numeric part of a field from any
% non-numeric prefix and suffix.
numberRegExp = '(?<prefix>.*?)(?<numbers>([-]*(\d+[\,]*)+[\.]{0,1}\d*[eEdD]{0,1}[-+]*\d*[i]{0,1})|([-]*(\d+[\,]*)*[\.]{1,1}\d+[eEdD]{0,1}[-+]*\d*[i]{0,1}))(?<suffix>.*)';
% Commas are accepted only as thousands separators.
thousandsRegExp = '^[-/+]*\d+?(\,\d{3})*\.{0,1}\d*$';

for col = numericCols
    rawData = dataArray{col};
    for row = 1:size(rawData, 1)
        try
            result = regexp(rawData(row), numberRegExp, 'names');
            % Errors (and falls through to CATCH) when nothing matched.
            numbers = result.numbers;

            % Detect commas in non-thousand locations.
            invalidThousandsSeparator = contains(numbers, ',') && ...
                isempty(regexp(numbers, thousandsRegExp, 'once'));

            % Convert numeric text to numbers. A field with misplaced
            % commas keeps its text and becomes NaN further below.
            if ~invalidThousandsSeparator
                parsed = textscan(char(strrep(numbers, ',', '')), '%f');
                raw{row, col} = parsed{1};
            end
        catch
            % Leave the original text in place; it becomes NaN below.
            raw{row, col} = rawData{row};
        end
    end
end

%% Split data into numeric and string columns.
rawNumericColumns = raw(:, numericCols);
rawStringColumns = string(raw(:, stringCols));

%% Replace non-numeric cells with NaN
R = cellfun(@(x) ~isnumeric(x) && ~islogical(x), rawNumericColumns); % Find non-numeric cells
rawNumericColumns(R) = {NaN}; % Replace non-numeric cells

%% Make sure any text containing <undefined> is properly converted to an <undefined> categorical
for catIdx = [2,8,9,10,11,12]
    idx = (rawStringColumns(:, catIdx) == "<undefined>");
    rawStringColumns(idx, catIdx) = "";
end

%% Create output variable
lcdstations = table;
lcdstations.WBAN = cell2mat(rawNumericColumns(:, 1));
lcdstations.TRANSMITTAL_ID = rawStringColumns(:, 1);
lcdstations.TRANSMITTAL_ID_TYPE = categorical(rawStringColumns(:, 2));
lcdstations.CALL = rawStringColumns(:, 3);
lcdstations.FAA_ID = rawStringColumns(:, 4);
lcdstations.WMO_ID = cell2mat(rawNumericColumns(:, 2));
lcdstations.NWSLI_ID = rawStringColumns(:, 5);
lcdstations.COOP_ID = cell2mat(rawNumericColumns(:, 3));
lcdstations.GHCND_ID = cell2mat(rawNumericColumns(:, 4));
lcdstations.CITY = rawStringColumns(:, 6);
lcdstations.LOCATION = rawStringColumns(:, 7);
lcdstations.LOCATION_AREA = categorical(rawStringColumns(:, 8));
lcdstations.STATE_PROV = categorical(rawStringColumns(:, 9));
lcdstations.FIPS_COUNTRY_CODE = categorical(rawStringColumns(:, 10));
lcdstations.NWS_REGION = categorical(rawStringColumns(:, 11));
lcdstations.LAT = cell2mat(rawNumericColumns(:, 5));
lcdstations.LON = cell2mat(rawNumericColumns(:, 6));
lcdstations.ELEV = cell2mat(rawNumericColumns(:, 7));
lcdstations.ELEV_GROUND_UNIT = categorical(rawStringColumns(:, 12));
lcdstations.UTC = cell2mat(rawNumericColumns(:, 8));