function lcdstations = importfile(filename, startRow, endRow)
%IMPORTFILE Import fixed-width station records from a text file as a table.
%   LCDSTATIONS = IMPORTFILE(FILENAME) reads every row of the text file
%   FILENAME.
%
%   LCDSTATIONS = IMPORTFILE(FILENAME, STARTROW, ENDROW) reads rows STARTROW
%   through ENDROW of FILENAME. STARTROW and ENDROW may be vectors of equal
%   length; each (STARTROW(k), ENDROW(k)) pair selects one block of rows and
%   the blocks are concatenated in order.
%
%   Note: when fewer than three arguments are supplied, both STARTROW and
%   ENDROW are reset to their defaults (1 and inf), so a lone STARTROW is
%   ignored.
%
%   The result is a table with one variable per file column. Columns
%   1, 6, 8, 9, 16, 17, 18 and 20 are converted to double (text that cannot
%   be parsed as a single number becomes NaN); the remaining columns are
%   returned as string or categorical.
%
%   Raises an error if FILENAME cannot be opened or if TEXTSCAN cannot parse
%   the file with the expected column layout.
%
% Example:
%   lcdstations = importfile('lcd-stations.txt', 1, 272);
%
%    See also TEXTSCAN.

% Auto-generated by MATLAB on 2019/11/25 12:59:26

%% Initialize variables.
if nargin<=2
    startRow = 1;
    endRow = inf;
end

% Indices (into the file's columns) of the fields converted to numbers and
% of the fields kept as text.
numericCols = [1,6,8,9,16,17,18,20];
stringCols = [2,3,4,5,7,10,11,12,13,14,15,19];

%% Read columns of data as text:
% For more information, see the TEXTSCAN documentation.
formatSpec = '%5s%5s%11s%20s%11s%6s%4s%12s%12s%52s%61s%26s%2s%3s%9s%11s%11s%9s%4s%s%[^\n\r]';

% Options shared by every TEXTSCAN call below (everything except the
% per-block 'HeaderLines' value).
scanOpts = {'Delimiter', '', 'WhiteSpace', '', 'TextType', 'string', ...
    'ReturnOnError', false, 'EndOfLine', '\r\n'};

%% Open the text file.
[fileID, openMsg] = fopen(filename,'r');
if fileID == -1
    error('importfile:fileOpenFailed', 'Unable to open file "%s": %s', ...
        char(filename), openMsg);
end
% Guarantee the handle is released even if TEXTSCAN throws.
fileCloser = onCleanup(@() fclose(fileID));

%% Read columns of data according to the format.
% This call is based on the structure of the file used to generate this
% code. If an error occurs for a different file, try regenerating the code
% from the Import Tool.
dataArray = textscan(fileID, formatSpec, endRow(1)-startRow(1)+1, ...
    scanOpts{:}, 'HeaderLines', startRow(1)-1);
for block=2:length(startRow)
    % Each additional block is read from the top of the file, skipping the
    % rows that precede it.
    frewind(fileID);
    dataArrayBlock = textscan(fileID, formatSpec, ...
        endRow(block)-startRow(block)+1, ...
        scanOpts{:}, 'HeaderLines', startRow(block)-1);
    for col=1:length(dataArray)
        dataArray{col} = [dataArray{col};dataArrayBlock{col}];
    end
end

%% Close the text file.
% Destroying the cleanup object runs fclose now instead of at function exit.
clear fileCloser

%% Remove white space around all cell columns.
for col = [2,4,5,7,10,11]
    dataArray{col} = strtrim(dataArray{col});
end

%% Convert the contents of columns containing numeric text to numbers.
% Replace non-numeric text with NaN.
% RAW starts out holding the text of every column except the trailing
% "rest of line" column; numeric columns are overwritten with doubles below.
raw = repmat({''},length(dataArray{1}),length(dataArray)-1);
for col=1:length(dataArray)-1
    raw(1:length(dataArray{col}),col) = mat2cell(dataArray{col}, ones(length(dataArray{col}), 1));
end

% Regular expression that isolates the numeric part of a field from any
% non-numeric prefix and suffix. The named tokens are required: the code
% below reads the match through RESULT.NUMBERS.
regexstr = '(?<prefix>.*?)(?<numbers>([-]*(\d+[\,]*)+[\.]{0,1}\d*[eEdD]{0,1}[-+]*\d*[i]{0,1})|([-]*(\d+[\,]*)*[\.]{1,1}\d+[eEdD]{0,1}[-+]*\d*[i]{0,1}))(?<suffix>.*)';
% Pattern a comma-containing number must satisfy for its commas to count as
% thousands separators.
thousandsRegExp = '^[-/+]*\d+?(\,\d{3})*\.{0,1}\d*$';

for col=numericCols
    rawData = dataArray{col};
    for row=1:size(rawData, 1)
        try
            result = regexp(rawData(row), regexstr, 'names');
            numbers = result.numbers;

            % Commas in non-thousand locations: leave the cell as text so
            % it is replaced with NaN further down.
            if contains(numbers, ',') && ...
                    isempty(regexp(numbers, thousandsRegExp, 'once'))
                continue
            end

            % Convert numeric text to numbers.
            parsed = textscan(char(strrep(numbers, ',', '')), '%f');
            value = parsed{1};
            if isscalar(value)
                raw{row, col} = value;
            else
                % Zero or several values parsed: not a single number, so
                % keep the text (it becomes NaN below).
                raw{row, col} = rawData{row};
            end
        catch
            % No numeric portion found (or conversion failed): keep the
            % original text (it becomes NaN below).
            raw{row, col} = rawData{row};
        end
    end
end

%% Split data into numeric and string columns.
rawNumericColumns = raw(:, numericCols);
rawStringColumns = string(raw(:, stringCols));

%% Replace non-numeric cells with NaN
R = cellfun(@(x) ~isnumeric(x) && ~islogical(x),rawNumericColumns); % Find non-numeric cells
rawNumericColumns(R) = {NaN}; % Replace non-numeric cells

%% Make sure any text containing <undefined> is properly converted to an <undefined> categorical
% The literal text "<undefined>" is blanked out before the categorical
% conversion below.
for catIdx = [2,8,9,10,11,12]
    idx = (rawStringColumns(:, catIdx) == "<undefined>");
    rawStringColumns(idx, catIdx) = "";
end

%% Create output variable
lcdstations = table;
lcdstations.WBAN = cell2mat(rawNumericColumns(:, 1));
lcdstations.TRANSMITTAL_ID = rawStringColumns(:, 1);
lcdstations.TRANSMITTAL_ID_TYPE = categorical(rawStringColumns(:, 2));
lcdstations.CALL = rawStringColumns(:, 3);
lcdstations.FAA_ID = rawStringColumns(:, 4);
lcdstations.WMO_ID = cell2mat(rawNumericColumns(:, 2));
lcdstations.NWSLI_ID = rawStringColumns(:, 5);
lcdstations.COOP_ID = cell2mat(rawNumericColumns(:, 3));
lcdstations.GHCND_ID = cell2mat(rawNumericColumns(:, 4));
lcdstations.CITY = rawStringColumns(:, 6);
lcdstations.LOCATION = rawStringColumns(:, 7);
lcdstations.LOCATION_AREA = categorical(rawStringColumns(:, 8));
lcdstations.STATE_PROV = categorical(rawStringColumns(:, 9));
lcdstations.FIPS_COUNTRY_CODE = categorical(rawStringColumns(:, 10));
lcdstations.NWS_REGION = categorical(rawStringColumns(:, 11));
lcdstations.LAT = cell2mat(rawNumericColumns(:, 5));
lcdstations.LON = cell2mat(rawNumericColumns(:, 6));
lcdstations.ELEV = cell2mat(rawNumericColumns(:, 7));
lcdstations.ELEV_GROUND_UNIT = categorical(rawStringColumns(:, 12));
lcdstations.UTC = cell2mat(rawNumericColumns(:, 8));
end