#%% Script to read and work with Access database import pyodbc import pandas as pd import geopandas as gpd #%% Establish a connection to the Access database conn_str = r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/arey/files/Projects/Grassy Narrows/Wood English-Wabigoon Export_alldata_2017-current/Wood English-Wabigoon Export_alldata_2017-current.accdb;' conn = pyodbc.connect(conn_str) # Get the list of tables tables = conn.cursor().tables() # Save a list of the table names table_names = [table.table_name for table in tables] #%% Print rows from a table cursor = conn.cursor() cursor.execute('SELECT * FROM [General_Query]') for row in cursor.fetchall(): print(row) #%% Print the column names cursor = conn.cursor() cursor.execute('SELECT * FROM [General_Query]') columns = [column[0] for column in cursor.description] print(columns) #%% Save the table to a pandas dataframe df = pd.read_sql_query('SELECT * FROM [General_Query]', conn) #%% Save the table to a pandas dataframe df_Samples = pd.read_sql_query('SELECT * FROM [EW_Export_Samples]', conn) #%% Save the table to a pandas dataframe df_Results = pd.read_sql_query('SELECT * FROM [EW_Export_Results]', conn) #%% Save the table to a pandas dataframe df_Locations = pd.read_sql_query('SELECT * FROM [EW_Export_Locations]', conn) #%% Close the connection conn.close() #%% Select a subset of the data where the media is Surface Water ('SW') and the param_name is 'Mercury' and the fraction is 'T' df_SW_Hg = df.loc[(df['media'] == 'SW') & (df['param_name'] == 'Mercury') & (df['fraction'] == 'T'), ['Long_coord', 'Lat_coord', 'ppm_result', 'ppm_uom', 'field_sample_date', 'param_name', 'tech_task_name']] # Convert to geo dataframe using Lat and Long gdf_SW_Hg = gpd.GeoDataFrame(df_SW_Hg, geometry=gpd.points_from_xy(df_SW_Hg.Long_coord, df_SW_Hg.Lat_coord, crs="EPSG:4326")) # Convert result to numeric gdf_SW_Hg['ppm_result'] = pd.to_numeric(gdf_SW_Hg['ppm_result']) # Where ppm_uom is 'MG/L', convert to NG/L gdf_SED_Hg.loc[gdf_SED_Hg['ppm_uom'] == 'MG/L', 'ppm_result'] = gdf_SED_Hg['ppm_result'] * 1000000 # Drop where no result or location gdf_SW_Hg = gdf_SW_Hg.dropna(subset=['ppm_result', 'Long_coord', 'Lat_coord']) # Convert datetime to string for shapefile gdf_SW_Hg['field_sample_date'] = gdf_SW_Hg['field_sample_date'].dt.strftime('%Y-%m') # Save as a shapefile gdf_SW_Hg.to_file('C:/Users/arey/files/Projects/Grassy Narrows/Wood English-Wabigoon Export_alldata_2017-current/EW_Export_Samples_SW_Hg_RevB.shp') #%% Select a subset of the data where: # - media is Sediment ('SED') # - the param_name is 'Mercury' # - The top depth is 0 df_SED_Hg = df.loc[(df['media'] == 'SED') & (df['param_name'] == 'Mercury') & (df['top_depth'] == 0), ['Long_coord', 'Lat_coord', 'ppm_result', 'ppm_uom']] # Convert to geo dataframe using Lat and Long gdf_SED_Hg = gpd.GeoDataFrame(df_SED_Hg, geometry=gpd.points_from_xy(df_SED_Hg.Long_coord, df_SED_Hg.Lat_coord, crs="EPSG:4326")) # Convert result to numeric gdf_SED_Hg['ppm_result'] = pd.to_numeric(gdf_SED_Hg['ppm_result']) # Where ppm_uom is 'MG/KG', convert to NG/G gdf_SED_Hg.loc[gdf_SED_Hg['ppm_uom'] == 'MG/KG', 'ppm_result'] = gdf_SED_Hg['ppm_result'] * 1000 # Drop rows with no locatuion gdf_SED_Hg = gdf_SED_Hg.dropna(subset=['Long_coord', 'Lat_coord']) # Save as a shapefile df_SW_Hg.to_file('C:/Users/arey/files/Projects/Grassy Narrows/Wood English-Wabigoon Export_alldata_2017-current/EW_Export_Samples_SW_Hg.shp')