AJMR-Python-Baird/EWR_D3D/WSP_DB.py

#%% Script to read and work with Access database

import pyodbc
import pandas as pd
import geopandas as gpd

#%% Establish a connection to the Access database
conn_str = r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/arey/files/Projects/Grassy Narrows/Wood English-Wabigoon Export_alldata_2017-current/Wood English-Wabigoon Export_alldata_2017-current.accdb;'
conn = pyodbc.connect(conn_str)

# Get the list of tables
tables = conn.cursor().tables()

# Save a list of the table names
table_names = [table.table_name for table in tables]


#%% Print rows from a table
cursor = conn.cursor()
cursor.execute('SELECT * FROM [General_Query]')
for row in cursor.fetchall():
    print(row)


#%% Print the column names
cursor = conn.cursor()
cursor.execute('SELECT * FROM [General_Query]')
columns = [column[0] for column in cursor.description]
print(columns)

#%% Save the table to a pandas dataframe
df = pd.read_sql_query('SELECT * FROM [General_Query]', conn)

#%% Save the table to a pandas dataframe
df_Samples = pd.read_sql_query('SELECT * FROM [EW_Export_Samples]', conn)

#%% Save the table to a pandas dataframe
df_Results = pd.read_sql_query('SELECT * FROM [EW_Export_Results]', conn)

#%% Save the table to a pandas dataframe
df_Locations = pd.read_sql_query('SELECT * FROM [EW_Export_Locations]', conn)

#%% Close the connection
conn.close()


#%% Select a subset of the data where the media is Surface Water ('SW') and the param_name is 'Mercury' and the fraction is 'T'
df_SW_Hg = df.loc[(df['media'] == 'SW') & (df['param_name'] == 'Mercury') & (df['fraction'] == 'T'), ['Long_coord', 'Lat_coord', 'ppm_result', 'ppm_uom', 'field_sample_date', 'param_name', 'tech_task_name']]

# Convert to geo dataframe using Lat and Long
gdf_SW_Hg = gpd.GeoDataFrame(df_SW_Hg, geometry=gpd.points_from_xy(df_SW_Hg.Long_coord, df_SW_Hg.Lat_coord, crs="EPSG:4326"))

# Convert result to numeric
gdf_SW_Hg['ppm_result'] = pd.to_numeric(gdf_SW_Hg['ppm_result'])

# Where ppm_uom is 'MG/L', convert to NG/L
gdf_SED_Hg.loc[gdf_SED_Hg['ppm_uom'] == 'MG/L', 'ppm_result'] = gdf_SED_Hg['ppm_result'] * 1000000

# Drop where no result or location
gdf_SW_Hg = gdf_SW_Hg.dropna(subset=['ppm_result', 'Long_coord', 'Lat_coord'])

# Convert datetime to string for shapefile
gdf_SW_Hg['field_sample_date'] = gdf_SW_Hg['field_sample_date'].dt.strftime('%Y-%m')

# Save as a shapefile
gdf_SW_Hg.to_file('C:/Users/arey/files/Projects/Grassy Narrows/Wood English-Wabigoon Export_alldata_2017-current/EW_Export_Samples_SW_Hg_RevB.shp')


#%% Select a subset of the data where:
# - media is Sediment ('SED')
# - the param_name is 'Mercury'
# - The top depth is 0

df_SED_Hg = df.loc[(df['media'] == 'SED') & (df['param_name'] == 'Mercury') & (df['top_depth'] == 0), ['Long_coord', 'Lat_coord', 'ppm_result', 'ppm_uom']]

# Convert to geo dataframe using Lat and Long
gdf_SED_Hg = gpd.GeoDataFrame(df_SED_Hg, geometry=gpd.points_from_xy(df_SED_Hg.Long_coord, df_SED_Hg.Lat_coord, crs="EPSG:4326"))

# Convert result to numeric
gdf_SED_Hg['ppm_result'] = pd.to_numeric(gdf_SED_Hg['ppm_result'])

# Where ppm_uom is 'MG/KG', convert to NG/G
gdf_SED_Hg.loc[gdf_SED_Hg['ppm_uom'] == 'MG/KG', 'ppm_result'] = gdf_SED_Hg['ppm_result'] * 1000

# Drop rows with no locatuion
gdf_SED_Hg = gdf_SED_Hg.dropna(subset=['Long_coord', 'Lat_coord'])

# Save as a shapefile
df_SW_Hg.to_file('C:/Users/arey/files/Projects/Grassy Narrows/Wood English-Wabigoon Export_alldata_2017-current/EW_Export_Samples_SW_Hg.shp')