#################################################################################################################################################################################################################################################
# AUTHOR: Matthias Maier
# Task: Create nodes for biomass data using geodata
# Comment:
#   This file is run from main
#   Output parameters (e.g., potential and cost) refer to the final product, which is timber with 20% moisture
#################################################################################################################################################################################################################################################

################################################################################################################################
# IMPORT
from pathlib import Path
import geopandas as gpd
import time
import random
import pandas as pd
import numpy as np
import json
from matplotlib import pyplot as plt
from shapely.ops import nearest_points
from pyproj import Transformer
import folium
import webbrowser

# Reference time taken when this module is loaded; run() reports "Time passed" relative to it
start_time = time.time()

# Parameters (module-level constants read by run())
thinning_rate = 0.5 # Share of the standing volume density (srvolub) removed when thinning (50%)
max_felling_intensity = 84 # [m3/ha] Upper cap for the felling density
forwarder_capacity = 10.3 # [m3] Load per forwarder trip, used for the driving time per m3
forest_terrain_type = 2 # Skogsforsk Terrengtypschema; enters the forwarder driving speed formula
forwarder_other_time = 0.15 # [min/m3] Constant time added to the forwarder terminal and driving time
harvester_hourly_costs = 1828 # [NOK/h]
forwarder_hourly_costs = 1624 # [NOK/h]
max_distance_to_road = 5000 # [m] Nodes with longer transport distance will be removed due to infeasible extraction costs
total_forest_potential = 9470 # [1000m3/a] Reference value the distributed potential is checked against

################################################################################################################################

#################################################################################################################################################################################################################################################

def run(RID, number_of_nodes, pdir, plot_nodes = False):
    '''
    Function to create a set of random biomass nodes and export them as a feather file.

    Forest polygons are reduced to their centroids, number_of_nodes of them are drawn at random as
    "reduced centers" and every forest centroid is aggregated onto its nearest reduced center. Each
    center receives a share of the forest potential of its landsdel (proportional to its aggregated
    volume), an extraction distance to the closest road, harvester/forwarder productivities and
    production costs. Centers further away from a road than max_distance_to_road, or with production
    costs that are not below the timber price of their landsdel, are removed.

    :param RID: Run-ID. If run locally, RID = 'test'. Used as suffix of the output file names
    :param number_of_nodes: Number of random biomass nodes
    :param pdir: Path to the parent directory (i.e., input_calculations). A trailing path separator is added if it is missing
    :param plot_nodes: If True, histograms of selected features and a folium map of the feasible nodes are saved (the map is also opened in the browser)
    :return: None. The nodes are written to nodes_primary_biomass_resources/output_data/node_data_01_biomass_<RID>.feather
    :raises AssertionError: If a center cannot be assigned to a landsdel, or if the distributed potential/volume deviates too much from the reference
    '''

    ################################################################################################################################
    # IMPORT FILES
    print('### CREATING BIOMASS NODES ###')

    # All paths below are built by string concatenation, so pdir has to end with a path separator
    pdir = str(pdir)
    if not pdir.endswith(('/', '\\')):
        pdir += '/'
    RID = str(RID)
    data_dir = pdir + 'X_GLOBAL/data_processed/'

    with open(data_dir + 'crs.json', 'r') as openfile:
        base_crs = json.load(openfile)

    # Read data
    gdf_productive_forest = gpd.read_feather(data_dir + 'dataset_x16_skog_productive.feather')
    gdf_fylkedata = gpd.read_feather(data_dir + 'dataset_fylke.feather')
    gdf_road_network = gpd.read_feather(data_dir + 'dataset_Elveg2.feather')

    # Read all required sheets in one pass instead of parsing the workbook three times
    landsdel_sheets = pd.read_excel(data_dir + 'dataset_landsdeler.xlsx', index_col=0, sheet_name=['skog_data', 'incline', 'avrg_tree_volume'])
    df_skogdata_per_landsdel = landsdel_sheets['skog_data']
    df_incline = landsdel_sheets['incline']
    df_avrg_tree_volume = landsdel_sheets['avrg_tree_volume']
    metadata = pd.read_excel(pdir + 'X_GLOBAL/optimization_data/metadata.xlsx', index_col=0)['Value']

    # Check consistency
    for df in [gdf_productive_forest, gdf_fylkedata, gdf_road_network]:
        if df.crs != base_crs:
            print('INFORMATION: Different crs detected!')
            df.to_crs(base_crs, inplace=True)

    # Trim data (copy so that the column assignments below do not write into a slice of the raw data)
    gdf_productive_forest = gdf_productive_forest[['gid', 'srvolub', 'srtrean', 'geometry']].copy()
    gdf_road_network.geometry = gdf_road_network.geometry.force_2d()

    # Extract metadata
    timber_density = metadata['Timber density [kg/m3]'] / 1000 # Density of final timber product [tonne/m3]

    print('Time passed: {:.2f} seconds'.format(time.time() - start_time))
    ################################################################################################################################


    ################################################################################################################################
    # INPUT CALCULATION
    print('\n### POTENTIAL CALCULATION ###')

    # Calculate felling density: thinned volume density, capped at max_felling_intensity.
    # np.fmin keeps the index of the forest data and ignores NaN (a missing srvolub yields the cap)
    gdf_productive_forest['Felling density [m3/ha]'] = np.fmin(gdf_productive_forest['srvolub'] * thinning_rate, max_felling_intensity)

    # Translate volume density (srvolub) into total volume using area
    # NOTE(review): srvolub presumably is in m3/ha while the area is in squared crs units. The result is only used as a relative weight below - confirm
    gdf_productive_forest['srvolub'] = gdf_productive_forest['srvolub'] * gdf_productive_forest.area
    ################################################################################################################################


    ################################################################################################################################
    # REDUCE DATA SIZE
    # 1. Reduce polygons to their centroids (gdf_productive_forest)
    # 2. Pick n random centroids as "reduced centers"
    # 3. For all centroids (gdf_productive_forest), calculate their closest "reduced center" (nearest neighbor)
    # 4. Aggregate values from df_productive_forest given their assigned reduced center
    # 5. Determine in which landsdel the reduced center is
    # 6. Sum up total volume per landsdel and divide potential per landsdel by the total volume to obtain "potential per volume" for all landsdeler
    # 7. Assign potential to all reduced center using their volume and the "potential per volume" from their landsdel

    # Replace all forest polygons by their centroids
    gdf_productive_forest['geometry'] = gdf_productive_forest['geometry'].centroid

    # Pick random points to reduce problem size. The samples are positions, hence iloc
    id_samples = random.sample(range(len(gdf_productive_forest)), number_of_nodes)
    gdf_reduced_centers = gdf_productive_forest.iloc[id_samples][['gid', 'geometry']].rename(columns={'gid': 'centroid_gid'})

    # Find nearest neighbor (i.e., reduced center) for every forest point
    gdf_productive_forest = gpd.sjoin_nearest(gdf_productive_forest, gdf_reduced_centers)

    # Group and aggregate: Sum for extensive values and calculate arithmetic average for intrinsic values
    df_productive_forest_grouped = gdf_productive_forest.groupby('centroid_gid').agg({'srvolub': 'sum', 'Felling density [m3/ha]': 'mean'})

    # Append the aggregated data from df_productive_forest to df_reduced_centers
    gdf_reduced_centers = gdf_reduced_centers.set_index('centroid_gid')
    gdf_reduced_centers = pd.concat([gdf_reduced_centers, df_productive_forest_grouped], axis='columns')

    # Assign each center to its landsdel using "within". Only 'landsdel' is kept from the join; the column holding the
    # index of gdf_fylkedata (its name depends on the index name) is not needed and would get in the way of later joins
    center_columns = list(gdf_reduced_centers.columns)
    gdf_reduced_centers = gpd.sjoin(gdf_reduced_centers, gdf_fylkedata[['geometry', 'landsdel']], how='left', predicate='within')
    gdf_reduced_centers = gdf_reduced_centers[center_columns + ['landsdel']].copy()

    # Check that all centers are assigned. It might happen that a center cannot be assigned using "within" when the center is slightly outside the landsdel border. In that case, use closest landsdel
    not_assigned = gdf_reduced_centers['landsdel'].isna()
    if not_assigned.any():
        gdf_nearest_landsdel = gpd.sjoin_nearest(gdf_reduced_centers.loc[not_assigned, center_columns], gdf_fylkedata[['geometry', 'landsdel']])
        # A center with equal distance to several polygons is returned once per polygon. Keep the first match
        gdf_nearest_landsdel = gdf_nearest_landsdel[~gdf_nearest_landsdel.index.duplicated(keep='first')]
        gdf_reduced_centers.loc[gdf_nearest_landsdel.index, 'landsdel'] = gdf_nearest_landsdel['landsdel']

    # Check that assigning landsdeler worked. Some points might fail
    # (raised explicitly so that the check is not stripped when Python runs with -O)
    if gdf_reduced_centers['landsdel'].isna().any():
        raise AssertionError('Error when assigning landsdeler')

    # Volume per landsdel. This has to be summed up AFTER the fallback assignment above. Otherwise, the volume of the
    # centers assigned via the closest landsdel is missing in the denominator of "potential per volume" and too much potential is distributed
    df_landsdeler = gdf_reduced_centers.groupby('landsdel')[['srvolub']].sum()

    # Assign forest potential to the landsdeler
    df_landsdeler = pd.concat([df_landsdeler, df_skogdata_per_landsdel['Practical forest potential, MC 3-5 [1000m3]']], axis='columns')
    df_landsdeler['potential per volume'] = df_landsdeler['Practical forest potential, MC 3-5 [1000m3]'] / df_landsdeler['srvolub']

    # Assign forest potential to the reduced centers
    gdf_reduced_centers['ppv'] = gdf_reduced_centers['landsdel'].map(lambda ld: df_landsdeler.loc[ld, 'potential per volume'])
    gdf_reduced_centers['Potential [1000m3]'] = gdf_reduced_centers['ppv'] * gdf_reduced_centers['srvolub']

    # Check results for distribution of forest potential
    total_volume = gdf_productive_forest['srvolub'].sum()
    error_potential = gdf_reduced_centers['Potential [1000m3]'].sum() - total_forest_potential
    error_volume = gdf_reduced_centers['srvolub'].sum() - total_volume

    print('Error in potential [%]: {:.2f}'.format(error_potential/total_forest_potential*100)) # If few reduced centers, it can happen that a landsdel does not have any centers and its potential is thus not assigned
    print('Error in volume [%]: {:.2f}'.format(error_volume/total_volume*100))
    print('Time passed: {:.2f} seconds'.format(time.time() - start_time))

    # "not ... <" (instead of ">=") also fails for NaN
    if not abs(error_potential) < 10:
        raise AssertionError('Error in potential distribution')
    if not abs(error_volume) < 1:
        raise AssertionError('Error in volume distribution')
    ################################################################################################################################


    ################################################################################################################################
    # COST CALCULATION - Production costs
    print('\n### COST CALCULATION ###')

    #######################################################################################################
    # CALCULATE DISTANCE - Reduced centers to connection point on closest road (Extraction distance)
    print('Calculate distance - Reduced centers to connection point on closest road')

    # Calculate nearest road (linestring) for all points in df_reduced_centers. Only the road geometry is joined since no other road attribute is used
    gdf_reduced_centers = gpd.sjoin_nearest(gdf_reduced_centers, gdf_road_network[['geometry']])

    # When multiple roads have equal distance to a center, duplicate indices occur. Choose the first road and dump the rest
    gdf_reduced_centers = gdf_reduced_centers[~gdf_reduced_centers.index.duplicated(keep='first')].copy()

    # Get the geometry of the nearest road for the reduced centers using the index of the road
    gdf_reduced_centers = gdf_reduced_centers.rename(columns={'index_right': 'index_closest_road'})
    gdf_reduced_centers['closest_road'] = gdf_reduced_centers['index_closest_road'].map(lambda el: gdf_road_network.loc[el, 'geometry'])

    # Calculate the nearest point on the closest road for all reduced centers and the distance. Dump centers with distance > max_distance_to_road
    gdf_reduced_centers['point_on_closest_road'] = gdf_reduced_centers.apply(lambda r: nearest_points(r['closest_road'], r['geometry'])[0], axis='columns')
    gdf_reduced_centers['Extraction distance [m]'] = gdf_reduced_centers.apply(lambda r: r['geometry'].distance(r['point_on_closest_road']), axis='columns')
    gdf_reduced_centers = gdf_reduced_centers[gdf_reduced_centers['Extraction distance [m]'] <= max_distance_to_road].copy()

    print('Time passed: {:.2f} seconds'.format(time.time() - start_time))
    #######################################################################################################

    #######################################################################################################
    # CALCULATE PRODUCTIVITIES
    print('\nCalculate productivity harvester and forwarder')

    # Calculate average tree volume for reduced center via drawing from a distribution in the landsdel where the center is
    # (per landsdel row: columns 0-1 hold the probabilities, columns 2-3 the corresponding tree volumes)
    gdf_reduced_centers['Average Tree Volume [m3/tree]'] = gdf_reduced_centers['landsdel'].map(
        lambda ld: np.random.choice(df_avrg_tree_volume.loc[ld, :].iloc[2:4].values, size=1, p=df_avrg_tree_volume.loc[ld, :].iloc[0:2].values)[0])

    # Calculate incline for reduced center via drawing from a distribution in the landsdel where the center is
    gdf_reduced_centers['Incline class'] = gdf_reduced_centers['landsdel'].map(
        lambda ld: np.random.choice(df_incline.index.values, size=1, p=df_incline[ld])[0])

    # Harvester: Quadratic productivity model for small trees (< 0.2 m3/tree), linear model otherwise
    tree_volume = gdf_reduced_centers['Average Tree Volume [m3/tree]']
    gdf_reduced_centers['HVT Productivity [m3/h]'] = np.where(tree_volume < 0.2,
                                                              -191.96 * tree_volume * tree_volume + 115.82 * tree_volume + 0.9,
                                                              28.8 * tree_volume + 10.64)

    # Forwarder. A felling density of zero yields an infinite terminal time and thus infinite costs, i.e., the node is removed by the cost filter below
    felling_density = gdf_reduced_centers['Felling density [m3/ha]']
    incline_class = gdf_reduced_centers['Incline class']
    gdf_reduced_centers['FWD Terminal time [min/m3]'] = ((3.79 * felling_density + 65) * 0.75) / felling_density
    gdf_reduced_centers['FWD Driving speed [m/min]'] = 75 - 8.2 * forest_terrain_type - 1.4 * incline_class * incline_class
    gdf_reduced_centers['FWD Driving time [min/m3]'] = 2 * gdf_reduced_centers['Extraction distance [m]'] / (gdf_reduced_centers['FWD Driving speed [m/min]'] * forwarder_capacity) # Factor 2: Way there and back
    gdf_reduced_centers['FWD Total time [min/m3]'] = gdf_reduced_centers[['FWD Terminal time [min/m3]', 'FWD Driving time [min/m3]']].sum(axis='columns') + forwarder_other_time
    gdf_reduced_centers['FWD Productivity [m3/h]'] = 1 / gdf_reduced_centers['FWD Total time [min/m3]'] * 60

    gdf_reduced_centers['Diesel Consumption [l/m3]'] = (metadata['Biomass Production Diesel Consumption - Harvester [liter/hour]'] / gdf_reduced_centers['HVT Productivity [m3/h]'] +
                                                        metadata['Biomass Production Diesel Consumption - Forwarder [liter/hour]'] / gdf_reduced_centers['FWD Productivity [m3/h]']) # Diesel consumption per m3 fresh timber
    #######################################################################################################

    #######################################################################################################
    # CALCULATE OPEX
    print('Calculate OPEX')

    # OPEX
    gdf_reduced_centers['Production costs [NOK/m3]'] = harvester_hourly_costs / gdf_reduced_centers['HVT Productivity [m3/h]'] + forwarder_hourly_costs / gdf_reduced_centers['FWD Productivity [m3/h]'] # Production costs per m3 fresh wood

    # Add drying process (3-6 weeks drying reduces moisture from 40% to 20%) -> Potential decreases, specific costs increase
    dry_share_fresh = 0.6 # Dry share of fresh timber (moisture content 40%)
    dry_share_dried = 0.8 # Dry share of dried timber (moisture content 20%)
    gdf_reduced_centers['Potential [1000m3]'] = gdf_reduced_centers['Potential [1000m3]'] * dry_share_fresh / dry_share_dried # Timber with moisture content 20%
    gdf_reduced_centers['Potential [kt/a]'] = gdf_reduced_centers['Potential [1000m3]'] * 1000 * timber_density / 1000 # Timber with moisture content 20%
    gdf_reduced_centers['Production costs [NOK/m3]'] = gdf_reduced_centers['Production costs [NOK/m3]'] / dry_share_fresh * dry_share_dried # Production costs per m3 timber
    gdf_reduced_centers['Production costs [NOK/t]'] = gdf_reduced_centers['Production costs [NOK/m3]'] / timber_density # Production costs per tonne timber
    gdf_reduced_centers['Diesel Consumption [l/m3]'] = gdf_reduced_centers['Diesel Consumption [l/m3]'] / dry_share_fresh * dry_share_dried # Timber with moisture content 20%

    # Remove infeasible biomass nodes (production costs >= market price in the landsdel of the node)
    gdf_reduced_centers['Timber price [NOK/m3]'] = gdf_reduced_centers['landsdel'].map(lambda ld: df_skogdata_per_landsdel.loc[ld, 'Timber price [NOK2023/m3]'])
    gdf_reduced_centers = gdf_reduced_centers[gdf_reduced_centers['Production costs [NOK/m3]'] < gdf_reduced_centers['Timber price [NOK/m3]']].copy()

    print('{} nodes with production costs < market price in their fylke were found!'. format(len(gdf_reduced_centers)))
    print('Average production costs for nodes below market price [NOK/m3]: {:.2f}'.format(gdf_reduced_centers['Production costs [NOK/m3]'].mean()))

    print('Time passed: {:.2f} seconds'.format(time.time() - start_time))

    if plot_nodes:
        print('Plotting feasible biomass nodes')
        plot_dir = pdir + 'nodes_primary_biomass_resources/plots/'

        # Export data insights
        for feature in ['Felling density [m3/ha]', 'Extraction distance [m]', 'Potential [1000m3]', 'Average Tree Volume [m3/tree]', 'HVT Productivity [m3/h]', 'FWD Productivity [m3/h]', 'Production costs [NOK/m3]', 'Production costs [NOK/t]', 'Diesel Consumption [l/m3]']:
            fig, ax = plt.subplots()
            ax.hist(gdf_reduced_centers[feature], bins=50, edgecolor='grey', color='skyblue', density=True)
            ax.set_xlim([gdf_reduced_centers[feature].min(), gdf_reduced_centers[feature].max()])
            ax.set_xlabel(feature)
            ax.set_ylabel('Occurrence density')
            ax.set_title('Occurrence distribution of selected feature')
            fig.savefig(plot_dir + '05_biomass_nodes_features - ' + feature.split('[')[0] + '.png')
            plt.close(fig) # Release the figure, otherwise all figures stay open until the process ends

        # Transforming geometry for plotting
        # NOTE(review): The source crs is hard-coded and not taken from base_crs - confirm that crs.json always holds EPSG:25833
        transformer_25833_to_4326 = Transformer.from_crs("EPSG:25833", "EPSG:4326")

        m = folium.Map(tiles="cartodb positron")
        for point, potential, production_costs in zip(gdf_reduced_centers['geometry'], gdf_reduced_centers['Potential [1000m3]'], gdf_reduced_centers['Production costs [NOK/m3]']):
            # Without always_xy, the transformer returns EPSG:4326 coordinates as (lat, lon), which is the order folium expects
            lat, lon = transformer_25833_to_4326.transform(point.x, point.y)
            folium.Marker(location=(lat, lon), icon=folium.Icon(color='black', icon_color='black'),
                          tooltip='Potential: {:.2f} [1000m3]\nProduction Cost: {:.2f} [NOK/m3]'.format(potential, production_costs)).add_to(m)

        plot_name = plot_dir + '06_biomass_nodes_' + RID + '.html'
        m.save(plot_name)
        webbrowser.open_new_tab(plot_name)

    print('Exporting feasible biomass nodes')

    # Export
    gdf_reduced_centers.index = ['biomass_resource_' + str(el) for el in gdf_reduced_centers.index]
    gdf_reduced_centers = gdf_reduced_centers[['geometry', 'point_on_closest_road', 'Potential [kt/a]', 'Production costs [NOK/m3]', 'Production costs [NOK/t]', 'Timber price [NOK/m3]', 'Diesel Consumption [l/m3]']]
    gdf_reduced_centers.to_feather(pdir + 'nodes_primary_biomass_resources/output_data/node_data_01_biomass_' + RID + '.feather')

    #######################################################################################################

#################################################################################################################################################################################################################################################

if __name__ == '__main__':
    # Parent directory of the folder containing this file, with a trailing separator because run() builds
    # its paths by string concatenation. Forward slashes are used since a hard-coded '\\' only works on
    # Windows, while '/' is accepted on Windows as well as on POSIX systems
    pdir = Path(__file__).resolve().parent.parent.as_posix() + '/'

    # Local test run: 1000 random biomass nodes, including plots
    run('test', 1000, pdir, plot_nodes=True)