Source code for PTO.database.NASA_exoplanet_archive

from . import catalog as cat
import datetime
import pyvo as vo
import pandas as pd
import numpy as np
import logging
from ..utils.utilities import logger_default
from .mappers import _NASA_EXOPLANET_ARCHIVE_COMPOSITE_MAPPER, _NASA_EXOPLANET_ARCHIVE_FULL_MAPPER

logger = logging.getLogger(__name__)
if not logger.handlers:
    logger = logger_default(logger)



[docs]
class NASA_Exoplanet_Archive_CompositeDefault(cat.CatalogComposite):
    """
    NASA_Exoplanet_Archive_CompositeDefault is a class that extends the CatalogComposite class to handle the loading and processing of the NASA Exoplanet Archive Composite table using the TAP protocol.

    Methods
    -------
    :meth:`.NASA_Exoplanet_Archive_CompositeDefault.load_API_table`
        Loads the Table using the API system, in particular the TAP protocol. This method is rerun every week, but the output is saved and by default loaded instead of rerunning the TAP protocol.
    :meth:`.NASA_Exoplanet_Archive_CompositeDefault._rename_columns`
        Renames the columns in the pandas dataframe according to a predefined mapping.
    :meth:`.NASA_Exoplanet_Archive_CompositeDefault._drop_columns`
        Drops all columns that are irrelevant from the pandas dataframe.
    :meth:`.NASA_Exoplanet_Archive_CompositeDefault._absolute_errors`
        Reverses the sign of the lower error to an absolute value to ensure further functionality.
    """


[docs]
    def load_API_table(self, force_load=False) -> None:
        """
        Loads the Table using the API system, in particular the TAP protocol.

        This is rerun every week, but the output is saved and by default loaded instead of rerunning the TAP protocol.

        Parameters
        ----------
        force_load : bool
            Flag to trigger reloading of the TAP protocol.
                If False, the self.filename is going to be loaded, and only if a week 
                or more passed the TAP protocol is rerun. If True or if last run 
                happened week or more ago, the TAP protocal is relaunched.
                (default: False)
        """

        try:
            if force_load:
                logger.info('Forced reload:')
                raise
            logger.info(
                'Trying to load NASA Exoplanet Archive Composite table')
            self.load()
            if (datetime.datetime.now() - self.time) > datetime.timedelta(days=7):
                logger.info('Too old data, reloading:')
                raise
        except:
            logger.info('Accessing NASA Exoplanet Archive')
            service = vo.dal.TAPService(
                "https://exoplanetarchive.ipac.caltech.edu/TAP/")
            logger.info('Fetching table')
            self.table = pd.DataFrame(
                service.search("SELECT * FROM pscomppars"))
            logger.info('Table fetched successfully')
            self.time = datetime.datetime.now()
            self._rename_columns()
            self._drop_columns()
            self._absolute_errors()
            self._get_all()
            self.legacy_table = self.table
            self.save()


    def _rename_columns(self) -> None:
        """
        Renames the columns in the pandas dataframe
        """
        self.table = self.table.rename(
            columns=_NASA_EXOPLANET_ARCHIVE_COMPOSITE_MAPPER)

    def _drop_columns(self) -> None:
        """
        Drops all columns that are irrelevant.
        """
        _TODROP = [key for key in self.table.keys() if
                   not (key.startswith('Planet.')) and
                   not (key.startswith('Star.')) and
                   not (key.startswith('Magnitude.')) and
                   not (key.startswith('Position.')) and
                   not (key.startswith('System.')) and
                   not (key.startswith('Flag.')) and
                   not (key.startswith('Discovery.'))
                   ]
        self.table = self.table.drop(_TODROP, axis=1)

    def _absolute_errors(self) -> None:
        """
        Reverses the sign of the lower error to absolute value. This ensures further functionality.
        """
        keys = [key for key in self.table.keys() if key.endswith('.Lower')]

        for key in keys:
            self.table[key] = np.abs(self.table[key])




[docs]
class NASA_Exoplanet_Archive_FullTable(cat.CatalogFull):
    """
    NASA_Exoplanet_Archive_FullTable is a class that handles the loading and processing of the NASA Exoplanet Archive Composite table using the TAP protocol. It extends the CatalogFull class from the Catalog module.

    Methods
    -------
    load_API_table(force_load=False) -> None
        Loads the Table using the API system, in particular the TAP protocol. This method is rerun every week, but the output is saved and by default loaded instead of rerunning the TAP protocol.
    _rename_columns() -> None
        Renames the columns in the pandas dataframe according to a predefined mapping.
    _drop_columns() -> None
        Drops all columns that are irrelevant from the pandas dataframe.
    get_most_precise_value(group, column)
        Returns the most precise value for a given column within a group, based on the smallest error range.
    aggregate_most_precise_values() -> None
        Aggregates the most precise values for each planet by considering the error ranges for relevant columns.
    """


[docs]
    def load_API_table(self, force_load=False) -> None:
        """
        Loads the Table using the API system, in particular the TAP protocol. 

        This is rerun every week, but the output is saved and by default loaded instead of rerunning the TAP protocol

        Parameters
        ----------
        force_load : bool, optional
            Flag to trigger reloading of the TAP protocol, by default False. If False, the self.filename is going to loaded, and only if a week or more passed the TAP protocol is rerun. If True or if last run happened week or more ago, the TAP protocal is relaunched. 
        """
        try:
            if force_load:
                logger.info('Forced reload:')
                raise
            logger.info(
                'Trying to load NASA Exoplanet Archive Composite table')
            self.load()
            logger.info('Success!')
            if (datetime.datetime.now() - self.time) > datetime.timedelta(days=7):
                logger.info('Too old data, reloading:')
                raise
        except:
            logger.info('Accessing NASA Exoplanet Archive')
            service = vo.dal.TAPService(
                "https://exoplanetarchive.ipac.caltech.edu/TAP/")
            logger.info('Fetching table')
            self.table = pd.DataFrame(service.search("SELECT * FROM ps"))
            logger.info('Table fetched successfully')
            self.time = datetime.datetime.now()
            self._rename_columns()
            self._drop_columns()
            # self._absolute_errors()
            self._get_all()

            self.legacy_table = self.table
            self.save()



[docs]
    def _rename_columns(self) -> None:
        """
        Renames the columns in the pandas dataframe
        """
        self.table = self.table.rename(
            columns=_NASA_EXOPLANET_ARCHIVE_FULL_MAPPER)



[docs]
    def _drop_columns(self) -> None:
        """
        Drops all columns that are irrelevant.
        """
        _TODROP = [key for key in self.table.keys() if
                   not (key.startswith('Planet.')) and
                   not (key.startswith('Star.')) and
                   not (key.startswith('Magnitude.')) and
                   not (key.startswith('Position.')) and
                   not (key.startswith('System.')) and
                   not (key.startswith('Flag.')) and
                   not (key.startswith('Discovery.'))
                   ]
        self.table = self.table.drop(_TODROP, axis=1)





[docs]
class NASA_Exoplanet_Archive_CompositeMostPrecise(NASA_Exoplanet_Archive_FullTable):


[docs]
    def get_most_precise_value(self, group, column):
        error_upper = column + '.Error.Upper'
        error_lower = column + '.Error.Lower'
        if error_upper in group.columns and error_lower in group.columns and (group[error_upper].notna() != False).all() and (group[error_lower].notna() != False).all():
            group['error_range'] = group[error_upper] - group[error_lower]
            most_precise_row = group.loc[group['error_range'].idxmin()]
            return most_precise_row[column]
        else:
            return group[column].iloc[0]



[docs]
    def aggregate_most_precise_values(self):
        # List of columns to consider for precision
        columns_to_consider = [col for col in self.table.columns if not col.endswith('.Error.Upper') and not col.endswith(
            '.Error.Lower') and f"{col}.Error.Upper" in self.table.columns and f"{col}.Error.Lower" in self.table.columns]

        # Group by 'Planet.Name'
        grouped = self.table.groupby('Planet.Name')

        # Create a new DataFrame to store the aggregated results
        aggregated_df = []

        for name, group in grouped:
            aggregated_row = {'Planet.Name': name}
            for column in columns_to_consider:
                if column != 'Planet.Name':
                    aggregated_row[column] = self.get_most_precise_value(
                        group, column)
            aggregated_df.append(aggregated_row)

        self.table = pd.DataFrame(aggregated_df)


    ...



if __name__ == '__main__':
    logger.warning('Debugging Database module')
    logger.warning('='*25)
    import os
    os.chdir('/media/chamaeleontis/Observatory_main/Code/observations_transits/PTO/')

    # test_full = NASA_Exoplanet_Archive_FullTable()
    # test_full.load_API_table(force_load=True)
    # logger.print('Hello there!')

    test = NASA_Exoplanet_Archive_CompositeDefault()
    logger.print('Hello there!')
    test.load_API_table(force_load=True)
    
    
    
    # test.print_all_keys()
    # fig, ax = test.plot_population_diagram(
    #     x_key='Planet.InsolationFlux',
    #     y_key='Planet.RadiusJupiter',
    # )

    logger.print(
        f"Length before further filtering of the table: {test.table.shape[0]}")
    test.table = test.table[test.table['Planet.Name'] == 'HIP 67522 b']
    logger.print(
        f"Length after further filtering of the table: {test.table.shape[0]}")
    
    
    
    import seaborn as sns
    with sns.plotting_context('talk'):
        fig, ax = test.highlight_sample(
            x_key='Planet.InsolationFlux',
            y_key='Planet.RadiusJupiter',
            color='red',
            s=500
        )
        ax.set_xlim(10000, 10)
        ax.set_ylim(0.2, 2.5)
        ax.set_xlabel('Insolation Flux [$S_\oplus$]', fontsize=36)
        ax.set_ylabel('Planetary radius [$R_\oplus$]', fontsize=36)
        ax.tick_params(axis='both', which='major', labelsize=30)
        # ax.tick_params(axis='both', which='minor', labelsize=8)
        fig.tight_layout()
        fig.savefig(
            '/media/chamaeleontis/Observatory_main/Analysis_dataset/WASP-31/figures/whitemode_normal/radius_insolation_flux.pdf')
        # ax.set_xlim(0.1,50)
    logger.print('General Kenobi!!!!')

    # from . import catalog as cat
    # from ..telescopes import telescopes as tel
    # from ..transits.windows import Windows

    # ATREIDES = cat.CatalogComposite()
    # ATREIDES.create_exoplanet_csv('ATREIDES_custom.csv')
    # ATREIDES.load_exoplanet_csv('ATREIDES_custom.csv')

    # for P in [108, 109, 110, 111]:
    #     Transits = Windows(
    #         table = test.table,
    #         directory= f'/media/chamaeleontis/Observatory_main/ESO_scheduling/PTO_developement/',
    #         observing_period= f'ESO.{P}'
    #     )

    #     Transits.directory = f'/media/chamaeleontis/Observatory_main/ESO_scheduling/PTO_developement/'
    #     Transits.generate_observability(
    #         location= tel.VLT,
    #         partial= False,
    #         velocity_offset= 0,
    #         velocity_range = 5
    #     )


# %%

[docs]
def transform_time_format(input_string):
    # Split the input string
    target, date_str, quality, time_start, time_end, _ = input_string.split(';')

    # Parse the date
    year = int(date_str[0:4])
    month = int(date_str[4:6])
    day = int(date_str[6:8])

    # Parse the time
    hour_start = int(time_start.split(':')[0])
    minute_start = int(time_start.split(':')[1])

    # If hour < 12, it's morning of the next day
    if hour_start < 12:
        observation_date = datetime(year, month, day) + timedelta(days=1)
    else:
        observation_date = datetime(year, month, day)

    hour_end = int(time_end.split(':')[0])
    minute_end = int(time_end.split(':')[1])

    if hour_end < 12:
        observation_date_end = datetime(year, month, day) + timedelta(days=1)
    else:
        observation_date_end = datetime(year, month, day)


    # Create the start time
    start_time = datetime(observation_date.year,
                         observation_date.month,
                         observation_date.day,
                         hour_start,
                         minute_start)

    end_time = datetime(observation_date_end.year,
                        observation_date_end.month,
                        observation_date_end.day,
                        hour_end,
                        minute_end)

    # Format to ISO time string
    start_iso = start_time.strftime("%Y-%m-%dT%H:%M")
    end_iso = end_time.strftime("%Y-%m-%dT%H:%M")

    # Create the output string
    output = f"between({start_iso},{end_iso},{1},\"{target} P117\")"

    return output


# Example usage
# input_string = "CoRoT-22 b;20260522;10;03:37;09:54;1"
# result = transform_time_format(input_string)
# print(result)

# # %%
# list_of_strings = [
#     'TOI-3071 b;20260112;1;03:45;07:35;1',
#     'TOI-3071 b;20260131;1;03:50;07:40;1',
#     'TOI-3071 b;20260214;1;02:19;06:09;1',
#     'TOI-3071 b;20260219;1;03:56;07:46;1',
#     'TOI-3071 b;20260228;1;00:47;04:37;1',
#     'TOI-3071 b;20260305;1;02:25;06:15;1',
#     'TOI-3071 b;20260310;1;04:02;07:52;1',
#     'TOI-3071 b;20260319;1;00:53;04:43;1',
#     'TOI-3071 b;20260324;1;02:31;06:21;1',
#     'TOI-3071 b;20260329;1;04:08;07:58;1',
# ]


# list_of_strings = [
#     'TOI-3071 b;20250407;1;03:54;07:44;1',
#     'TOI-3071 b;20250416;1;00:45;04:35;1',
#     'TOI-3071 b;20250421;1;02:23;06:13;1',
#     'TOI-3071 b;20250505;1;00:51;04:41;1',
#     'TOI-3071 b;20250402;1;02:17;06:07;1',
#     'TOI-3071 b;20250524;1;00:57;04:47;1',
#     'TOI-3071 b;20250607;1;23:25;03:15;1',
# ]

list_of_strings = [
#     'TOI-3071 b;20260407;1;00:59;04:49;1',
#     'TOI-3071 b;20260412;1;02:36;06:26;1',
#     'TOI-3071 b;20260426;1;01:05;04:55;1',
#     'TOI-3071 b;20260501;1;02:42;06:32;1',
#     'TOI-3071 b;20260510;1;23:33;03:23;1',
#     'TOI-3071 b;20260515;1;01:11;05:01;1',
#     'TOI-3071 b;20260529;1;23:39;03:29;1',
#     
    'TOI-470 b;20251224;1;02:14;07:35;1'
]