topobench.data.datasets package#

Submodules#

topobench.data.datasets.citation_hypergaph_dataset module#

Dataset class for citation hypergraph datasets.

class topobench.data.datasets.citation_hypergaph_dataset.CitationHypergraphDataset(root: str, name: str, parameters: DictConfig)[source]#

Bases: InMemoryDataset

Dataset class for citation hypergraph datasets.

Parameters:
root : str

Root directory where the dataset will be saved.

name : str

Name of the dataset.

parameters : DictConfig

Configuration parameters for the dataset.

Attributes:
URLS (dict): Dictionary containing the URLs for downloading the dataset.
FILE_FORMAT (dict): Dictionary containing the file formats for the dataset.
RAW_FILE_NAMES (dict): Dictionary containing the raw file names for the dataset.
FILE_FORMAT: ClassVar = {'coauthorship_cora': 'zip', 'coauthorship_dblp': 'zip', 'cocitation_citeseer': 'zip', 'cocitation_cora': 'zip', 'cocitation_pubmed': 'zip'}#
RAW_FILE_NAMES: ClassVar = {}#
URLS: ClassVar = {'coauthorship_cora': 'https://drive.google.com/file/d/1J5fLPABWrM9SH_7m85n7--oHDVmwJeib/view?usp=sharing', 'coauthorship_dblp': 'https://drive.google.com/file/d/16ryf4Ve-t0_nAla0VfjtSxSAG8Sye8TZ/view?usp=sharing', 'cocitation_citeseer': 'https://drive.google.com/file/d/1XWfu1jtijsmHmfCP6UQxyLsuPM8GBNJb/view?usp=sharing', 'cocitation_cora': 'https://drive.google.com/file/d/1WVRx5yDxSdZpvL6FK5Ji8H3lOnyYlraN/view?usp=sharing', 'cocitation_pubmed': 'https://drive.google.com/file/d/1XbqDJnHnV0HYvie3fcM8rquamnQsLTpK/view?usp=sharing'}#
download() None[source]#

Download the dataset from a URL and save it to the raw directory.

Raises:

FileNotFoundError – If the dataset URL is not found.

process() None[source]#

Handle the data for the dataset.

This method loads the citation hypergraph data, applies any preprocessing transformations if specified, and saves the processed data to the appropriate location.

property processed_dir: str#

Return the path to the processed directory of the dataset.

Returns:
str

Path to the processed directory.

property processed_file_names: str#

Return the processed file name for the dataset.

Returns:
str

Processed file name.

property raw_dir: str#

Return the path to the raw directory of the dataset.

Returns:
str

Path to the raw directory.

property raw_file_names: list[str]#

Return the raw file names for the dataset.

Returns:
list[str]

List of raw file names.

topobench.data.datasets.mantra_dataset module#

Dataset class for MANTRA dataset.

class topobench.data.datasets.mantra_dataset.MantraDataset(root: str, name: str, parameters: DictConfig, **kwargs)[source]#

Bases: InMemoryDataset

Dataset class for MANTRA manifold dataset.

Parameters:
root : str

Root directory where the dataset will be saved.

name : str

Name of the dataset.

parameters : DictConfig

Configuration parameters for the dataset.

**kwargs : dict

Additional keyword arguments.

Attributes:
URLS (dict): Dictionary containing the URLs for downloading the dataset.
FILE_FORMAT (dict): Dictionary containing the file formats for the dataset.
RAW_FILE_NAMES (dict): Dictionary containing the raw file names for the dataset.
FILE_FORMAT: ClassVar = {'2_manifolds': 'json.gz', '3_manifolds': 'json.gz'}#
RAW_FILE_NAMES: ClassVar = {}#
URLS: ClassVar = {'2_manifolds': 'https://github.com/aidos-lab/mantra/releases/download/{version}/2_manifolds.json.gz', '3_manifolds': 'https://github.com/aidos-lab/mantra/releases/download/{version}/3_manifolds.json.gz'}#
download() None[source]#

Download the dataset from a URL and save it to the raw directory.

Raises:

FileNotFoundError – If the dataset URL is not found.

process() None[source]#

Handle the data for the dataset.

This method loads the JSON file for MANTRA for the specified manifold dimension, applies the respective preprocessing if specified, and saves the preprocessed data to the appropriate location.

property processed_dir: str#

Return the path to the processed directory of the dataset.

Returns:
str

Path to the processed directory.

property processed_file_names: str#

Return the processed file name for the dataset.

Returns:
str

Processed file name.

property raw_dir: str#

Return the path to the raw directory of the dataset.

Returns:
str

Path to the raw directory.

property raw_file_names: list[str]#

Return the raw file names for the dataset.

Returns:
list[str]

List of raw file names.

topobench.data.datasets.us_county_demos_dataset module#

Dataset class for US County Demographics dataset.

class topobench.data.datasets.us_county_demos_dataset.USCountyDemosDataset(root: str, name: str, parameters: DictConfig)[source]#

Bases: InMemoryDataset

Dataset class for US County Demographics dataset.

Parameters:
root : str

Root directory where the dataset will be saved.

name : str

Name of the dataset.

parameters : DictConfig

Configuration parameters for the dataset.

Attributes:
URLS (dict): Dictionary containing the URLs for downloading the dataset.
FILE_FORMAT (dict): Dictionary containing the file formats for the dataset.
RAW_FILE_NAMES (dict): Dictionary containing the raw file names for the dataset.
FILE_FORMAT: ClassVar = {'US-county-demos': 'zip'}#
RAW_FILE_NAMES: ClassVar = {}#
URLS: ClassVar = {'US-county-demos': 'https://drive.google.com/file/d/1FNF_LbByhYNICPNdT6tMaJI9FxuSvvLK/view?usp=sharing'}#
download() None[source]#

Download the dataset from a URL and save it to the raw directory.

Raises:

FileNotFoundError – If the dataset URL is not found.

process() None[source]#

Handle the data for the dataset.

This method loads the US county demographics data, applies any preprocessing transformations if specified, and saves the processed data to the appropriate location.

property processed_dir: str#

Return the path to the processed directory of the dataset.

Returns:
str

Path to the processed directory.

property processed_file_names: str#

Return the processed file name for the dataset.

Returns:
str

Processed file name.

property raw_dir: str#

Return the path to the raw directory of the dataset.

Returns:
str

Path to the raw directory.

property raw_file_names: list[str]#

Return the raw file names for the dataset.

Returns:
list[str]

List of raw file names.

Module contents#

Dataset module with automated exports.

class topobench.data.datasets.CitationHypergraphDataset(root: str, name: str, parameters: DictConfig)#

Bases: InMemoryDataset

Dataset class for citation hypergraph datasets.

Parameters:
root : str

Root directory where the dataset will be saved.

name : str

Name of the dataset.

parameters : DictConfig

Configuration parameters for the dataset.

Attributes:
URLS (dict): Dictionary containing the URLs for downloading the dataset.
FILE_FORMAT (dict): Dictionary containing the file formats for the dataset.
RAW_FILE_NAMES (dict): Dictionary containing the raw file names for the dataset.
FILE_FORMAT: ClassVar = {'coauthorship_cora': 'zip', 'coauthorship_dblp': 'zip', 'cocitation_citeseer': 'zip', 'cocitation_cora': 'zip', 'cocitation_pubmed': 'zip'}#
RAW_FILE_NAMES: ClassVar = {}#
URLS: ClassVar = {'coauthorship_cora': 'https://drive.google.com/file/d/1J5fLPABWrM9SH_7m85n7--oHDVmwJeib/view?usp=sharing', 'coauthorship_dblp': 'https://drive.google.com/file/d/16ryf4Ve-t0_nAla0VfjtSxSAG8Sye8TZ/view?usp=sharing', 'cocitation_citeseer': 'https://drive.google.com/file/d/1XWfu1jtijsmHmfCP6UQxyLsuPM8GBNJb/view?usp=sharing', 'cocitation_cora': 'https://drive.google.com/file/d/1WVRx5yDxSdZpvL6FK5Ji8H3lOnyYlraN/view?usp=sharing', 'cocitation_pubmed': 'https://drive.google.com/file/d/1XbqDJnHnV0HYvie3fcM8rquamnQsLTpK/view?usp=sharing'}#
download() None#

Download the dataset from a URL and save it to the raw directory.

Raises:

FileNotFoundError – If the dataset URL is not found.

process() None#

Handle the data for the dataset.

This method loads the citation hypergraph data, applies any preprocessing transformations if specified, and saves the processed data to the appropriate location.

property processed_dir: str#

Return the path to the processed directory of the dataset.

Returns:
str

Path to the processed directory.

property processed_file_names: str#

Return the processed file name for the dataset.

Returns:
str

Processed file name.

property raw_dir: str#

Return the path to the raw directory of the dataset.

Returns:
str

Path to the raw directory.

property raw_file_names: list[str]#

Return the raw file names for the dataset.

Returns:
list[str]

List of raw file names.

class topobench.data.datasets.MantraDataset(root: str, name: str, parameters: DictConfig, **kwargs)#

Bases: InMemoryDataset

Dataset class for MANTRA manifold dataset.

Parameters:
root : str

Root directory where the dataset will be saved.

name : str

Name of the dataset.

parameters : DictConfig

Configuration parameters for the dataset.

**kwargs : dict

Additional keyword arguments.

Attributes:
URLS (dict): Dictionary containing the URLs for downloading the dataset.
FILE_FORMAT (dict): Dictionary containing the file formats for the dataset.
RAW_FILE_NAMES (dict): Dictionary containing the raw file names for the dataset.
FILE_FORMAT: ClassVar = {'2_manifolds': 'json.gz', '3_manifolds': 'json.gz'}#
RAW_FILE_NAMES: ClassVar = {}#
URLS: ClassVar = {'2_manifolds': 'https://github.com/aidos-lab/mantra/releases/download/{version}/2_manifolds.json.gz', '3_manifolds': 'https://github.com/aidos-lab/mantra/releases/download/{version}/3_manifolds.json.gz'}#
download() None#

Download the dataset from a URL and save it to the raw directory.

Raises:

FileNotFoundError – If the dataset URL is not found.

process() None#

Handle the data for the dataset.

This method loads the JSON file for MANTRA for the specified manifold dimension, applies the respective preprocessing if specified, and saves the preprocessed data to the appropriate location.

property processed_dir: str#

Return the path to the processed directory of the dataset.

Returns:
str

Path to the processed directory.

property processed_file_names: str#

Return the processed file name for the dataset.

Returns:
str

Processed file name.

property raw_dir: str#

Return the path to the raw directory of the dataset.

Returns:
str

Path to the raw directory.

property raw_file_names: list[str]#

Return the raw file names for the dataset.

Returns:
list[str]

List of raw file names.

class topobench.data.datasets.USCountyDemosDataset(root: str, name: str, parameters: DictConfig)#

Bases: InMemoryDataset

Dataset class for US County Demographics dataset.

Parameters:
root : str

Root directory where the dataset will be saved.

name : str

Name of the dataset.

parameters : DictConfig

Configuration parameters for the dataset.

Attributes:
URLS (dict): Dictionary containing the URLs for downloading the dataset.
FILE_FORMAT (dict): Dictionary containing the file formats for the dataset.
RAW_FILE_NAMES (dict): Dictionary containing the raw file names for the dataset.
FILE_FORMAT: ClassVar = {'US-county-demos': 'zip'}#
RAW_FILE_NAMES: ClassVar = {}#
URLS: ClassVar = {'US-county-demos': 'https://drive.google.com/file/d/1FNF_LbByhYNICPNdT6tMaJI9FxuSvvLK/view?usp=sharing'}#
download() None#

Download the dataset from a URL and save it to the raw directory.

Raises:

FileNotFoundError – If the dataset URL is not found.

process() None#

Handle the data for the dataset.

This method loads the US county demographics data, applies any preprocessing transformations if specified, and saves the processed data to the appropriate location.

property processed_dir: str#

Return the path to the processed directory of the dataset.

Returns:
str

Path to the processed directory.

property processed_file_names: str#

Return the processed file name for the dataset.

Returns:
str

Processed file name.

property raw_dir: str#

Return the path to the raw directory of the dataset.

Returns:
str

Path to the raw directory.

property raw_file_names: list[str]#

Return the raw file names for the dataset.

Returns:
list[str]

List of raw file names.