Source code for evomap.datasets

"""
Sample data for demonstration purposes.
"""
from importlib import resources
import numpy as np
import os
import pandas as pd

def load_tnic_snapshot():
    """
    Load the TNIC snapshot example data.

    Original data comes from https://hobergphillips.tuck.dartmouth.edu/.

    Returns
    -------
    dict
        Dictionary with four entries, each the result of ``np.load`` on a
        file packaged under ``evomap.data.tnic_snapshot``:

        - ``'matrix'``: similarity matrix (``sim_mat.npy``)
        - ``'label'``: firm labels (``label.npy``)
        - ``'cluster'``: cluster assignments, based on community detection
          (``cluster.npy``)
        - ``'size'``: contents of ``size.npy``
    """
    package = "evomap.data.tnic_snapshot"
    # Key of the returned dict -> resource file that backs it. The insertion
    # order here is the key order of the returned dict.
    resource_files = {
        'matrix': "sim_mat.npy",
        'label': "label.npy",
        'cluster': "cluster.npy",
        'size': "size.npy",
    }

    snapshot = {}
    for key, filename in resource_files.items():
        # The path handed out by resources.path is only guaranteed to exist
        # inside the ``with`` block, so the array is loaded eagerly here.
        with resources.path(package, filename) as f:
            snapshot[key] = np.load(f)
    return snapshot
def load_tnic_snapshot_small():
    """
    Load the small TNIC snapshot example data.

    Original data comes from https://hobergphillips.tuck.dartmouth.edu/.

    Returns
    -------
    dict
        Dictionary holding the similarity matrix (``'matrix'``), the firm
        labels (``'labels'``) and the cluster assignments (``'cluster'``,
        based on community detection), each loaded with ``np.load`` from
        the ``evomap.data.tnic_snapshot_small`` package resources.
    """
    package = "evomap.data.tnic_snapshot_small"

    def _read(filename):
        # Resolve the packaged file to a filesystem path and load it while
        # that path is still valid.
        with resources.path(package, filename) as resource_path:
            return np.load(resource_path)

    similarity = _read("sim_mat.npy")
    clusters = _read("cluster.npy")
    firm_labels = _read("labels.npy")
    return {'matrix': similarity, 'labels': firm_labels, 'cluster': clusters}
def load_tnic_sample():
    """
    Load the TNIC sample example data.

    Original data comes from https://hobergphillips.tuck.dartmouth.edu/.

    Returns
    -------
    pandas.DataFrame
        Contents of the packaged ``tnic_sample.csv`` file (from
        ``evomap.data.tnic_sample``), parsed by ``pandas.read_csv`` with
        its default settings.
    """
    with resources.path("evomap.data.tnic_sample", "tnic_sample.csv") as f:
        # read_csv consumes the whole file before the ``with`` block exits,
        # so the frame stays valid after the resource path is released.
        return pd.read_csv(f)
def load_tnic_sample_small():
    """
    Load the small TNIC sample example data.

    Original data comes from https://hobergphillips.tuck.dartmouth.edu/.

    Returns
    -------
    pandas.DataFrame
        Contents of the packaged ``tnic_sample_small.csv`` file (from
        ``evomap.data.tnic_sample_small``), with the ``'sic1'`` and
        ``'sic2'`` columns converted element-wise to strings.
    """
    with resources.path("evomap.data.tnic_sample_small", "tnic_sample_small.csv") as f:
        df_tnic = pd.read_csv(f)

    # ``str`` is passed directly as the mapper; wrapping it in a lambda adds
    # nothing and the two columns receive the identical conversion.
    for column in ('sic1', 'sic2'):
        df_tnic[column] = df_tnic[column].map(str)
    return df_tnic
def load_tnic_sample_tech(unbalanced=False):
    """
    Load the TNIC technology sample data.

    Parameters
    ----------
    unbalanced : bool, optional
        If truthy, load ``tnic_sample_technology_with_netflix.csv``;
        otherwise load ``tnic_sample_technology.csv``. By default False.

    Returns
    -------
    pandas.DataFrame
        Contents of the selected CSV file from the
        ``evomap.data.tnic_sample_tech`` package resources, parsed by
        ``pandas.read_csv`` with its default settings.
    """
    # Only the file name depends on the flag; the loading code is shared.
    if unbalanced:
        filename = 'tnic_sample_technology_with_netflix.csv'
    else:
        filename = 'tnic_sample_technology.csv'

    with resources.path('evomap.data.tnic_sample_tech', filename) as f:
        return pd.read_csv(f)
def load_car_data():
    """
    Load the car dataset with perceptual and preference ratings of
    competing cars.

    Data is taken from the following book, with some modifications:

    Lilien, G. L., & Rangaswamy, A. (2004). Marketing engineering:
    computer-assisted marketing analysis and planning (2nd revised edition).
    DecisionPro.

    Returns
    -------
    dict
        Dictionary with two DataFrames: ``'preferences'`` (read from
        ``customer_preference_ratings.csv``) and ``'attributes'`` (read from
        ``perceptual_attribute_ratings.csv`` with its first column used as
        the index).
    """
    package = "evomap.data.cars"

    with resources.path(package, "customer_preference_ratings.csv") as preference_path:
        preference_ratings = pd.read_csv(preference_path)

    with resources.path(package, "perceptual_attribute_ratings.csv") as attribute_path:
        # The first CSV column becomes the row index of the attribute table.
        attribute_ratings = pd.read_csv(attribute_path, index_col=0)

    car_data = {}
    car_data['preferences'] = preference_ratings
    car_data['attributes'] = attribute_ratings
    return car_data