"""Export data frames as CSV and JSON files with descriptive file names."""

import csv
import json
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from matplotlib import pyplot
from scipy.stats import pearsonr
from sklearn.datasets import make_regression
from sklearn.preprocessing import MinMaxScaler


class DataExporter:
    """Write a data frame to CSV and JSON files named after its settings.

    ``path`` is used as a plain string prefix (``path + filename``), so it
    must already end with a path separator when it names a directory.
    """

    def __init__(self, path):
        """Store the output prefix that every generated file name is appended to."""
        self.path = path

    def get_correlation_string(self, correlation):
        """Return the file-name code for a correlation coefficient.

        Exactly ``-0.8`` maps to ``'N'``, exactly ``0.8`` maps to ``'P'`` and
        every other value maps to ``'NC'``.
        """
        # The comparison is intentionally exact: only the two literal values
        # are recognised, anything else falls through to 'NC'.
        if correlation == -0.8:
            return 'N'
        if correlation == 0.8:
            return 'P'
        return 'NC'

    def get_monotonic_type_string(self, monotonic, type_monotonic):
        """Return the file-name code for a monotonic setting.

        A falsy ``monotonic`` gives ``'NM'``. Otherwise ``type_monotonic == 1``
        gives ``'MI'`` and any other type gives ``'MD'``.
        """
        if not monotonic:
            return 'NM'
        return 'MI' if type_monotonic == 1 else 'MD'

    def get_outfile_name(self, r, monotonic, type_monotonic, r_exception,
                         monotonic_exception, type_monotonic_exception,
                         geotemp_config):
        """Build the extension-less output file name for one configuration.

        The name is a ``_``-separated list of segments: the ``usstates``
        base, a ``PB-`` segment built from ``r`` and the monotonic settings,
        an ``NB-`` segment built from the ``*_exception`` counterparts, the
        ``NG``/``NT`` noise percentages and, for every forced value on
        ``geotemp_config`` that is not ``None``, an ``F!...`` segment.

        ``geotemp_config`` must expose ``locations_noise_perc``,
        ``location_forced``, ``location_range_forced``, ``times_noise_perc``,
        ``time_forced`` and ``time_range_forced``.
        """
        segments = [
            'usstates',
            'PB-' + self.get_correlation_string(r),
            self.get_monotonic_type_string(monotonic, type_monotonic),
            'NB-' + self.get_correlation_string(r_exception),
            self.get_monotonic_type_string(monotonic_exception,
                                           type_monotonic_exception),
        ]

        # Noise fractions are written as rounded whole percentages.
        geo_percentage = round(geotemp_config.locations_noise_perc * 100)
        segments.append('NG' + str(geo_percentage))

        forced_location = geotemp_config.location_forced
        if forced_location is not None:
            segments.append('F!GL-' + str(forced_location))

        forced_zone = geotemp_config.location_range_forced
        if forced_zone is not None:
            segments.append('F!GR-' + str(forced_zone))

        time_percentage = round(geotemp_config.times_noise_perc * 100)
        segments.append('NT' + str(time_percentage))

        forced_time = geotemp_config.time_forced
        if forced_time is not None:
            segments.append('F!T-' + str(forced_time))

        forced_period = geotemp_config.time_range_forced
        if forced_period is not None:
            segments.append('F!TR-' + str(forced_period))

        return '_'.join(segments)

    def export(self, df, r, monotonic, type_monotonic, r_exception,
               monotonic_exception, type_monotonic_exception, geotemp_config):
        """Print the generated base name and write ``df`` as ``.csv`` and ``.json``.

        The base name comes from :meth:`get_outfile_name`; both files are
        written under the ``self.path`` prefix.
        """
        output = self.get_outfile_name(
            r, monotonic, type_monotonic, r_exception, monotonic_exception,
            type_monotonic_exception, geotemp_config)
        print(output)
        self.generate_csv(df, output + '.csv')
        self.generate_json(df, output + '.json')

    def generate_csv(self, df, filename):
        """Write ``df`` unchanged to ``self.path + filename`` as CSV."""
        df.to_csv(self.path + filename)

    def generate_json(self, df, filename):
        """Write ``df`` as a list of JSON records with per-state coordinates.

        Steps, none of which modify ``df`` itself:

        1. drop the ``notnoise_geo`` column and the last row of ``df``;
        2. inner-join on ``state`` with ``VEGA/data/obesity.json`` and drop
           that file's ``rate`` column;
        3. add ``geometries = {'coordinates': [x, y]}`` to each record, taken
           from the first row of ``us_states_codes_centers.csv`` whose
           ``Abbreviation`` equals the record's ``state``;
        4. write the records to ``self.path + filename``.

        Both input files are resolved relative to the current working
        directory.

        Raises:
            IndexError: if ``df`` is empty, or a record's state has no match
                in the centers file.
        """
        # ``columns=`` replaces the positional ``axis`` argument, which
        # pandas 2.0 no longer accepts.
        frame = df.drop(columns='notnoise_geo')
        # Drop the last row of the input by its index label.
        frame = frame.drop(df.index[len(df) - 1])

        ids = pd.read_json('VEGA/data/obesity.json')
        merged = pd.merge(frame, ids, on='state')
        merged = merged.drop(columns=['rate'])

        centers = pd.read_csv('us_states_codes_centers.csv')

        # Serialise through pandas so values are encoded exactly as
        # ``to_json`` would write them, without a temporary file round-trip.
        records = json.loads(merged.to_json(orient='records'))
        for record in records:
            matches = centers[centers.Abbreviation == record['state']]
            center = matches.iloc[0]
            record['geometries'] = {'coordinates': [center.x, center.y]}

        with open(self.path + filename, 'w', encoding='utf-8') as outfile:
            json.dump(records, outfile)