#!/usr/bin/env python3 import numpy as np import pandas as pd from sklearn.decomposition import PCA from sklearn.manifold import TSNE import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt import matplotlib.cm as cm plt.style.use('bmh') import os import sys import argparse parser = argparse.ArgumentParser() parser.add_argument('workDir', type=str) parser.add_argument('--names', type=str, nargs='+', required=True) args = parser.parse_args() y = pd.read_csv("{}/labels.csv".format(args.workDir)).as_matrix()[:, 0] X = pd.read_csv("{}/reps.csv".format(args.workDir)).as_matrix() target_names = np.array(args.names) colors = cm.gnuplot2(np.linspace(0, 0.7, len(target_names))) X_pca = PCA(n_components=50).fit_transform(X, X) tsne = TSNE(n_components=2, init='random', random_state=0) X_r = tsne.fit_transform(X_pca) for c, i, target_name in zip(colors, list(range(1, len(target_names) + 1)), target_names): plt.scatter(X_r[y == i, 0], X_r[y == i, 1], c=c, label=target_name) plt.legend() plt.savefig("{}/tsne.pdf".format(args.workDir))