PBMC CITE-Seq Unbiased Feature Selection
[1]:
import scanpy as sc
import os
import pandas as pd
import numpy as np
import pickle as pkl
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy.stats
# Set scanpy's logging verbosity to level 3.
sc.settings.verbosity = 3
[2]:
# Load the RNA AnnData object from an .h5ad file (path is relative to the working directory).
rna_adata = sc.read("../../CITE-seq/rna.h5ad")
[3]:
# Read the ADT UMI count table; the first CSV column becomes the row index.
# As displayed below, rows are proteins and columns are cell barcodes.
data = pd.read_csv("../../CITE-seq/GSE100866_PBMC_vs_flow_10X-ADT_umi.csv", index_col=0)
data
[3]:
| ACCGTAAGTGTAATGA | CGTGAGCTCGAGAACG | CACATTTAGAATTCCC | TACGGTATCTGGGCCA | TCAGGTAGTAAGTTCC | TGATTTCGTTCTCATT | ACACTGAAGGCCCTCA | ACGGGTCGTCACACGC | AGCTTGACATCCCATC | ACGTCAATCCGTCATC | ... | TTCTCCTAGATCGATA | GGAAAGCGTCGACTAT | GACTAACCAACACCCG | AGCGTCGTCCTCGCAT | TCTCATAAGTTTGCGT | GTCGGGTAGAGCTGGT | GTCGGGTAGGTAGCCA | GTCGGGTAGTCTTGCA | ATGTGTGGTCCGTTAA | CGTATGCCGTCTTCTG | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CD3 | 9 | 17 | 18 | 12 | 18 | 16 | 15 | 23 | 18 | 9 | ... | 40 | 184 | 2 | 88 | 8 | 115 | 53 | 10 | 5 | 5 |
| CD4 | 8 | 17 | 18 | 12 | 20 | 29 | 27 | 20 | 28 | 19 | ... | 93 | 157 | 15 | 47 | 6 | 112 | 69 | 14 | 7 | 5 |
| CD8 | 13 | 21 | 16 | 16 | 18 | 25 | 11 | 23 | 21 | 10 | ... | 180 | 26 | 9 | 4 | 8 | 8 | 5 | 32 | 7 | 9 |
| CD2 | 9 | 10 | 7 | 8 | 5 | 15 | 10 | 4 | 3 | 4 | ... | 38 | 184 | 1 | 50 | 2 | 245 | 87 | 21 | 3 | 7 |
| CD45RA | 56 | 37 | 36 | 53 | 74 | 69 | 46 | 44 | 75 | 41 | ... | 66 | 87 | 22 | 302 | 31 | 10 | 12 | 182 | 28 | 22 |
| CD57 | 19 | 40 | 32 | 21 | 33 | 55 | 41 | 30 | 52 | 32 | ... | 15 | 15 | 24 | 7 | 15 | 9 | 3 | 10 | 19 | 7 |
| CD16 | 15 | 11 | 4 | 7 | 14 | 30 | 20 | 22 | 25 | 8 | ... | 17 | 8 | 9 | 1 | 4 | 5 | 0 | 9 | 4 | 2 |
| CD14 | 10 | 7 | 13 | 9 | 15 | 25 | 24 | 21 | 28 | 9 | ... | 152 | 6 | 8 | 3 | 4 | 3 | 2 | 24 | 6 | 3 |
| CD11c | 7 | 6 | 5 | 10 | 14 | 18 | 8 | 18 | 23 | 11 | ... | 203 | 7 | 7 | 3 | 2 | 4 | 1 | 29 | 5 | 1 |
| CD19 | 15 | 14 | 12 | 19 | 17 | 22 | 17 | 12 | 18 | 4 | ... | 7 | 5 | 12 | 1 | 6 | 1 | 3 | 3 | 5 | 4 |
10 rows × 7985 columns
[4]:
# Transpose the table (cells as rows, proteins as columns) and wrap it in an AnnData object.
adata = sc.AnnData(data.T)
[5]:
# Keep only the cells present in the RNA object, in the RNA object's order, so that the
# two modalities are row-aligned. Indexing an AnnData returns a *view*; `.copy()` turns it
# into an independent object so later steps do not have to copy it implicitly.
adata = adata[rna_adata.obs.index, :].copy()
[6]:
# Figure defaults for scanpy plots: 80 dpi, white background.
sc.settings.set_figure_params(dpi=80, facecolor='white')
# Plot the top 20 features as ranked by scanpy's highest_expr_genes (features here are proteins).
sc.pl.highest_expr_genes(adata, n_top=20)
normalizing counts per cell
finished ({time_passed})
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\scanpy\preprocessing\_normalization.py:138: UserWarning: Revieved a view of an AnnData. Making a copy.
view_to_actual(adata)
[7]:
# Compute QC metrics and store them on `adata` (inplace=True); no top-percent or log1p variants.
sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
# Violin plots of two of the resulting per-cell columns, one panel each.
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'], jitter=0.4, multi_panel=True)
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\seaborn\_core.py:1303: UserWarning: Vertical orientation ignored with only `x` specified.
warnings.warn(single_var_warning.format("Vertical", "x"))
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\seaborn\_core.py:1303: UserWarning: Vertical orientation ignored with only `x` specified.
warnings.warn(single_var_warning.format("Vertical", "x"))
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\seaborn\_core.py:1303: UserWarning: Vertical orientation ignored with only `x` specified.
warnings.warn(single_var_warning.format("Vertical", "x"))
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\seaborn\_core.py:1303: UserWarning: Vertical orientation ignored with only `x` specified.
warnings.warn(single_var_warning.format("Vertical", "x"))
[8]:
# Scatter of total counts per cell against the number of detected features per cell.
sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')
[9]:
# Flag every protein as "highly variable" so scanpy steps that honour this column keep all features.
# The column has to be created with item assignment: `adata.var.highly_variable = True` only sets
# a Python attribute on the DataFrame object and never adds a column to `adata.var`.
adata.var['highly_variable'] = True
[ ]:
[10]:
#sc.pp.normalize_total(adata, target_sum=1e4)
import scipy.stats.mstats
def clr(adata):
    """Apply a centered log-ratio (CLR) transform to ``adata.X`` in place.

    A pseudo-count of 1 is added to every entry, then each row is expressed as
    the log of its values relative to the row's geometric mean, i.e.
    ``log((x + 1) / gmean(x + 1))``. The computation is done in the log domain
    (``log1p(x) - mean(log1p(x))``), which is the same quantity without the
    exp/log round trip through the geometric mean.

    Parameters
    ----------
    adata
        Object exposing a 2-D matrix as ``adata.X``; values must be greater
        than -1 for the logarithm to be defined. Sparse matrices are converted
        to dense arrays first, since adding the pseudo-count makes every entry
        non-zero anyway.

    Returns
    -------
    None
        ``adata.X`` is overwritten with the transformed dense array.
    """
    from scipy.sparse import issparse

    X = adata.X
    if issparse(X):
        X = X.toarray()
    log_counts = np.log1p(np.asarray(X))
    # Subtracting the row mean of the logs equals dividing by the row geometric mean.
    adata.X = log_counts - log_counts.mean(axis=1, keepdims=True)
# Normalize the protein counts; clr() overwrites adata.X.
clr(adata)
[11]:
# Scale the features with scanpy, passing max_value=2.
# NOTE(review): 2 is a low cap for scaled values — confirm this is intended.
sc.pp.scale(adata, max_value=2)
[12]:
# Sanity check: number of NaN entries in the processed matrix (the recorded output is 0).
np.isnan(adata.X).sum()
[12]:
0
[13]:
# PCA of the processed protein data with the ARPACK solver (the log reports n_comps=9).
sc.tl.pca(adata, svd_solver='arpack')
computing PCA
with n_comps=9
finished (0:00:00)
[14]:
# PCA scatter plots coloured by each protein in turn (data.index holds the protein names).
sc.pl.pca(adata, color=data.index)
[15]:
# Variance ratio of the principal components, drawn on a log scale.
sc.pl.pca_variance_ratio(adata, log=True)
[16]:
# Neighbour graph with 50 neighbours per cell (the log reports it uses the data matrix X directly).
sc.pp.neighbors(adata, n_neighbors=50)
# UMAP embedding, stored in adata.obsm['X_umap'].
sc.tl.umap(adata)
# Leiden clustering at resolution 2.0; labels are stored in adata.obs['leiden'].
sc.tl.leiden(adata, resolution=2.)
computing neighbors
using data matrix X directly
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:04)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:15)
running Leiden clustering
finished: found 24 clusters and added
'leiden', the cluster labels (adata.obs, categorical) (0:00:06)
[17]:
# UMAP coloured by Leiden cluster, with the cluster labels drawn on the embedding.
sc.pl.umap(adata, color=['leiden'], ncols=3, legend_loc="on data", legend_fontsize=8.)
[18]:
# UMAP coloured by each protein in turn, three panels per row.
sc.pl.umap(adata, color=data.index, ncols=3, legend_loc="on data", legend_fontsize=8.)
[19]:
# Work on a copy so the protein AnnData (`adata`) itself is left untouched.
filtered_adata = adata.copy()

# Prefix protein names so they cannot collide with gene symbols (e.g. protein CD4 vs gene CD4).
proteins = ['PROTEIN_' + name for name in filtered_adata.var.index]
rnas = list(rna_adata.raw.var.index)

# The two matrices are stacked column-wise, so both objects must list the same cells
# in the same order; fail loudly instead of silently pairing the wrong rows.
assert filtered_adata.obs_names.equals(rna_adata.obs_names), \
    "protein and RNA objects are not row-aligned"

# Passing the cell barcodes as the DataFrame index keeps them as obs_names and avoids
# anndata's "Transforming to str index" ImplicitModificationWarning for an integer index.
merged_adata = sc.AnnData(pd.DataFrame(np.hstack([filtered_adata.X, rna_adata.raw.X]),
                                       index=filtered_adata.obs_names,
                                       columns=proteins + rnas))
# Copy the annotations so later edits to one object's `.obs` cannot silently alter the other.
merged_adata.obs = filtered_adata.obs.copy()
# Reuse the protein-derived embeddings (PCA / UMAP) for plotting the merged object.
merged_adata.obsm = filtered_adata.obsm
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:119: ImplicitModificationWarning: Transforming to str index.
warnings.warn("Transforming to str index.", ImplicitModificationWarning)
[20]:
#print(*merged_adata.var_names)
[21]:
# Cluster labels plus marker genes, grouped by the lineage they are used to identify.
umap_markers = [
    'leiden',
    'GZMB', 'GZMA', 'NCAM1', 'CD244',                    # NK (Granzyme)
    'CD3D', 'CD3E', 'CD3G', 'CD4', 'CD8A', 'CD8B',       # T CD4/CD8
    'CD14', 'FCGR3A', 'FCGR3B',                          # MONO CD14 / (CD16 = FCGR)
    'CD19', 'PTPRC', 'CD79A',                            # B (CD45R = PTPRC)
    'HLA-DRA', 'CD68', 'IL3RA', 'CLEC4C', 'NRP1',        # DC
    'CXCL9', 'CXCL10', 'CD86',
]
# The merged object carries the protein-derived embedding, so each panel shows one
# marker on the same UMAP coordinates.
sc.pl.umap(merged_adata, color=umap_markers, ncols=3, legend_loc="on data", legend_fontsize=8.)
B cells:
Clusters 3, 13, and 23 are classified as B cells given high CD19 expression in protein, and high CD19 and CD79A in mRNA. Cluster 23 also has high expression of CD3 and is therefore annotated as “CD3+” B cells.
T cells:
Clusters 0, 1, 2, 8, 9, 12, 21, 22 (bottom left) and clusters 6, 11, 15, 16 are clearly high in CD3 and are all T cells. Clusters 0, 1, 2, 8, 9, 12 are CD4 T cells; clusters 6, 11, 16 are CD8 T cells; clusters 21, 22 are CD4+ CD8+ (double positive) T cells, labeled as DP T, and cluster 22 is also high in CD57; cluster 15 is CD4- CD8- (double negative) T cells, labeled as DN T.
NK cells:
Cluster 7, 9, 10, 18 are high in CD16, GZMA/B, NCAM1 (CD56), and CD244. Cluster 18 is high in CD57.
Monocytes:
Clusters 4, 5, 14, 19, 20, 17 are high in CD11c and CD68. Clusters 4, 5, 14, 19 are high in CD14. Cluster 14 is high in CD8.
[22]:
# Manual annotation of the Leiden clusters, written as label -> member clusters
# and inverted below into the cluster -> label lookups.
subtype_members = {
    'CD45RA+ CD4 T': (0, 8, 12),
    'CD4 T': (1, 2),
    'B': (3, 13),
    'CD14+ CD16+ Mono': (4,),
    'CD14+ Mono': (5,),
    'CD8 T': (6, 11),
    'NK': (7, 9, 10),
    'CD3+ CD8+ CD14+ Mono': (14,),
    'DN T': (15,),
    'CD57+ CD8 T': (16,),
    'CD16+ Mono': (17,),
    'CD57+ NK': (18,),
    'CD3+ CD14+ Mono': (19,),
    'CD14- CD16- Mono': (20,),
    'DP T': (21,),
    'CD57+ DP T': (22,),
    'CD3+ B': (23,),
}
type_members = {
    'CD4 T': (0, 1, 2, 8, 12),
    'B': (3, 13, 23),
    'Mono': (4, 5, 14, 17, 19, 20),
    'CD8 T': (6, 11, 16),
    'NK': (7, 9, 10, 18),
    'DN T': (15,),
    'DP T': (21, 22),
}


def _invert_membership(members):
    """Turn {label: cluster ids} into {str(cluster id): label}, ordered by cluster id."""
    pairs = sorted((cluster, label) for label, clusters in members.items() for cluster in clusters)
    return {str(cluster): label for cluster, label in pairs}


cluster2subtype = _invert_membership(subtype_members)
cluster2type = _invert_membership(type_members)

# All three objects are annotated from adata's Leiden labels; fine-grained labels first,
# then the coarse ones.
for obs_column, lookup in (('cell subtype', cluster2subtype), ('cell type', cluster2type)):
    for target in (adata, filtered_adata, merged_adata):
        target.obs[obs_column] = [lookup[cluster] for cluster in adata.obs.leiden]
[23]:
# Each measured protein paired with its associated gene(s) in the RNA data.
associated_genes = {
    'CD2': ['CD2'],
    'CD3': ['CD3D', 'CD3E', 'CD3G'],
    'CD4': ['CD4'],
    'CD8': ['CD8A', 'CD8B'],
    'CD11c': ['ITGAX'],
    'CD14': ['CD14'],
    'CD16': ['FCGR3A', 'FCGR3B'],
    'CD19': ['CD19'],
    'CD45RA': ['PTPRC'],
    'CD57': ['B3GAT1'],
}
protein2rna = {'PROTEIN_' + protein: genes for protein, genes in associated_genes.items()}

# One figure per protein: the protein panel first, followed by its gene panel(s).
for protein, genes in protein2rna.items():
    sc.pl.umap(merged_adata, color=[protein, *genes], ncols=4, legend_fontsize=8.)
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead
if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'cell subtype' as categorical
... storing 'cell type' as categorical
[24]:
# UMAP coloured by the coarse ('cell type') and fine ('cell subtype') manual annotations.
sc.pl.umap(adata, color=['cell type', 'cell subtype'], ncols=4, legend_fontsize=8.)
... storing 'cell subtype' as categorical
... storing 'cell type' as categorical
[25]:
#import sys
#sys.path.insert(0,'..')
#import compactmarker._tsne_l1
#np.random.seed(0)
#model = compactmarker._tsne_l1.TsneL1.tune(50, rna_adata.X, filtered_adata.X) # , max_inner_iter=1, max_outer_iter=1
[26]:
#np.random.seed(0)
#model = compactmarker._tsne_l1.TsneL1.tune(50, rna_adata.X, filtered_adata.X,
# min_lasso=1.2409377607517196e-05, max_lasso=1.539926526059492e-05)
#import torch
#torch.autograd.set_detect_anomaly(True)
[39]:
# Put the parent directory on the import path (presumably where compactmarker lives — confirm).
import sys
sys.path.insert(0,'..')
import compactmarker
# First fit: lasso and ridge both 1e-3; the log below reports 69 nonzero features.
# NOTE(review): the next code cell rebinds `model`, so later cells use that second fit, not this one.
model = compactmarker.UmapL1(lasso=1e-3, ridge=1e-3, n_pcs=None, perplexity=100., use_beta_in_Q=True, n_threads=6,
max_outer_iter=2) # , max_inner_iter=1, max_outer_iter=1
# The RNA matrix is the input; the processed protein matrix is passed as X_teacher.
model.fit(rna_adata.X, X_teacher=filtered_adata.X)
Calculating distance matrix and scaling factors...
Computing pairwise distances...
Using 6 threads...
Mean value of sigma: 0.362250
Done. Elapsed time: 61.17 seconds. Total: 61.17 seconds.
Creating model without batches...
Optimizing using OWLQN (because lasso is nonzero)...
0 loss: 38.25368118286133 Nonzero: 69 Elapsed time: 472.48 seconds. Total: 533.65 seconds.
1 loss: 3.821220874786377 Nonzero: 69 Elapsed time: 278.84 seconds. Total: 812.49 seconds.
final loss: 3.8009145259857178 Nonzero: 69 Elapsed time: 6.67 seconds. Total: 819.16 seconds.
[39]:
<compactmarker._umap_l1.UmapL1 at 0x1c99fc46a08>
[45]:
import sys
sys.path.insert(0, '..')
import compactmarker

# Second fit: smaller lasso penalty and no ridge term; this is the `model` used by the cells below.
umap_l1_options = dict(
    lasso=5e-4,
    ridge=0.,
    n_pcs=None,
    perplexity=100.,
    use_beta_in_Q=True,
    n_threads=6,
    max_outer_iter=2,
)
model = compactmarker.UmapL1(**umap_l1_options)
# The RNA matrix is the input; the processed protein matrix is passed as X_teacher.
model.fit(rna_adata.X, X_teacher=filtered_adata.X)
Calculating distance matrix and scaling factors...
Computing pairwise distances...
Using 6 threads...
Mean value of sigma: 0.362250
Done. Elapsed time: 56.94 seconds. Total: 56.94 seconds.
Creating model without batches...
Optimizing using OWLQN (because lasso is nonzero)...
0 loss: 12.995180130004883 Nonzero: 27 Elapsed time: 468.43 seconds. Total: 525.37 seconds.
1 loss: 3.8472976684570312 Nonzero: 26 Elapsed time: 461.75 seconds. Total: 987.12 seconds.
final loss: 3.816372871398926 Nonzero: 26 Elapsed time: 6.67 seconds. Total: 993.79 seconds.
[45]:
<compactmarker._umap_l1.UmapL1 at 0x1c99fdf4088>
[46]:
# Print the gene names picked out by the fitted model (var_names indexed with model.get_mask()).
print(*rna_adata.var_names[model.get_mask()])
AIF1 CD7 CD74 CD79A CD8B CST7 CTSS FCER1G GNLY HLA-DPA1 HLA-DRA HLA-DRB1 IL32 LEF1 LGALS1 LST1 LYZ NKG7 PRF1 RPL13 S100A11 S100A4 S100A8 S100A9 TRBC2 TYROBP
[47]:
# Gene names of the raw RNA matrix as a plain list.
rnas = ['' + gene for gene in rna_adata.raw.var.index]
merged_adata2 = rna_adata.copy()
protein_markers = ['PROTEIN_' + name for name in filtered_adata.var.index]
# Store each protein's processed values as a per-cell obs column on the copied RNA object.
for column, obs_key in enumerate(protein_markers):
    merged_adata2.obs[obs_key] = filtered_adata.X[:, column]
[48]:
# PCA of the protein data coloured by the coarse cell type annotation.
sc.pl.pca(adata, color='cell type')
[51]:
# Apply the fitted model to a copy of the RNA object. transform() hands back an AnnData
# *view* (anndata otherwise reports "Trying to set attribute `.obs` of view, copying"),
# so materialize it explicitly before adding annotations.
new_adata = model.transform(merged_adata2.copy()).copy()

# Carry over the manual annotations derived from the protein clustering.
new_adata.obs['cell type'] = filtered_adata.obs['cell type']
new_adata.obs['cell subtype'] = filtered_adata.obs['cell subtype']

# PCA on the transformed object, then a neighbour graph on the first 5 PCs and a UMAP.
sc.tl.pca(new_adata, svd_solver='arpack')
sc.pl.pca(new_adata, color='cell type', frameon=False)
sc.pp.neighbors(new_adata, n_pcs=5, use_rep="X_pca")
sc.tl.umap(new_adata)
sc.pl.umap(new_adata, color=['cell type'], frameon=False, title="", size=3.)
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead
if not is_categorical(df_full[k]):
Trying to set attribute `.obs` of view, copying.
computing PCA
on highly variable genes
with n_comps=24
finished (0:00:00)
computing neighbors
finished: added to `.uns['neighbors']`
`.obsp['distances']`, distances for each pair of neighbors
`.obsp['connectivities']`, weighted adjacency matrix (0:00:01)
computing UMAP
finished: added
'X_umap', UMAP coordinates (adata.obsm) (0:00:09)
[ ]:
[32]:
# Display a summary of the transformed AnnData object.
new_adata
[32]:
AnnData object with n_obs × n_vars = 7634 × 48
obs: 'cell subtype', 'cell type', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'PROTEIN_CD3', 'PROTEIN_CD4', 'PROTEIN_CD8', 'PROTEIN_CD2', 'PROTEIN_CD45RA', 'PROTEIN_CD57', 'PROTEIN_CD16', 'PROTEIN_CD14', 'PROTEIN_CD11c', 'PROTEIN_CD19'
var: 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: 'cell subtype_colors', 'cell type_colors', 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pca', 'umap'
obsm: 'X_pca', 'X_umap'
varm: 'PCs'
obsp: 'connectivities', 'distances'
[33]:
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

# Gene-by-gene correlation of the selected markers across cells.
df = pd.DataFrame(new_adata.X, columns=new_adata.var.index)
corrMatrix = df.corr()

fig, ax = plt.subplots(figsize=(14, 12))
sn.heatmap(ax=ax, data=corrMatrix, xticklabels=True, yticklabels=True)
plt.show()

# Absolute correlation of every unordered gene pair, taken once each from the
# strictly-lower triangle (row by row, excluding the diagonal).
corr_matrix = np.abs(corrMatrix.values)
lower_rows, lower_cols = np.tril_indices(corr_matrix.shape[0], k=-1)
corr_values = list(corr_matrix[lower_rows, lower_cols])
plt.hist(corr_values, bins=50)
plt.title('Distribution of correlations of \n selected markers')
[33]:
Text(0.5, 1.0, 'Distribution of correlations of \n selected markers')
[34]:
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

# Put the marker expression columns and the per-cell protein columns side by side;
# both indexes are reset so rows are matched by position.
protein_obs = new_adata.obs.loc[:, new_adata.obs.columns.str.contains('PROTEIN_')]
df2 = pd.concat([df.reset_index(drop=True), protein_obs.reset_index(drop=True)], axis=1)

# Keep only the protein (rows) x gene (columns) part of the full correlation matrix.
corrMatrix = df2.corr()
is_protein = corrMatrix.index.str.contains('PROTEIN_')
corrMatrix = corrMatrix.loc[is_protein, ~is_protein]

fig, ax = plt.subplots(figsize=(20, 3))
sn.heatmap(ax=ax, data=corrMatrix.abs(), xticklabels=True, yticklabels=True)
plt.show()
[35]:
# Render the correlation table transposed (genes as rows, proteins as columns) via the pandas Styler.
corrMatrix.T.style
[35]:
| PROTEIN_CD3 | PROTEIN_CD4 | PROTEIN_CD8 | PROTEIN_CD2 | PROTEIN_CD45RA | PROTEIN_CD57 | PROTEIN_CD16 | PROTEIN_CD14 | PROTEIN_CD11c | PROTEIN_CD19 | |
|---|---|---|---|---|---|---|---|---|---|---|
| AC132872.2 | -0.057121 | -0.033678 | 0.022242 | -0.026684 | 0.015539 | 0.021639 | 0.023311 | 0.033757 | 0.043185 | 0.003818 |
| AP1G2 | 0.003806 | -0.007512 | -0.019456 | 0.014663 | 0.041407 | 0.014168 | -0.009372 | -0.050276 | -0.045060 | 0.024469 |
| APEH | -0.001326 | 0.003457 | -0.013882 | 0.018534 | 0.027218 | 0.025875 | 0.024947 | -0.045393 | -0.023549 | -0.005811 |
| C2orf68 | 0.004219 | 0.018819 | -0.013954 | -0.001032 | 0.005328 | -0.008308 | 0.006165 | -0.006577 | 0.008854 | -0.013827 |
| CBLB | -0.017304 | -0.055466 | 0.036698 | 0.061028 | 0.052467 | 0.016983 | -0.003008 | -0.053712 | -0.036736 | -0.016574 |
| CD320 | -0.004388 | -0.068123 | 0.056684 | 0.080928 | 0.054757 | 0.083013 | 0.011571 | -0.081302 | -0.082888 | -0.041756 |
| CD59 | 0.067997 | 0.059027 | -0.028081 | 0.077722 | -0.031213 | 0.019404 | -0.022022 | -0.075959 | -0.082218 | -0.006978 |
| CD82 | -0.006573 | 0.022886 | -0.110960 | 0.015401 | 0.015857 | 0.007061 | -0.057523 | -0.079809 | -0.074393 | 0.204989 |
| CDK10 | -0.005229 | -0.007795 | -0.001334 | 0.029529 | 0.023666 | 0.009640 | 0.004888 | -0.038625 | -0.018294 | -0.003952 |
| CITED4 | 0.124631 | 0.112310 | -0.043116 | 0.060291 | -0.018634 | -0.096972 | -0.061113 | -0.042644 | -0.035454 | -0.102378 |
| CNOT11 | 0.039936 | 0.013133 | 0.017130 | 0.063803 | -0.016901 | 0.004260 | -0.033437 | -0.041727 | -0.046556 | -0.027419 |
| COA5 | -0.014229 | -0.025784 | 0.018401 | -0.018384 | 0.020379 | 0.017014 | 0.010107 | -0.028425 | 0.002627 | 0.014582 |
| COMMD10 | -0.034393 | -0.001196 | 0.013050 | -0.026152 | -0.017461 | 0.003855 | 0.027445 | 0.030550 | 0.045945 | -0.006138 |
| CRYL1 | 0.010003 | 0.021328 | -0.011779 | 0.018476 | -0.029595 | -0.007285 | -0.013955 | -0.000704 | 0.013180 | -0.005122 |
| CST3 | -0.345264 | 0.019570 | 0.130521 | -0.486025 | -0.304183 | -0.238582 | 0.095101 | 0.742058 | 0.839373 | -0.144727 |
| DOCK9 | 0.060252 | 0.034208 | 0.003071 | 0.057710 | -0.021485 | -0.014831 | -0.020848 | -0.042459 | -0.054808 | -0.037967 |
| DPH5 | 0.069149 | 0.042724 | -0.026671 | 0.057574 | 0.000370 | -0.028330 | -0.049750 | -0.068381 | -0.070880 | 0.000552 |
| FCN1 | -0.306816 | 0.018249 | 0.116456 | -0.469859 | -0.314470 | -0.221331 | 0.084627 | 0.744712 | 0.788196 | -0.128639 |
| GABPB1-AS1 | 0.022763 | -0.007434 | -0.047978 | 0.018762 | 0.082215 | 0.009561 | -0.039467 | -0.081629 | -0.101957 | 0.070178 |
| GMPR2 | 0.015361 | 0.001043 | 0.007426 | 0.040610 | 0.016050 | 0.009150 | 0.003124 | -0.046416 | -0.031352 | -0.025418 |
| GPANK1 | 0.027088 | 0.012442 | -0.007912 | 0.029707 | 0.008911 | 0.001101 | -0.018153 | -0.044393 | -0.041172 | 0.003045 |
| HSD17B8 | 0.057555 | 0.031551 | -0.001858 | 0.051736 | -0.003797 | -0.025695 | -0.028289 | -0.048395 | -0.042747 | -0.037324 |
| KIN | -0.001901 | -0.001338 | -0.001949 | 0.010628 | -0.007908 | 0.009455 | 0.009179 | -0.011014 | 0.005309 | -0.001850 |
| KRTCAP3 | 0.050297 | 0.032055 | -0.023494 | 0.043469 | -0.006392 | -0.008872 | -0.030755 | -0.066973 | -0.058224 | 0.015308 |
| LINC00402 | 0.179249 | 0.128856 | -0.048477 | 0.104298 | -0.028654 | -0.092499 | -0.083487 | -0.091408 | -0.115161 | -0.092257 |
| LYZ | -0.308150 | 0.020707 | 0.129248 | -0.467036 | -0.335649 | -0.235306 | 0.047670 | 0.787555 | 0.802407 | -0.130433 |
| MRPL46 | 0.031722 | 0.012419 | -0.010568 | 0.029103 | 0.023233 | -0.001981 | -0.029543 | -0.052403 | -0.048489 | -0.001838 |
| NAGPA | -0.001235 | 0.005049 | -0.009468 | -0.003438 | -0.000618 | 0.007869 | -0.027073 | 0.004224 | 0.011885 | 0.005165 |
| NAT9 | 0.003746 | 0.010192 | -0.018497 | -0.010489 | 0.031342 | -0.014602 | -0.019943 | -0.018622 | -0.010718 | 0.011342 |
| NDEL1 | 0.018304 | 0.016491 | -0.009681 | 0.026141 | -0.001764 | -0.012347 | -0.003363 | -0.017089 | -0.008624 | -0.018704 |
| NKAP | -0.005028 | 0.001098 | -0.017527 | 0.023260 | -0.005454 | 0.029372 | 0.017674 | -0.025580 | -0.016604 | 0.019324 |
| POLDIP2 | -0.022635 | -0.022624 | 0.006805 | 0.018742 | 0.009673 | 0.030947 | 0.012325 | -0.018104 | 0.000050 | 0.005588 |
| PPP1R3E | 0.027233 | 0.010902 | -0.037340 | 0.022627 | 0.062267 | -0.001434 | -0.007526 | -0.074333 | -0.089508 | 0.035120 |
| PPP1R8 | -0.017425 | -0.025874 | 0.005143 | 0.013556 | 0.032592 | 0.031551 | 0.020002 | -0.039399 | -0.025948 | 0.014832 |
| RIF1 | 0.008047 | -0.000676 | 0.015667 | 0.017868 | -0.001064 | -0.029437 | -0.036240 | -0.000438 | -0.003309 | -0.011216 |
| RNF19A | -0.016699 | -0.031279 | -0.001289 | 0.006751 | 0.003266 | 0.043778 | -0.003904 | -0.017940 | -0.016350 | 0.047477 |
| SCAMP3 | -0.004628 | -0.025407 | 0.004629 | 0.040217 | 0.030370 | 0.009009 | -0.005542 | -0.050580 | -0.036885 | 0.015669 |
| SLC2A4RG | 0.078483 | 0.045201 | -0.011598 | 0.094449 | -0.026786 | 0.000995 | -0.018170 | -0.074844 | -0.070343 | -0.051508 |
| SNHG17 | 0.002901 | 0.009197 | 0.003716 | 0.002854 | -0.001650 | -0.014684 | -0.011865 | 0.006123 | -0.005473 | -0.005647 |
| SSR3 | -0.039209 | 0.017750 | 0.017859 | -0.052679 | -0.077998 | -0.042835 | -0.003835 | 0.113954 | 0.137226 | -0.023115 |
| TMEM245 | 0.049015 | 0.048746 | -0.021178 | 0.034155 | -0.026393 | -0.028445 | -0.017854 | -0.013756 | -0.017335 | -0.030610 |
| TOR1AIP1 | -0.026530 | -0.013282 | 0.010001 | 0.011240 | 0.017378 | 0.025602 | 0.017205 | -0.030057 | -0.006080 | 0.002709 |
| USP8 | -0.040339 | -0.012659 | -0.003770 | -0.038324 | -0.007250 | -0.011114 | -0.013279 | 0.040626 | 0.057500 | 0.036300 |
| VPS26B | -0.034631 | -0.038457 | 0.040399 | 0.013927 | 0.006315 | 0.045906 | 0.025707 | -0.006399 | 0.013654 | -0.029259 |
| WDR73 | 0.026131 | 0.010095 | -0.005092 | 0.018848 | 0.009678 | -0.004826 | 0.002599 | -0.049521 | -0.041935 | 0.003765 |
| XXbac-BPG299F13.17 | 0.029379 | -0.009988 | 0.004856 | 0.043457 | 0.017339 | -0.006785 | -0.018289 | -0.042830 | -0.064013 | 0.008200 |
| ZBTB1 | 0.000205 | 0.000028 | 0.001345 | 0.034133 | -0.008531 | 0.012861 | 0.000149 | -0.010494 | -0.022030 | 0.003472 |
| ZBTB20 | 0.028594 | 0.006783 | -0.031892 | 0.030955 | 0.036300 | 0.007254 | -0.019074 | -0.066323 | -0.070462 | 0.036235 |
[36]:
# Print the gene names of the correlation table, separated by spaces.
print(*corrMatrix.T.index)
AC132872.2 AP1G2 APEH C2orf68 CBLB CD320 CD59 CD82 CDK10 CITED4 CNOT11 COA5 COMMD10 CRYL1 CST3 DOCK9 DPH5 FCN1 GABPB1-AS1 GMPR2 GPANK1 HSD17B8 KIN KRTCAP3 LINC00402 LYZ MRPL46 NAGPA NAT9 NDEL1 NKAP POLDIP2 PPP1R3E PPP1R8 RIF1 RNF19A SCAMP3 SLC2A4RG SNHG17 SSR3 TMEM245 TOR1AIP1 USP8 VPS26B WDR73 XXbac-BPG299F13.17 ZBTB1 ZBTB20
[37]:
# Export the per-cell table (QC metrics, Leiden cluster, cell type / subtype labels) to CSV.
adata.obs.to_csv("../../CITE-seq/human-pbmc-cell.csv")
[52]:
# Keep the features whose weight is strictly positive, indexed by gene name.
# NOTE(review): an earlier cell selects genes with model.get_mask(); confirm that
# `w > 0` picks the same set (it would miss any negative weights).
positive_weight = model.w > 0.
weight_df = pd.DataFrame(model.w[positive_weight], index=rna_adata.var_names[positive_weight])
weight_df.to_pickle("cite-seq-26-weights.pkl")
# NOTE(review): "weigths" in the CSV name looks like a typo for "weights"; left unchanged
# because other code may already expect this file name.
weight_df.to_csv("cite-seq-26-weigths.csv")
[56]:
# Export the merged protein + RNA object to a loom file. As the message below notes,
# X_umap / X_pca are not written unless write_obsm_varm=True is passed.
merged_adata.write_loom("cite-seq.loom")
The loom file will lack these fields:
{'X_umap', 'X_pca'}
Use write_obsm_varm=True to export multi-dimensional annotations