Dexamethasone Treated A549 ATAC Peak Embedding

[144]:

import scanpy as sc
import os
import pandas as pd
import numpy as np
import pickle as pkl
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy.stats
import seaborn as sns
import pickle as pkl
from scipy.io import mmread

sc.settings.verbosity = 3

[145]:

def read_data(path):
    adata = sc.read_mtx(path + "_peak_count.txt.gz").T
    adata.obs = pd.read_csv(path + "_cell.txt.gz", index_col=0, compression='gzip')
    adata.var = pd.read_csv(path + "_peak.txt.gz", index_col=0, compression='gzip')
    return adata

atac_adata = read_data("../../sci-car/GSM3271041_ATAC_sciCAR_A549")
atac_adata = atac_adata[atac_adata.obs.group.str.contains("^A549"), :]


atac_only_adata = read_data("../../sci-car/GSM3271043_ATAC_only_A549")
atac_only_adata = atac_only_adata[atac_only_adata.obs.group.str.contains("^A549"), :]

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\IPython\core\interactiveshell.py:3337: DtypeWarning: Columns (2) have mixed types.Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
AnnData expects .var.index to contain strings, but your first indices are: Int64Index([1, 2], dtype='int64', name='id'), …
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
AnnData expects .var.index to contain strings, but your first indices are: Int64Index([1, 2], dtype='int64', name='id'), …

[146]:

adata = sc.concat(adatas=[atac_adata, atac_only_adata])

adata.var['peak'] = atac_adata.var['peak'].tolist()
adata.var['chr'] = [str(i) for i in atac_adata.var['chr'].tolist()]
adata.var['start'] = atac_adata.var['start'].tolist()
adata.var['end'] = atac_adata.var['end'].tolist()

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:119: ImplicitModificationWarning: Transforming to str index.
  warnings.warn("Transforming to str index.", ImplicitModificationWarning)

[147]:

adata.var.index = adata.var.peak
adata.obs['treatment_time'] = adata.obs.group.apply(lambda x: x[-2])
adata.obs

[147]:

	source	group	experiment	treatment_time
sample
sci-RNA-A-071.GCGGAGTCGA	Human	A549_3h	co_assay	3
sci-RNA-A-071.TTGCAGCATT	Human	A549_1h	co_assay	1
sci-RNA-A-071.GCGGCCAATC	Human	A549_3h	co_assay	3
sci-RNA-A-071.CTGAAGAGAC	Human	A549_1h	co_assay	1
sci-RNA-A-071.GGCTGCCTTA	Human	A549_0h	co_assay	0
...	...	...	...	...
sci-ATAConly-118.AGCGATCCGC	Human	A549_1h	ATAC_only	1
sci-ATAConly-118.ATGAGTTCTC	Human	A549_0h	ATAC_only	0
sci-ATAConly-118.TCTATCGGTA	Human	A549_3h	ATAC_only	3
sci-ATAConly-118.TCCGCCGGTC	Human	A549_3h	ATAC_only	3
sci-ATAConly-118.ATCTAGGTTC	Human	A549_0h	ATAC_only	0

6260 rows × 4 columns

[148]:

adata

[148]:

AnnData object with n_obs × n_vars = 6260 × 189603
    obs: 'source', 'group', 'experiment', 'treatment_time'
    var: 'peak', 'chr', 'start', 'end'

[149]:

adata.var

[149]:

	peak	chr	start	end
peak
1-9963-10665	1-9963-10665	1	9963	10665
1-11369-12010	1-11369-12010	1	11369	12010
1-24886-25386	1-24886-25386	1	24886	25386
1-29054-30366	1-29054-30366	1	29054	30366
1-36073-36581	1-36073-36581	1	36073	36581
...	...	...	...	...
hs37d5-35449616-35449816	hs37d5-35449616-35449816	hs37d5	35449616	35449816
hs37d5-35450394-35450635	hs37d5-35450394-35450635	hs37d5	35450394	35450635
hs37d5-35454173-35454373	hs37d5-35454173-35454373	hs37d5	35454173	35454373
hs37d5-35455021-35455259	hs37d5-35455021-35455259	hs37d5	35455021	35455259
hs37d5-35455475-35455685	hs37d5-35455475-35455685	hs37d5	35455475	35455685

189603 rows × 4 columns

[150]:

adata = adata[:, adata.var.chr.isin([str(i) for i in range(1, 23)] + ['X', 'Y'])]

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):

[151]:

adata.var.chr.value_counts().to_frame().T.style

[151]:

	1	2	3	12	17	5	7	11	8	19	10	6	9	16	15	4	14	X	20	22	18	13	21	Y
chr	15546	15021	10294	10043	9809	9670	9529	9457	8348	8201	7997	7755	7352	7213	7044	6901	6409	5304	4891	3495	3241	3146	1901	721

[152]:

adata.var['length'] = adata.var['end'] - adata.var['start']
adata.var

Trying to set attribute `.var` of view, copying.

[152]:

	peak	chr	start	end	length
peak
1-9963-10665	1-9963-10665	1	9963	10665	702
1-11369-12010	1-11369-12010	1	11369	12010	641
1-24886-25386	1-24886-25386	1	24886	25386	500
1-29054-30366	1-29054-30366	1	29054	30366	1312
1-36073-36581	1-36073-36581	1	36073	36581	508
...	...	...	...	...	...
Y-59349508-59350008	Y-59349508-59350008	Y	59349508	59350008	500
Y-59352997-59354670	Y-59352997-59354670	Y	59352997	59354670	1673
Y-59354957-59355457	Y-59354957-59355457	Y	59354957	59355457	500
Y-59355538-59356742	Y-59355538-59356742	Y	59355538	59356742	1204
Y-59360548-59361354	Y-59360548-59361354	Y	59360548	59361354	806

179288 rows × 5 columns

[ ]:

[153]:

sc.settings.set_figure_params(dpi=80, facecolor='white')
sc.pl.highest_expr_genes(adata, n_top=20)

normalizing counts per cell
    finished (0:00:00)

[154]:

#adata.X[adata.X > 0.] = 1.

[155]:

sc.pp.calculate_qc_metrics(adata, qc_vars=[], percent_top=None, log1p=False, inplace=True)
sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts'], jitter=0.4, multi_panel=True)

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'source' as categorical
... storing 'group' as categorical
... storing 'experiment' as categorical
... storing 'treatment_time' as categorical
... storing 'chr' as categorical
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\seaborn\_core.py:1303: UserWarning: Vertical orientation ignored with only `x` specified.
  warnings.warn(single_var_warning.format("Vertical", "x"))
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\seaborn\_core.py:1303: UserWarning: Vertical orientation ignored with only `x` specified.
  warnings.warn(single_var_warning.format("Vertical", "x"))
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\seaborn\_core.py:1303: UserWarning: Vertical orientation ignored with only `x` specified.
  warnings.warn(single_var_warning.format("Vertical", "x"))
C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\seaborn\_core.py:1303: UserWarning: Vertical orientation ignored with only `x` specified.
  warnings.warn(single_var_warning.format("Vertical", "x"))

[156]:

sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])

[157]:

adata = adata[(adata.obs.total_counts >= 300) & (adata.obs.total_counts <= 20000), :]

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):

[158]:

# adata = adata[:, adata.var.gene_type == "protein_coding"]
sc.pp.filter_cells(adata, min_genes=300)
sc.pp.filter_genes(adata, min_cells=5)

filtered out 1220 cells that have less than 300 genes expressed

Trying to set attribute `.obs` of view, copying.

filtered out 46268 genes that are detected in less than 5 cells

[159]:

adata

[159]:

AnnData object with n_obs × n_vars = 3580 × 133020
    obs: 'source', 'group', 'experiment', 'treatment_time', 'n_genes_by_counts', 'total_counts', 'n_genes'
    var: 'peak', 'chr', 'start', 'end', 'length', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells'

[160]:

sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

normalizing counts per cell
    finished (0:00:00)

[161]:

sc.pp.highly_variable_genes(adata)

extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)

[162]:

sc.pl.highly_variable_genes(adata)

[163]:

adata.var.highly_variable.sum()

[163]:

[164]:

#adata.raw = adata

[165]:

#adata = adata[:, adata.var.highly_variable]

[166]:

import sys
sys.path.insert(0,'..')

import compactmarker

[167]:

sc.pp.regress_out(adata, ['total_counts'])

regressing out ['total_counts']
    sparse input is densified and may lead to high memory use

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])

    finished (0:08:18)

[168]:

sc.pp.scale(adata, max_value=10)
adata

[168]:

AnnData object with n_obs × n_vars = 3580 × 133020
    obs: 'source', 'group', 'experiment', 'treatment_time', 'n_genes_by_counts', 'total_counts', 'n_genes'
    var: 'peak', 'chr', 'start', 'end', 'length', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg'

[169]:

sc.tl.pca(adata, svd_solver='arpack')

computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:15)

[170]:

sc.pl.pca(adata, color=['treatment_time'])

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])

[171]:

sc.pl.pca_variance_ratio(adata, log=True)

[172]:

sc.pp.neighbors(adata, n_pcs=15)
sc.tl.umap(adata)
sc.pl.umap(adata, color=['treatment_time', 'total_counts'], legend_loc='on data', size=5.)

computing neighbors
    using 'X_pca' with n_pcs = 15
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:05)

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])

[183]:

[183]:

	UMAP1	UMAP2	treatment_time
sample
sci-RNA-A-071.CTGAAGAGAC	-4.176722	-5.039095	1-hour
sci-RNA-A-071.GGCTGCCTTA	-8.144611	-3.831046	0-hour
sci-RNA-A-023.AATCGAACTC	-0.276823	-0.886354	1-hour
sci-RNA-A-023.ATGAGTTCTC	-7.511479	-3.676493	0-hour
sci-RNA-A-023.TAGAATAGCC	-6.214170	-7.592634	1-hour
...	...	...	...
sci-ATAConly-118.CGTAAGGAGT	-10.707880	-3.975512	0-hour
sci-ATAConly-118.ATGAGTTCTC	-8.785486	-1.737620	0-hour
sci-ATAConly-118.TCTATCGGTA	-1.769733	-1.345214	3-hour
sci-ATAConly-118.TCCGCCGGTC	-5.051011	0.317150	3-hour
sci-ATAConly-118.ATCTAGGTTC	-7.935543	-1.015721	0-hour

3580 rows × 3 columns

[189]:

[sns.color_palette("tab10")[i] for i in [1, 0, 2]]

[189]:

[(1.0, 0.4980392156862745, 0.054901960784313725),
 (0.12156862745098039, 0.4666666666666667, 0.7058823529411765),
 (0.17254901960784313, 0.6274509803921569, 0.17254901960784313)]

[193]:

df = pd.DataFrame(adata.obsm['X_umap'], columns=['UMAP1', 'UMAP2'])
df['treatment_time'] = [i + '-hour' for i in adata.obs['treatment_time'].tolist()]
df['UMAP1'] = -df['UMAP1']
sns.jointplot(data=df.sort_values(by='treatment_time', ascending=False), x="UMAP1", y="UMAP2", hue="treatment_time", s=4,
              palette=[sns.color_palette("tab10")[i] for i in [2, 0, 1]])

[193]:

<seaborn.axisgrid.JointGrid at 0x275d4127748>

[ ]:

[182]:

df.index = adata.obs_names
df.to_pickle("a549-atac-umap.pkl")

[219]:

tf = pd.read_hdf("a549-motif.hdf", "motif")

[220]:

df_peak = pd.DataFrame(adata[tf.index.tolist(), :].X, index=tf.index, columns=adata.var_names)
df_peak

C:\Users\SLiang3\Miniconda3\envs\scanpy37\lib\site-packages\anndata\_core\anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):

[220]:

peak	1-9963-10665	1-29054-30366	1-91105-91605	1-133566-134066	1-135895-136395	1-137965-138465	1-234484-234984	1-237639-237872	1-360366-360566	1-525043-525243	...	Y-28816652-28817664	Y-28817780-28818002	Y-58827246-58827468	Y-58856009-58856209	Y-58862601-58862838	Y-58979330-58979530	Y-58992371-58992631	Y-58995548-58995784	Y-59016929-59017246	Y-59026458-59026658
sci-RNA-C-091.TCTATCGGTA	-0.462297	-0.005890	-0.029185	0.001729	-0.006477	0.001262	-0.028352	-0.040759	-0.059672	-0.058364	...	-0.077266	-0.015810	-0.075989	-0.037568	-0.040341	-0.041891	-0.029939	-0.020708	-0.021628	-0.032583
sci-RNA-D-058.AAGCATCCTA	-0.456264	-0.010410	-0.031898	-0.004537	-0.010938	-0.004936	-0.029889	-0.043972	-0.057596	-0.056411	...	-0.084975	-0.023334	-0.077601	-0.040208	-0.040555	-0.042906	-0.031785	-0.023389	-0.027675	-0.034350
sci-RNA-D-019.ACGATAGACT	-0.465793	-0.003271	-0.027613	0.005360	-0.003892	0.004853	-0.027462	-0.038898	-0.060874	-0.059496	...	-0.072800	-0.011451	-0.075056	-0.036038	-0.040217	-0.041303	-0.028869	-0.019154	-0.018124	-0.031559
sci-RNA-B-055.TCTATCGGTA	-0.458586	-0.008671	-0.030854	-0.002126	-0.009221	-0.002551	-0.029298	-0.042736	-0.058395	-0.057163	...	-0.082009	-0.020439	-0.076981	-0.039192	-0.040473	-0.042516	-0.031075	-0.022357	-0.025348	-0.033670
sci-RNA-A-068.TTGCAGCATT	-0.466967	-0.002391	-0.027085	0.006580	-0.003024	0.006059	-0.027163	-0.038272	-0.061278	-0.059876	...	-0.071299	-0.009986	-0.074742	-0.035524	-0.040176	-0.041105	-0.028510	-0.018632	-0.016947	-0.031215
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
sci-RNA-E-044.GGCTCGAGAT	-0.457952	-0.009146	-0.031140	-0.002784	-0.009690	-0.003203	-0.029459	-0.043074	-0.058176	-0.056957	...	-0.082819	-0.021230	-0.077150	-0.039470	-0.040495	-0.042623	-0.031269	-0.022639	-0.025984	-0.033856
sci-RNA-A-044.CGTCTATGAA	2.157020	-0.016426	-0.035510	-0.012878	-0.016875	-0.013186	-0.031935	-0.048249	-0.054833	-0.053811	...	-0.095236	-0.033349	-0.079746	-0.043724	-0.040839	-0.044258	-0.034242	-0.026957	-0.035725	-0.036702
sci-RNA-E-065.CCTAAGCGGT	-0.451095	-0.014283	-0.034223	-0.009906	-0.014760	-0.010247	-0.031206	-0.046725	-0.055818	-0.054737	...	-0.091580	-0.029781	-0.078981	-0.042471	-0.040738	-0.043776	-0.033367	-0.025686	-0.032857	-0.035864
sci-RNA-E-028.GCGGTTATTG	-0.455671	-0.010855	-0.032165	-0.005154	-0.011377	-0.005546	-0.030041	-0.044288	-0.057392	-0.056219	...	-0.085734	-0.024075	-0.077759	-0.040468	-0.040576	-0.043006	-0.031967	-0.023652	-0.028271	-0.034524
sci-RNA-B-063.CCATCGGACC	-0.461258	-0.006668	-0.029653	0.000650	-0.007245	0.000194	-0.028617	-0.041313	-0.059314	-0.058028	...	-0.078594	-0.017106	-0.076267	-0.038023	-0.040378	-0.042066	-0.030257	-0.021169	-0.022669	-0.032887

1434 rows × 133020 columns

[236]:

adata.var

[236]:

	peak	chr	start	end	length	n_cells_by_counts	mean_counts	pct_dropout_by_counts	total_counts	n_cells	highly_variable	means	dispersions	dispersions_norm	mean	std
peak
1-9963-10665	1-9963-10665	1	9963	10665	702	805	0.318371	87.140575	1993.0	633	True	1.097876	3.013239	2.303191	2.497401e-10	0.893167
1-29054-30366	1-29054-30366	1	29054	30366	1312	5	0.000958	99.920128	6.0	5	False	0.001945	0.526640	-2.415499	1.747405e-11	0.032843
1-91105-91605	1-91105-91605	1	91105	91605	500	9	0.002236	99.856230	14.0	9	False	0.007499	1.560148	-0.916107	3.674562e-12	0.067322
1-133566-134066	1-133566-134066	1	133566	134066	500	7	0.001438	99.888179	9.0	7	False	0.002772	0.898005	-1.876730	1.235005e-11	0.039164
1-135895-136395	1-135895-136395	1	135895	136395	500	5	0.000958	99.920128	6.0	5	False	0.001976	0.505286	-2.446479	7.819622e-13	0.033198
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
Y-58979330-58979530	Y-58979330-58979530	Y	58979330	58979530	200	14	0.003035	99.776358	19.0	12	False	0.015856	2.488539	0.430784	-1.528358e-11	0.091901
Y-58992371-58992631	Y-58992371-58992631	Y	58992371	58992631	260	11	0.002875	99.824281	18.0	9	False	0.010635	2.338237	0.212729	3.271336e-11	0.076084
Y-58995548-58995784	Y-58995548-58995784	Y	58995548	58995784	236	6	0.001438	99.904153	9.0	6	False	0.004191	1.264650	-1.344809	-1.284471e-12	0.050599
Y-59016929-59017246	Y-59016929-59017246	Y	59016929	59017246	317	20	0.005431	99.680511	34.0	18	True	0.016698	2.653505	0.670114	2.858354e-11	0.091827
Y-59026458-59026658	Y-59026458-59026658	Y	59026458	59026658	200	11	0.002556	99.824281	16.0	9	False	0.010572	2.197451	0.008480	-6.477635e-11	0.077295

133020 rows × 16 columns

[240]:

adata.var.sort_values("total_counts", ascending=False).iloc[:50, ].index.tolist()

[240]:

['1-568415-569780',
 '1-563942-565496',
 '17-22020593-22020946',
 '21-9825332-9826417',
 '21-9826789-9827405',
 '3-197900601-197901064',
 '19-39339898-39341473',
 '17-57920269-57922103',
 '19-42772189-42773586',
 '19-54693146-54695160',
 '12-53318890-53321579',
 '1-149223186-149224731',
 '19-39173985-39175364',
 '19-39902423-39904452',
 '11-65265876-65267383',
 '19-41769319-41771027',
 '17-8089321-8091375',
 '9-73033086-73036019',
 '12-52540791-52542779',
 '19-50379506-50382216',
 '19-33667373-33668962',
 '15-60689229-60691441',
 '12-125423646-125425060',
 '19-47758737-47761346',
 '12-53773139-53774560',
 '2-44394608-44396498',
 '19-41220540-41222372',
 '17-73029740-73031747',
 '17-45726342-45728562',
 '8-8085204-8086468',
 '12-52673854-52675240',
 '17-1619317-1621032',
 '10-47057243-47058423',
 '1-16839831-16841280',
 '12-95085-95715',
 '3-119812513-119814710',
 '5-180648778-180651166',
 '19-50143315-50144338',
 '17-57914489-57915863',
 '2-24306665-24308585',
 '2-27273032-27274773',
 '19-50179544-50181482',
 '19-45393326-45394633',
 '17-75281889-75284160',
 '17-43248249-43250632',
 '5-134260824-134261544',
 '19-41255458-41257455',
 '3-5019893-5021277',
 '2-55276116-55278597',
 'X-149106346-149108982']

[221]:

df_peak.to_hdf("a549-peak.hdf", 'peak')

[222]:

adata.obs.loc[tf.index.tolist(), :].to_csv('a549-anno.csv')

[223]:

print(*[i for i in adata.obs.loc[tf.index.tolist(), 'treatment_time']])

3 0 3 3 1 1 3 3 3 1 1 3 1 1 3 3 3 3 3 3 3 3 0 0 3 1 0 1 3 1 1 3 3 3 3 0 3 1 1 1 1 1 1 1 3 0 3 3 3 0 0 1 3 3 0 1 1 1 3 3 0 1 3 3 3 3 1 3 1 1 1 0 3 3 1 3 3 3 0 1 3 1 1 3 1 1 0 3 1 3 1 1 0 1 3 3 1 3 3 1 3 1 3 1 0 1 0 3 1 3 1 0 3 3 1 3 0 0 0 3 0 1 0 3 0 1 0 0 1 1 1 1 0 1 3 3 0 1 1 0 0 3 1 0 0 0 3 1 0 0 3 1 1 1 0 3 3 1 1 1 3 0 1 0 3 1 3 1 3 1 1 3 1 3 0 0 3 0 1 1 1 1 0 1 1 0 0 1 3 3 1 3 1 0 3 3 0 3 0 1 1 1 1 1 0 1 3 1 0 1 3 0 0 0 3 3 0 3 1 3 3 3 3 3 3 3 3 1 3 3 3 3 3 1 3 0 3 3 1 0 1 3 3 3 1 3 3 0 1 1 3 3 1 3 1 1 3 1 0 1 1 1 0 3 1 1 1 1 0 0 1 3 3 0 1 1 3 3 0 3 3 3 0 0 3 3 0 0 1 3 3 0 3 1 3 3 0 0 0 1 0 1 3 1 1 1 0 0 1 3 1 3 0 1 3 1 1 3 0 3 3 3 1 1 3 0 3 0 1 3 3 3 1 3 0 0 1 0 1 0 3 1 1 0 1 1 3 0 1 1 3 0 3 3 0 3 3 0 1 1 0 0 3 1 3 0 0 1 3 3 3 1 3 1 1 0 0 0 3 1 0 3 3 3 0 0 0 1 3 1 3 1 3 3 3 3 1 1 3 0 0 3 1 3 0 3 0 0 3 0 3 1 1 3 0 1 3 1 3 3 3 3 1 0 1 3 3 3 0 1 1 3 3 1 3 0 1 1 1 3 1 1 1 0 1 3 1 3 1 3 1 1 3 3 1 1 3 1 0 3 3 3 1 1 1 3 3 0 3 3 3 3 0 1 1 3 3 0 0 1 1 1 3 1 3 3 1 1 1 0 3 1 3 3 3 0 3 1 1 0 0 3 3 0 1 3 3 0 1 1 0 3 1 1 1 1 1 1 1 1 3 3 1 3 3 3 1 0 3 0 1 1 0 0 3 0 3 3 3 0 1 1 3 1 1 1 1 3 3 1 1 3 1 3 1 0 3 1 1 0 1 3 1 1 1 3 0 1 1 1 1 1 1 1 0 0 0 0 0 1 1 3 1 1 0 1 3 1 3 3 0 1 3 3 0 3 3 1 3 3 3 0 1 0 3 3 0 3 3 1 3 0 1 1 3 1 3 1 1 0 0 3 3 3 3 3 1 0 3 3 1 3 3 0 3 3 3 3 1 1 1 1 0 1 0 1 0 3 1 1 1 3 0 3 3 3 0 1 0 3 3 1 3 3 0 3 3 3 3 0 0 3 1 0 0 1 3 1 0 0 1 0 1 3 0 1 0 3 1 1 1 0 1 3 3 0 0 3 1 0 3 3 0 0 0 3 3 1 0 3 1 3 3 1 0 1 3 1 0 1 3 3 3 3 1 1 1 0 3 1 1 1 1 3 3 0 0 3 1 0 3 3 1 1 1 1 3 3 3 0 1 3 0 1 3 1 1 0 3 1 1 1 3 1 0 0 3 1 0 1 1 1 0 3 0 1 1 0 3 1 1 3 1 1 1 0 0 1 1 0 1 1 3 1 1 3 3 3 3 0 0 1 1 3 0 3 1 3 1 1 1 1 1 0 0 0 1 1 1 1 0 0 1 3 1 0 3 1 1 1 3 1 3 3 3 1 0 1 1 1 1 0 1 3 0 0 0 1 3 3 0 0 3 3 3 1 1 1 3 1 0 0 3 3 0 1 3 0 1 3 3 1 1 0 1 3 3 1 1 1 3 1 3 1 0 1 0 1 0 1 3 0 1 3 1 0 1 1 0 1 0 0 0 3 0 1 3 0 1 0 1 0 0 3 3 0 1 3 0 3 0 1 1 3 1 1 0 1 1 3 3 1 0 0 1 3 3 3 1 1 3 3 1 3 1 0 1 1 0 3 0 1 0 1 1 3 0 0 1 3 1 3 1 0 0 1 1 0 3 1 1 3 3 1 1 1 3 3 3 1 1 3 1 0 3 1 0 3 1 1 1 3 3 0 3 3 3 3 3 3 3 1 3 3 3 0 3 3 3 3 0 1 3 1 0 3 3 1 0 1 0 3 3 3 1 0 3 1 0 1 3 3 1 0 0 3 3 0 3 3 0 1 3 1 3 3 1 3 3 1 0 0 1 0 1 3 1 0 3 0 0 1 1 0 1 3 3 1 1 1 0 1 1 3 0 1 1 3 3 1 3 3 3 1 0 1 3 1 3 3 3 1 1 1 1 1 3 1 3 1 3 3 1 3 3 1 1 1 1 0 0 1 0 1 1 1 0 3 3 3 0 0 0 3 0 0 3 1 3 1 0 0 3 3 1 1 3 0 0 1 1 3 1 1 0 1 1 1 3 1 0 3 1 1 3 3 3 0 0 3 1 1 0 3 3 3 3 3 0 1 3 0 3 1 0 3 1 1 0 0 3 0 3 1 1 3 3 1 3 3 3 3 0 1 3 0 1 0 3 3 1 1 0 1 1 3 1 0 3 1 0 1 1 3 0 1 1 3 0 3 0 3 1 0 3 0 3 0 3 1 3 3 1 3 0 1 0 3 0 0 0 1 1 0 1 3 0 0 3 3 1 1 1 1 0 1 3 3 0 0 3 0 0 0 3 0 3 3 1 1 0 1 1 3 0 1 0 1 3 0 1 1 0 3 1 0 1 1 1 3 1 1 0 3 1 1 3 0 1 1 1 1 0 1 0 1 1 0 0 3 0 1 3 3 1 3 0 0 3 3 0 3 0 3 1 3 3 0 1 0 1 0 3 0 1 3 3 3 0 3 3 0 0 1 0 0 0 1 3 1 1 1 1 1 0 3 1 3 0 1 1 1 3 1 3 0 3 3 3 0 0 0 3 3 0 3 3 3 3 0 3 3 3 0 3 3 3 3 1 0 3 3 0 3 0 0 1 0 1 0 1 1 3 3 3 3 3 3 3 3 1 1 0 1 3 3 3 1 1 1 0 0 0 3 3 1 1 3 1 3 1 0 0 0

[230]:

tf.columns[tf.columns.str.contains("NR3C")].tolist()

[230]:

['MA0113.3_NR3C1', 'MA0727.1_NR3C2']