Skip to content

Commit bc9ea88

Browse files
downstream
1 parent 85a2386 commit bc9ea88

3 files changed

Lines changed: 90 additions & 39 deletions

File tree

src/netmap/downstream/final_downstream.py

Lines changed: 81 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,85 @@ def filter_signatures_by_Ucell(selected_edges, adata) -> pd.DataFrame:
182182
data_ucell = adata.obs.filter(like='_UCell')
183183
return data_ucell
184184

185+
def select_top_edges(gene_inter_adata, adata, top_per_source=10, col_cluster='leiden_remap', min_reg_size=10, verbose=True, return_copy = False):
    """
    Select the top target genes per source, separately for each cluster.

    For every cluster, genes are ranked by their t-test score against the
    other clusters. A gene's ``diff`` is the absolute difference between its
    rank in the cluster and its mean rank in all other clusters. For each
    source, the candidate targets are the edges whose ``<cluster>_nonzero``
    value is at least 0.5; the ``top_per_source`` candidates whose ``diff``
    is closest to the source's own ``diff`` are selected.

    Parameters
    ----------
    gene_inter_adata : AnnData
        Gene interaction AnnData. ``var`` must contain the columns 'source',
        'target' and one '<cluster>_nonzero' column per cluster; ``obs`` must
        contain `col_cluster`.
    adata : AnnData
        Expression AnnData used for ranking genes; ``obs`` must contain
        `col_cluster`. ``sc.tl.rank_genes_groups`` is run on this object, so
        its ranking results are written to it.
    top_per_source : int, default=10
        Maximum number of targets to select per source and cluster.
    col_cluster : str, default='leiden_remap'
        Column in ``obs`` (of both objects) defining the clusters. Only
        clusters present in both objects are processed.
    min_reg_size : int, default=10
        A source contributes edges for a cluster only if at least this many
        targets were selected for it.
    verbose : bool, default=True
        Print progress messages.
    return_copy : bool, default=False
        Currently unused; kept for interface compatibility.

    Returns
    -------
    The result of ``process_cell_edges`` applied to a dict that maps each
    cluster label to its list of selected edge names, each formatted as
    '<source>_<target>'.
    """
    # Minimum value of '<cluster>_nonzero' for an edge to be a candidate.
    min_edge_support = 0.5

    if verbose:
        print(f"Initial shape: {gene_inter_adata.shape}")

    # Rank genes per cluster
    sc.tl.rank_genes_groups(adata, groupby=col_cluster, method='t-test')
    clusters = list(set(np.unique(gene_inter_adata.obs[col_cluster])).intersection(adata.obs[col_cluster]))

    # Merge ranking data across clusters: one 'rank_<cluster>' column each,
    # restricted to the genes ranked in every cluster (inner join).
    rank_dfs = []
    for c in clusters:
        if verbose:
            print(f"Ranking cluster: {c}")
        df = sc.get.rank_genes_groups_df(adata, group=str(c))
        df = df.sort_values('scores', ascending=False)
        df[f"rank_{c}"] = np.arange(1, len(df) + 1)
        rank_dfs.append(df[['names', f'rank_{c}']])
    df_rank = reduce(lambda l, r: pd.merge(l, r, on='names', how='inner'), rank_dfs)

    # Loop-invariant lookups, hoisted out of the per-cluster loop.
    all_rank_cols = [col for col in df_rank.columns if col != 'names']
    edge_var = gene_inter_adata.var
    sources = edge_var["source"].unique()

    keep_edges_dict = {}
    for c in clusters:
        if verbose:
            print(f"Selecting targets for cluster: {c}")

        # Rank difference of each gene: this cluster vs. the mean of the others.
        other_rank_cols = [col for col in all_rank_cols if col != f"rank_{c}"]
        df_rank_c = df_rank.copy()
        df_rank_c['avg'] = df_rank_c[other_rank_cols].mean(axis=1)
        df_rank_c['diff'] = (df_rank_c[f"rank_{c}"] - df_rank_c['avg']).abs()
        df_rank_c = df_rank_c.sort_values('diff', ascending=False)

        supported = edge_var[f'{c}_nonzero'] >= min_edge_support

        cluster_edges = []
        for source in sources:
            source_diff = df_rank_c.loc[df_rank_c['names'] == source, 'diff']
            if source_diff.shape[0] == 0:
                # The source gene has no ranking, so no reference value exists
                # to compare its targets against; skip it.
                continue
            tf_rank = source_diff.values[0]

            df_targets = (
                edge_var[(edge_var['source'] == source) & supported]
                .merge(df_rank_c[['names', 'diff']],
                       left_on='target', right_on='names', how='left')
            )

            # Targets without a ranking get a NaN distance and sort last.
            df_targets['rank_distance'] = (df_targets['diff'] - tf_rank).abs()
            df_targets = df_targets.sort_values('rank_distance').head(top_per_source)

            if len(df_targets) >= min_reg_size:
                cluster_edges.extend(f"{source}_{t}" for t in df_targets['target'])

        keep_edges_dict[c] = cluster_edges

    # The original passed the undefined name `keep_edges` here (NameError);
    # the per-cluster dict built above is the only candidate argument.
    return process_cell_edges(keep_edges_dict)
185264

186265

187266

@@ -235,16 +314,15 @@ def compute_signatures_UCell_scores(selected_edges, adata, key='unique') -> pd.D
235314
"""
236315

237316
all_signatures = {}
238-
for ct in resi[key]:
239-
sign = resi[key][ct]['edges'].groupby('source')['target'].apply(list).to_dict()
317+
for ct in selected_edges[key]:
318+
sign = selected_edges[key][ct]['edges'].groupby('source')['target'].apply(list).to_dict()
240319
sign = {f"{ct}_{k}": v for k, v in sign.items()}
241320
all_signatures = all_signatures | sign
242321

243322
ucell.compute_ucell_scores(adata, signatures=all_signatures, n_jobs=1)
244323
data_ucell = adata.obs.filter(like='_UCell')
245324
return data_ucell
246325

247-
248326
def filter_grn_by_top_signatures(data_ucell: pd.DataFrame, grn_adata: ad.AnnData, keep_top_ranked: int = 100, filter_by: str = "z_score", cluster_col = 'spectral') -> Tuple[Optional[ad.AnnData], List[str]]:
249327
"""
250328
Filters a GRN (Gene Regulatory Network) AnnData object to keep only the edges
@@ -320,36 +398,6 @@ def filter_grn_by_top_signatures(data_ucell: pd.DataFrame, grn_adata: ad.AnnData
320398
return grn_adata_filtered, top_sources_list
321399

322400

323-
# def filter_grn_by_top_signatures(data_ucell: pd.DataFrame, grn_adata: AnnData, keep_top_ranked: int = 100) -> Tuple[Optional[AnnData], List[str]]:
324-
325-
# if grn_adata.var.empty:
326-
# return None, []
327-
328-
# df = data_ucell.copy()
329-
# features = [c for c in df.columns if c.endswith('_UCell')]
330-
# clusters = sorted(df['spectral'].unique())
331-
# all_results = []
332-
333-
# for cl in clusters:
334-
# g1, g2 = df[df['spectral'] == cl], df[df['spectral'] != cl]
335-
# res = []
336-
# for f in features:
337-
# try:
338-
# s, p = mannwhitneyu(g1[f], g2[f], alternative='two-sided')
339-
# res.append({'cluster': cl, 'gene_set': f, 'mean_diff': g1[f].mean() - g2[f].mean(), 'pval': p})
340-
# except:
341-
# res.append({'cluster': cl, 'gene_set': f, 'mean_diff': 0, 'pval': 1})
342-
# res = pd.DataFrame(res)
343-
# res['padj'] = multipletests(res['pval'], method='fdr_bh')[1]
344-
# all_results.append(res.sort_values(['padj','mean_diff'], ascending=[True, False]).head(keep_top_ranked))
345-
346-
# combined = pd.concat(all_results, ignore_index=True)
347-
# top_sources = [s.split("_UCell")[0] for s in list(combined["gene_set"])]
348-
# top_sources_list = list(set(top_sources))
349-
350-
# grn_adata_filtered = grn_adata[:, grn_adata.var["source"].isin(top_sources_list)].copy()
351-
352-
# return grn_adata_filtered, top_sources_list
353401

354402

355403

@@ -415,9 +463,7 @@ def plot_shared_targets_heatmap(grn_adata, genes=None, figsize=(6, 6), cmap='RdB
415463
plt.suptitle(title, y=1.05)
416464
plt.show()
417465

418-
#return shared_target_matrix
419466

420-
#**********************************************************************
421467

422468

423469

src/netmap/downstream/plotting.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@ def rank_regulon_groups_dotplot(grn_adata_filtered, adata_regl, original_cluster
2424
colheaders = grn_adata_filtered.var.columns[grn_adata_filtered.var.columns.str.contains('nonzero')]
2525

2626
colheaders = list(colheaders)
27-
colheaders.remove('count_nonzero')
28-
colheaders.remove('count_nonzero_norm')
27+
if 'count_nonzero' in colheaders:
28+
colheaders.remove('count_nonzero')
29+
if 'count_nonzero_norm' in colheaders:
30+
colheaders.remove('count_nonzero_norm')
2931

3032
adata_regl.var['regulon_name'] = [x.replace('_UCell', '') for x in adata_regl.var.index]
3133
adata_regl.var['source'] = [x.split('_')[-1] for x in adata_regl.var['regulon_name']]
@@ -36,8 +38,8 @@ def rank_regulon_groups_dotplot(grn_adata_filtered, adata_regl, original_cluster
3638
regulon = adata_regl.var.regulon_name[ri]
3739
sou = adata_regl.var.source[ri]
3840
count = count+1
39-
fractions[f'{regulon}_UCell'] = grn_adata_filtered.var[grn_adata_filtered.var.source == sou][colheaders].sum()/grn_adata_filtered.var[grn_adata_filtered.var.source == sou].shape[0]
40-
41+
fractions[f'{regulon}_UCell'] = grn_adata_filtered.var[grn_adata_filtered.var.source == sou][colheaders].sum()
42+
4143
fractions = pd.DataFrame(fractions)
4244
fractions.index = [x.replace('_nonzero', '') for x in fractions.index]
4345

@@ -46,6 +48,8 @@ def rank_regulon_groups_dotplot(grn_adata_filtered, adata_regl, original_cluster
4648
pp = sc.pl.rank_genes_groups_dotplot(adata_regl, n_genes=n_genes, key=key, groupby=new_cluster_column, cmap=cmap, figsize=figsize, values_to_plot=values_to_plot, return_fig = True)
4749
fractions = fractions.reindex(list(pp.dot_size_df.index))
4850

51+
pp.dot_size_df = fractions.loc[:, pp.dot_color_df.columns]
52+
print(fractions)
4953
pp.dot_size_df = pp.dot_size_df/(pp.dot_size_df.max())
5054

5155
if return_fig:

src/netmap/masking/internal.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ def add_cluster_based_candidate_edges(grn_adata, cluster_column = 'leiden_remap'
167167

168168

169169

170+
170171
def find_consistent_pairs(grn_adata, gene_names):
171172
"""
172173
Creates a dictionary of binary masks for each cell and pair of genes,

0 commit comments

Comments
 (0)