@@ -182,6 +182,85 @@ def filter_signatures_by_Ucell(selected_edges, adata) -> pd.DataFrame:
182182 data_ucell = adata .obs .filter (like = '_UCell' )
183183 return data_ucell
184184
def select_top_edges(gene_inter_adata, adata, top_per_source=10, col_cluster='leiden_remap', min_reg_size=10, verbose=True, return_copy=False):
    """
    Select, for every cluster, the top target genes of each source gene.

    Genes are ranked per cluster with a t-test on `adata`. For each cluster a
    gene's "diff" is the absolute difference between its rank in that cluster
    and its mean rank across the other clusters. For each source gene, the
    targets whose "diff" is closest to the source's own "diff" are selected.

    Parameters
    ----------
    gene_inter_adata : AnnData
        Gene interaction AnnData. `var` must contain the columns 'source',
        'target' and one '<cluster>_nonzero' column per cluster; `obs` must
        contain `col_cluster`.
    adata : AnnData
        Expression AnnData used for ranking genes. `sc.tl.rank_genes_groups`
        is run on this object, and the ranking is then read back from it.
    top_per_source : int, default=10
        Maximum number of targets to select per source.
    col_cluster : str, default='leiden_remap'
        Column in `obs` (of both AnnData objects) defining clusters.
    min_reg_size : int, default=10
        Minimum number of selected targets a source needs for its edges to be
        kept in a cluster.
    verbose : bool, default=True
        Print progress messages.
    return_copy : bool, default=False
        Currently unused; kept for interface compatibility.

    Returns
    -------
    The value returned by `process_cell_edges` when called with a dict that
    maps each cluster to its list of kept edge names ("<source>_<target>").

    Raises
    ------
    ValueError
        If the two AnnData objects share no cluster label in `col_cluster`.
    """
    # Minimum value of '<cluster>_nonzero' for an edge to be considered.
    min_edge_support = 0.5

    if verbose:
        print(f"Initial shape: {gene_inter_adata.shape}")

    # Rank genes per cluster
    sc.tl.rank_genes_groups(adata, groupby=col_cluster, method='t-test')
    shared_clusters = set(np.unique(gene_inter_adata.obs[col_cluster])).intersection(adata.obs[col_cluster])
    # Sort (by string form, so mixed label types cannot break the comparison)
    # to make the processing order reproducible across runs.
    clusters = sorted(shared_clusters, key=str)
    if not clusters:
        raise ValueError(
            f"No cluster in '{col_cluster}' is shared between gene_inter_adata and adata."
        )

    # Merge ranking data across clusters: one 'rank_<cluster>' column each,
    # restricted to genes that are ranked in every cluster (inner join).
    rank_dfs = []
    for c in clusters:
        if verbose:
            print(f"Ranking cluster: {c}")
        rank_col = f"rank_{c}"
        df = sc.get.rank_genes_groups_df(adata, group=str(c))
        df = df.sort_values('scores', ascending=False)
        df[rank_col] = np.arange(1, len(df) + 1)
        rank_dfs.append(df[['names', rank_col]])
    df_rank = reduce(lambda l, r: pd.merge(l, r, on='names', how='inner'), rank_dfs)

    # Loop invariants.
    var = gene_inter_adata.var
    sources = var['source'].unique()
    all_rank_cols = [col for col in df_rank.columns if col != 'names']

    keep_edges_dict = {}
    for c in clusters:
        if verbose:
            print(f"Selecting targets for cluster: {c}")

        # Rank difference of every gene: this cluster vs. mean of the others.
        own_col = f"rank_{c}"
        other_cols = [col for col in all_rank_cols if col != own_col]
        df_rank_c = df_rank.copy()
        df_rank_c['avg'] = df_rank_c[other_cols].mean(axis=1)
        df_rank_c['diff'] = (df_rank_c[own_col] - df_rank_c['avg']).abs()
        df_rank_c = df_rank_c.sort_values('diff', ascending=False)
        rank_diff = df_rank_c[['names', 'diff']]

        # Edges with enough support in this cluster.
        supported = var[f'{c}_nonzero'] >= min_edge_support

        cluster_edges = []
        for source in sources:
            source_diff = rank_diff.loc[rank_diff['names'] == source, 'diff']
            if source_diff.empty:
                # The source gene has no rank entry, so there is no reference
                # value to compare its targets against; skip it instead of
                # reusing a value left over from a previous source.
                continue
            tf_rank = source_diff.values[0]

            df_targets = var[(var['source'] == source) & supported].merge(
                rank_diff, left_on='target', right_on='names', how='left'
            )
            df_targets['rank_distance'] = (df_targets['diff'] - tf_rank).abs()
            df_targets = df_targets.sort_values('rank_distance').head(top_per_source)

            if len(df_targets) >= min_reg_size:
                cluster_edges.extend(f"{source}_{t}" for t in df_targets['target'])

        keep_edges_dict[c] = cluster_edges

    # NOTE(review): the previous code passed an undefined name (`keep_edges`)
    # here, which raised NameError. Passing the per-cluster mapping is assumed
    # to be the intent; confirm against process_cell_edges' signature.
    return process_cell_edges(keep_edges_dict)
185264
186265
187266
@@ -235,16 +314,15 @@ def compute_signatures_UCell_scores(selected_edges, adata, key='unique') -> pd.D
235314 """
236315
237316 all_signatures = {}
238- for ct in resi [key ]:
239- sign = resi [key ][ct ]['edges' ].groupby ('source' )['target' ].apply (list ).to_dict ()
317+ for ct in selected_edges [key ]:
318+ sign = selected_edges [key ][ct ]['edges' ].groupby ('source' )['target' ].apply (list ).to_dict ()
240319 sign = {f"{ ct } _{ k } " : v for k , v in sign .items ()}
241320 all_signatures = all_signatures | sign
242321
243322 ucell .compute_ucell_scores (adata , signatures = all_signatures , n_jobs = 1 )
244323 data_ucell = adata .obs .filter (like = '_UCell' )
245324 return data_ucell
246325
247-
248326def filter_grn_by_top_signatures (data_ucell : pd .DataFrame , grn_adata : ad .AnnData , keep_top_ranked : int = 100 , filter_by : str = "z_score" , cluster_col = 'spectral' ) -> Tuple [Optional [ad .AnnData ], List [str ]]:
249327 """
250328 Filters a GRN (Gene Regulatory Network) AnnData object to keep only the edges
@@ -320,36 +398,6 @@ def filter_grn_by_top_signatures(data_ucell: pd.DataFrame, grn_adata: ad.AnnData
320398 return grn_adata_filtered , top_sources_list
321399
322400
323- # def filter_grn_by_top_signatures(data_ucell: pd.DataFrame, grn_adata: AnnData, keep_top_ranked: int = 100) -> Tuple[Optional[AnnData], List[str]]:
324-
325- # if grn_adata.var.empty:
326- # return None, []
327-
328- # df = data_ucell.copy()
329- # features = [c for c in df.columns if c.endswith('_UCell')]
330- # clusters = sorted(df['spectral'].unique())
331- # all_results = []
332-
333- # for cl in clusters:
334- # g1, g2 = df[df['spectral'] == cl], df[df['spectral'] != cl]
335- # res = []
336- # for f in features:
337- # try:
338- # s, p = mannwhitneyu(g1[f], g2[f], alternative='two-sided')
339- # res.append({'cluster': cl, 'gene_set': f, 'mean_diff': g1[f].mean() - g2[f].mean(), 'pval': p})
340- # except:
341- # res.append({'cluster': cl, 'gene_set': f, 'mean_diff': 0, 'pval': 1})
342- # res = pd.DataFrame(res)
343- # res['padj'] = multipletests(res['pval'], method='fdr_bh')[1]
344- # all_results.append(res.sort_values(['padj','mean_diff'], ascending=[True, False]).head(keep_top_ranked))
345-
346- # combined = pd.concat(all_results, ignore_index=True)
347- # top_sources = [s.split("_UCell")[0] for s in list(combined["gene_set"])]
348- # top_sources_list = list(set(top_sources))
349-
350- # grn_adata_filtered = grn_adata[:, grn_adata.var["source"].isin(top_sources_list)].copy()
351-
352- # return grn_adata_filtered, top_sources_list
353401
354402
355403
@@ -415,9 +463,7 @@ def plot_shared_targets_heatmap(grn_adata, genes=None, figsize=(6, 6), cmap='RdB
415463 plt .suptitle (title , y = 1.05 )
416464 plt .show ()
417465
418- #return shared_target_matrix
419466
420- #**********************************************************************
421467
422468
423469
0 commit comments