import glob
import logging
import os
import re
import shutil
import subprocess
import sys
from typing import Dict, List, Optional, Sequence, Union

import pandas as pd
import pyranges as pr
import ray
from IPython.display import HTML
from pybiomart import Dataset

from .utils import *
# stderr is pointed at the null device while ray is running, to avoid ray
# printing "Broken pipe" a million times.
# Reference to the stream that was active when this module was imported.
_stderr = sys.stderr
# Opened in text mode: whatever is installed as sys.stderr receives str, and a
# binary-mode handle would reject every such write with a TypeError.
null = open(os.devnull, 'w')
class Homer():
    """
    Homer class.

    :class:`Homer` contains Homer for motif enrichment analysis on sets of regions.

    Attributes
    ---------
    homer_path: str
        Path to Homer bin folder.
    bed_path: str
        Path to bed file containing region set to be analyzed with Homer.
    name: str
        Analysis name.
    outdir: str
        Path to folder to output Homer results.
    genome: str
        Homer genome label to use.
    size: str, optional
        Fragment size to use for motif finding. Default: 'given' [uses the exact regions you give it]
    mask: bool, optional
        Whether to mask repeats or not. Default: True
    denovo : bool, optional
        Whether to infer overrepresented motifs de novo. Default: False
    len: str, optional
        Motif length values (set from the ``length`` constructor argument). Default: 8,10,12
    meme_path: str, optional
        Path to meme bin folder. Meme will be used if given for motif annotation. Default: None
    meme_collection_path: str, optional
        Path to motif collection (in .cb format) to compare homer motifs with. Default: None
    path_to_motif_annotations: str, optional
        Path to motif annotations. If not provided, they will be downloaded from
        https://resources.aertslab.org based on the specie name provided (only possible for mus_musculus,
        homo_sapiens and drosophila_melanogaster). Default: None
    annotation_version: str, optional
        Motif collection version. Default: v9
    cistrome_annotation: List, optional
        Annotation to use for forming cistromes. It can be 'Direct_annot' (direct evidence that the motif is
        linked to that TF), 'Motif_similarity_annot' (based on tomtom motif similarity), 'Orthology_annot'
        (based on orthology with a TF that is directly linked to that motif) or 'Motif_similarity_and_Orthology_annot'.
        Default: ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot']
    motif_similarity_fdr: float, optional
        Minimal motif similarity value to consider two motifs similar. Default: 0.001
    orthologous_identity_threshold: float, optional
        Minimal orthology value for considering two TFs orthologous. Default: 0.0
    known_motifs: pd.DataFrame
        A dataframe containing known motif enrichment results.
    denovo_motifs: pd.DataFrame
        A dataframe containing de novo motif enrichment results.
    known_motif_hits: Dict
        A dictionary containing regions with motif hits for each known motif.
    denovo_motif_hits: Dict
        A dictionary containing regions with motif hits for each de novo motif.
    known_cistromes: Dict
        A dictionary containing regions with motif hits for each TF found with known motifs.
    denovo_cistromes: Dict
        A dictionary containing regions with motif hits for each TF found de novo.

    References
    ---------
    Heinz S, Benner C, Spann N, Bertolino E et al. Simple Combinations of Lineage-Determining
    Transcription Factors Prime cis-Regulatory Elements Required for Macrophage and B Cell Identities.
    Mol Cell 2010 May 28;38(4):576-589. PMID: 20513432
    """

    def __init__(self,
                 homer_path: str,
                 bed_path: str,
                 name: str,
                 outdir: str,
                 genome: str,
                 size: str = 'given',
                 mask: bool = True,
                 denovo: bool = False,
                 length: str = '8,10,12',
                 meme_path: Optional[str] = None,
                 meme_collection_path: Optional[str] = None,
                 path_to_motif_annotations: Optional[str] = None,
                 annotation_version: str = 'v9',
                 cistrome_annotation: List[str] = ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot'],
                 motif_similarity_fdr: float = 0.001,
                 orthologous_identity_threshold: float = 0.0):
        """
        Initialize Homer class.

        Parameters
        ---------
        homer_path: str
            Path to Homer bin folder.
        bed_path: str
            Path to bed file containing region set to be analyzed with Homer.
        name: str
            Analysis name.
        outdir: str
            Path to folder to output Homer results.
        genome: str
            Homer genome label to use.
        size: str, optional
            Fragment size to use for motif finding. Default: 'given' [uses the exact regions you give it]
        mask: bool, optional
            Whether to mask repeats or not. Default: True
        denovo : bool, optional
            Whether to infer overrepresented motifs de novo. Default: False
        length: str, optional
            Motif length values. Stored as the ``len`` attribute. Default: 8,10,12
        meme_path: str, optional
            Path to meme bin folder. Meme will be used if given for motif annotation. Default: None
        meme_collection_path: str, optional
            Path to motif collection (in .cb format) to compare homer motifs with. Default: None
        path_to_motif_annotations: str, optional
            Path to motif annotations. If not provided, they will be downloaded from
            https://resources.aertslab.org based on the specie name provided (only possible for mus_musculus,
            homo_sapiens and drosophila_melanogaster). Default: None
        annotation_version: str, optional
            Motif collection version. Default: v9
        cistrome_annotation: List, optional
            Annotation to use for forming cistromes. It can be 'Direct_annot' (direct evidence that the motif is
            linked to that TF), 'Motif_similarity_annot' (based on tomtom motif similarity), 'Orthology_annot'
            (based on orthology with a TF that is directly linked to that motif) or 'Motif_similarity_and_Orthology_annot'.
            Default: ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot']
        motif_similarity_fdr: float, optional
            Minimal motif similarity value to consider two motifs similar. Default: 0.001
        orthologous_identity_threshold: float, optional
            Minimal orthology value for considering two TFs orthologous. Default: 0.0

        References
        ---------
        Heinz S, Benner C, Spann N, Bertolino E et al. Simple Combinations of Lineage-Determining
        Transcription Factors Prime cis-Regulatory Elements Required for Macrophage and B Cell Identities.
        Mol Cell 2010 May 28;38(4):576-589. PMID: 20513432
        """
        self.homer_path = homer_path
        self.bed_path = bed_path
        self.genome = genome
        self.outdir = outdir
        self.size = size
        self.len = length
        self.mask = mask
        self.denovo = denovo
        self.name = name
        self.meme_path = meme_path
        self.meme_collection_path = meme_collection_path
        self.path_to_motif_annotations = path_to_motif_annotations
        self.annotation_version = annotation_version
        # Copy list-like input so instances never share (and cannot corrupt)
        # the default list object defined in the signature.
        if isinstance(cistrome_annotation, (list, tuple)):
            cistrome_annotation = list(cistrome_annotation)
        self.cistrome_annotation = cistrome_annotation
        self.motif_similarity_fdr = motif_similarity_fdr
        self.orthologous_identity_threshold = orthologous_identity_threshold
        # Result slots, filled in by run() and the methods it calls.
        self.known_motifs = None
        self.denovo_motifs = None
        self.known_motif_hits = None
        self.denovo_motif_hits = None
        self.known_cistromes = None
        self.denovo_cistromes = None

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _get_logger():
        """Set up stdout logging (basicConfig does nothing once the root logger has handlers) and return the 'Homer' logger."""
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            handlers=[logging.StreamHandler(stream=sys.stdout)])
        return logging.getLogger('Homer')

    @staticmethod
    def _run_command(cmd):
        """Run an argument list without a shell; raise RuntimeError if it fails or cannot be started."""
        cmd = [str(part) for part in cmd]
        printable = ' '.join(cmd)
        try:
            subprocess.check_output(args=cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise RuntimeError("command '{}' return with error (code {}): {}".format(printable, e.returncode, e.output)) from e
        except OSError as e:
            # Without a shell a missing or non-executable program surfaces as
            # OSError; keep reporting it as RuntimeError like any other failure.
            raise RuntimeError("command '{}' could not be started: {}".format(printable, e)) from e

    @staticmethod
    def _read_tsv_skipping_bad_lines(path):
        """Read a tab-separated file, dropping malformed rows, on both old and new pandas."""
        try:
            return pd.read_csv(path, sep='\t', on_bad_lines='skip')
        except TypeError:
            # pandas < 1.3 does not know `on_bad_lines`; use the legacy keyword.
            return pd.read_csv(path, sep='\t', error_bad_lines=False)

    @staticmethod
    def _subset_annotation(annotation_table, motif_ids):
        """Keep the rows of an index-keyed annotation table whose index appears in `motif_ids`, then reset the index."""
        available = set(annotation_table.index)
        wanted = [m for m in dict.fromkeys(motif_ids) if m in available]
        return annotation_table.loc[wanted].reset_index()

    @staticmethod
    def _cistromes_from_hits(motif_hits, motifs, motif_column, annotation, count_separator):
        """Build the `{label: regions}` cistrome dictionary for one motif table / hit dictionary pair."""
        if 'Direct_annot' in annotation:
            direct = {tf: get_cistrome_per_TF(motif_hits,
                                              get_motifs_per_TF(motifs, tf, motif_column=motif_column, annotation=['Direct_annot']))
                      for tf in get_TF_list(motifs, annotation=['Direct_annot'])}
        else:
            direct = {}
        if 'Direct_annot' not in annotation or len(annotation) > 1:
            extended = {tf + '_extended': get_cistrome_per_TF(motif_hits,
                                                              get_motifs_per_TF(motifs, tf, motif_column=motif_column, annotation=annotation))
                        for tf in get_TF_list(motifs, annotation=annotation)}
        else:
            extended = {}
        merged = {**direct, **extended}
        return {label + count_separator + str(len(regions)) + 'r)': regions
                for label, regions in merged.items()}

    def _species_from_genome(self):
        """Map the Homer genome label to a species name (a later match wins: 'hg' over 'dm' over 'mm')."""
        species = None
        for tag, label in (('mm', 'mus_musculus'),
                           ('dm', 'drosophila_melanogaster'),
                           ('hg', 'homo_sapiens')):
            if tag in self.genome:
                species = label
        if species is None:
            raise ValueError("Cannot infer species from genome '{}': expected a label containing "
                             "'mm', 'dm' or 'hg'".format(self.genome))
        return species

    def _load_cistarget_annotation(self, species):
        """Load the cisTarget motif-to-TF annotation table for `species` using this object's settings."""
        return load_motif_annotations(species,
                                      version=self.annotation_version,
                                      fname=self.path_to_motif_annotations,
                                      motif_similarity_fdr=self.motif_similarity_fdr,
                                      orthologous_identity_threshold=self.orthologous_identity_threshold)

    def _load_homer_motif_table(self):
        """Read columns 1 and 11 of Homer's motif table, trying 'table.txt' and then 'motifTable.txt'."""
        def read(fname):
            path = os.path.abspath(os.path.join(os.path.dirname(self.homer_path), '..', 'motifs/extras/' + fname))
            return self._read_tsv_skipping_bad_lines(path).iloc[:, [1, 11]].dropna()
        try:
            return read('table.txt')
        except Exception:
            # Best effort: any failure with the first file falls back to the second one.
            return read('motifTable.txt')

    @staticmethod
    def _convert_symbols_by_homology(homer_motif_annotation, species):
        """Replace human TF symbols in a ('Name', 'Symbol') table by their homologs in `species` (queried from Ensembl BioMart)."""
        biomart_query = {'mus_musculus': 'mmusculus_homolog_associated_gene_name',
                         'drosophila_melanogaster': 'dmelanogaster_homolog_associated_gene_name'}[species]
        dataset = Dataset(name='hsapiens_gene_ensembl',
                          host='http://www.ensembl.org')
        human2specie = dataset.query(attributes=['external_gene_name', biomart_query])
        human2specie.index = human2specie['Gene name']
        # Check that the TF has homolog (set lookup instead of rebuilding a list per TF)
        human_genes = set(human2specie.index)
        tf_names = [x for x in homer_motif_annotation.iloc[:, 1].tolist() if x in human_genes]
        human2specie = human2specie.loc[tf_names, :]
        human2specie.columns = ['Symbol', 'Homolog']
        merged = pd.merge(homer_motif_annotation, human2specie, on='Symbol', how='left')
        converted = merged.iloc[:, [0, 2]].copy()
        converted.columns = ['Name', 'Symbol']
        return converted

    def _annotate_known_motifs(self):
        """Return the known-motif table with cisTarget and Homer TF annotations merged in."""
        species = self._species_from_genome()
        # Prepare cistarget annotation
        ctx_motif_annotation = self._load_cistarget_annotation(species)
        motifs = self.known_motifs.copy()
        homer_motifs = 'homer__' + motifs['Consensus'] + '_' + [x.split('(')[0] for x in motifs['Motif Name']]
        motifs['MotifID'] = homer_motifs
        ctx_motif_annotation = self._subset_annotation(ctx_motif_annotation, homer_motifs.tolist())
        # Prepare homer annotation
        homer_motif_annotation = self._load_homer_motif_table()
        homer_motif_annotation.columns = ['Name', 'Symbol']
        # If not human, convert by homology
        if species != 'homo_sapiens':
            homer_motif_annotation = self._convert_symbols_by_homology(homer_motif_annotation, species)
        # We first bind the cisTarget annotation
        motifs = pd.merge(motifs, ctx_motif_annotation, on='MotifID', how='left')
        # We now bind the Homer annotation
        homer_motif_annotation.columns = ['Motif Name', 'Homer_annot']
        motifs = pd.merge(motifs, homer_motif_annotation, on='Motif Name', how='left')
        # If Homer_annot is not (as a substring) in Direct_annot we append it
        direct = motifs['Direct_annot'].tolist()
        homer = motifs['Homer_annot'].tolist()
        motifs['Direct_annot'] = [str(d) + ', ' + str(h) if str(h) not in str(d) else d
                                  for d, h in zip(direct, homer)]
        # Remove the textual 'nan' left behind where one of the two sides was missing
        motifs['Direct_annot'] = motifs['Direct_annot'].replace('nan, ', '', regex=True)
        motifs['Direct_annot'] = motifs['Direct_annot'].replace(', nan', '', regex=True)
        return motifs.drop(['MotifID', 'Homer_annot'], axis=1)

    def _annotate_denovo_motifs(self):
        """Return the de novo motif table annotated via tomtom matches against the given motif collection."""
        # Find closest match for denovo motifs in cistarget database (as meme)
        pattern = os.path.join(self.outdir, 'homerResults', 'motif*[0-9$].motif')
        # Filter on the file name only, so an output folder whose path contains
        # 'similar' does not discard every motif.
        homer_motif_paths = [x for x in glob.glob(pattern) if 'similar' not in os.path.basename(x)]
        tomtom_pd = pd.concat([tomtom(x, self.meme_path, self.meme_collection_path) for x in homer_motif_paths])
        ctx_motif_annotation = self._load_cistarget_annotation(self._species_from_genome())
        ctx_motif_annotation = self._subset_annotation(ctx_motif_annotation, tomtom_pd.iloc[:, 1].tolist())
        ctx_motif_annotation = ctx_motif_annotation.rename(columns={'MotifID': 'Best Match/Tomtom'})
        # Bind cisTarget annotation
        tomtom_pd = pd.merge(tomtom_pd, ctx_motif_annotation, on='Best Match/Tomtom', how='left')
        return pd.merge(self.denovo_motifs, tomtom_pd, on='Best Match/Details', how='left')

    def _merge_motif_files(self, motif_dir, strip_best_guess=False):
        """Concatenate every '*.motif' file in `motif_dir` into a fresh 'all_motifs.motif' and return its path."""
        merged_path = os.path.join(motif_dir, 'all_motifs.motif')
        # Remove a stale merge first so it is neither globbed nor appended to.
        if os.path.exists(merged_path):
            os.remove(merged_path)
        chunks = []
        for motif_file in glob.glob(os.path.join(motif_dir, '*.motif')):
            with open(motif_file, 'rb') as handle:
                chunks.append(handle.read())
        content = b''.join(chunks)
        if strip_best_guess:
            # Same edit as `sed 's/\t.*BestGuess:/\t/g'`: collapse everything from the
            # first tab up to 'BestGuess:' into one tab ('.' never crosses a newline).
            content = re.sub(rb'\t.*BestGuess:', b'\t', content)
        with open(merged_path, 'wb') as handle:
            handle.write(content)
        return merged_path

    def _collect_motif_hits(self, results_name, n_cpu, strip_best_guess=False):
        """Run `homer2 find` on the merged motifs of one results folder; return `{motif: [regions]}`."""
        motif_file = self._merge_motif_files(os.path.join(self.outdir, results_name), strip_best_guess)
        hits_file = os.path.join(self.outdir, results_name + '_motif_hits.bed')
        # NOTE(review): the sequence file name is hard-coded to 'targetgiven.seq';
        # confirm Homer still writes that name when `size` is not 'given'.
        self._run_command([os.path.join(self.homer_path, 'homer2'), 'find',
                           '-s', os.path.join(self.outdir, 'targetgiven.seq'),
                           '-m', motif_file,
                           '-o', hits_file,
                           '-p', n_cpu])
        hits = pd.read_csv(hits_file, sep='\t', header=None)
        # Column 3 is grouped on and column 0 collected (deduplicated) per group.
        return hits.groupby(3)[0].apply(lambda g: list(set(g.values.tolist()))).to_dict()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def run(self):
        """
        Run Homer

        Calls `findMotifsGenome.pl`, loads the result tables, annotates the motifs,
        finds motif hits and forms cistromes.

        Raises
        ---------
        RuntimeError
            If the Homer command fails or cannot be started.
        """
        log = self._get_logger()
        cmd = [os.path.join(self.homer_path, 'findMotifsGenome.pl'),
               self.bed_path, self.genome, self.outdir,
               '-preparsedDir', self.outdir,
               '-size', self.size,
               '-len', self.len]
        if self.mask:
            cmd.append('-mask')
        if not self.denovo:
            cmd.append('-nomotif')
        cmd.append('-keepFiles')
        log.info("Running Homer for %s with %s", self.name, ' '.join(str(part) for part in cmd))
        self._run_command(cmd)
        # Loading results is best effort: a missing or unreadable table is reported, not fatal.
        try:
            self.known_motifs = self.load_known()
        except Exception as err:
            log.info('No known results found: %s', err)
        if self.denovo:
            try:
                self.denovo_motifs = self.load_denovo()
            except Exception as err:
                log.info('No de novo results found: %s', err)
        log.info("Annotating motifs for %s", self.name)
        self.add_motif_annotation_homer()
        log.info("Finding motif hits for %s", self.name)
        self.find_motif_hits(n_cpu=1)
        log.info("Getting cistromes for %s", self.name)
        self.get_cistromes(self.cistrome_annotation)

    def load_known(self):
        """
        Load known motif enrichment results from file.
        """
        return pd.read_csv(os.path.join(self.outdir, 'knownResults.txt'), sep='\t')

    def load_denovo(self):
        """
        Load de novo motif enrichment results from file.

        Reads the first table of 'homerResults.html', keeps columns 7, 2, 3, 4, 5, 6 (in that order),
        trims the first kept column at 'More' and also writes the table to 'homerResults.txt'.
        """
        tables = pd.read_html(os.path.join(self.outdir, 'homerResults.html'), header=0)
        # Work on an explicit copy so the in-place edit below never targets a view.
        denovo = tables[0].iloc[:, [7, 2, 3, 4, 5, 6]].copy()
        denovo.iloc[:, 0] = [x.split('More')[0] for x in denovo.iloc[:, 0]]
        denovo.to_csv(os.path.join(self.outdir, 'homerResults.txt'), sep='\t', index=False)
        return denovo

    def add_motif_annotation_homer(self):
        """
        Add motif annotations (based on Homer, cisTarget and meme if specified)

        Updates `known_motifs` and/or `denovo_motifs` in place when they are loaded and non-empty.
        De novo motifs are only annotated when both `meme_path` and `meme_collection_path` are set.

        Raises
        ---------
        ValueError
            If the species cannot be inferred from `genome`.
        """
        log = self._get_logger()
        if self.known_motifs is not None and self.known_motifs.shape[0] != 0:
            log.info('Annotating known motifs')
            self.known_motifs = self._annotate_known_motifs()
        if self.denovo_motifs is not None and self.denovo_motifs.shape[0] != 0:
            if self.meme_path is None:
                log.info('Parameter meme_path is not provided. Skipping annotation of de novo motifs')
            elif self.meme_collection_path is None:
                log.info('Parameter meme_collection_path is not provided. Skipping annotation of de novo motifs')
            else:
                log.info('Comparing de novo motifs with given motif collection with tomtom')
                # The species is resolved independently of the known-motif branch,
                # so de novo annotation also works when no known results exist.
                self.denovo_motifs = self._annotate_denovo_motifs()

    def find_motif_hits(self, n_cpu=1):
        """
        Find motif hits with `homer2 find`

        Parameters
        ---------
        n_cpu: int
            Number of cores to use.

        Raises
        ---------
        RuntimeError
            If `homer2 find` fails or cannot be started.
        """
        log = self._get_logger()
        if self.known_motifs is not None and self.known_motifs.shape[0] != 0:
            log.info('Retrieving enriched regions per known motif')
            self.known_motif_hits = self._collect_motif_hits('knownResults', n_cpu)
        if self.denovo_motifs is not None and self.denovo_motifs.shape[0] != 0:
            log.info('Retrieving enriched regions per de novo motif')
            denovo_motif_hits = self._collect_motif_hits('homerResults', n_cpu, strip_best_guess=True)
            # Entries whose name starts with a digit are dropped.
            self.denovo_motif_hits = {k: v for k, v in denovo_motif_hits.items() if not k[0].isdigit()}

    def get_cistromes(self, annotation: List[str] = ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot']):
        """
        Format cistromes per TF

        Fills `known_cistromes` and `denovo_cistromes` for whichever motif hits are available.
        Keys end with the number of regions: '<TF> (<n>r)' for known motifs and '<TF>_(<n>r)' for
        de novo motifs; TFs supported by the full `annotation` list get an '_extended' suffix.

        Parameters
        ---------
        annotation: List, optional
            Annotation to use for forming cistromes. It can be 'Direct_annot' (direct evidence that the motif is
            linked to that TF), 'Motif_similarity_annot' (based on tomtom motif similarity), 'Orthology_annot'
            (based on orthology with a TF that is directly linked to that motif) or 'Motif_similarity_and_Orthology_annot'.
            Default: ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot']
        """
        # NOTE(review): the two key formats differ (' (' vs '_('); kept as they are
        # because downstream code may rely on the exact keys - confirm which is intended.
        if self.known_motif_hits is not None:
            self.known_cistromes = self._cistromes_from_hits(self.known_motif_hits, self.known_motifs,
                                                             'Motif Name', annotation, ' (')
        if self.denovo_motif_hits is not None:
            self.denovo_cistromes = self._cistromes_from_hits(self.denovo_motif_hits, self.denovo_motifs,
                                                              'Best Match/Details', annotation, '_(')
# Run Homer
def run_homer(homer_path: str,
              region_sets: Dict[str, pr.PyRanges],
              outdir: str,
              genome: str,
              size: str = 'given',
              mask: bool = True,
              denovo: bool = False,
              length: str = '8,10,12',
              n_cpu: int = 1,
              meme_path: str = None,
              meme_collection_path: str = None,
              path_to_motif_annotations: str = None,
              annotation_version: str = 'v9',
              cistrome_annotation: List[str] = ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot'],
              motif_similarity_fdr: float = 0.001,
              orthologous_identity_threshold: float = 0.0,
              **kwargs):
    """
    Run Homer

    Writes one bed file per region set under `<outdir>/regions_bed`, runs one Homer analysis per
    region set through ray (results go to `<outdir>/<region set name>`) and returns the results
    keyed by region set name.

    Parameters
    ---------
    homer_path: str
        Path to Homer bin folder.
    region_sets: Dict
        A dictionary of PyRanges containing region coordinates for the region sets to be analyzed.
        A `Name` column is set on each PyRanges in the process.
    outdir: str
        Path to folder to output Homer results.
    genome: str
        Homer genome label to use.
    size: str, optional
        Fragment size to use for motif finding. Default: 'given' [uses the exact regions you give it]
    mask: bool, optional
        Whether to mask repeats or not. Default: True
    denovo : bool, optional
        Whether to infer overrepresented motifs de novo. Default: False
    length: str, optional
        Motif length values. Default: 8,10,12
    n_cpu: int
        Number of cores to use.
    meme_path: str, optional
        Path to meme bin folder. Meme will be used if given for motif annotation. Default: None
    meme_collection_path: str, optional
        Path to motif collection (in .cb format) to compare homer motifs with. Default: None
    path_to_motif_annotations: str, optional
        Path to motif annotations. If not provided, they will be downloaded from
        https://resources.aertslab.org based on the specie name provided (only possible for mus_musculus,
        homo_sapiens and drosophila_melanogaster). Default: None
    annotation_version: str, optional
        Motif collection version. Default: v9
    cistrome_annotation: List, optional
        Annotation to use for forming cistromes. It can be 'Direct_annot' (direct evidence that the motif is
        linked to that TF), 'Motif_similarity_annot' (based on tomtom motif similarity), 'Orthology_annot'
        (based on orthology with a TF that is directly linked to that motif) or 'Motif_similarity_and_Orthology_annot'.
        Default: ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot']
    motif_similarity_fdr: float, optional
        Minimal motif similarity value to consider two motifs similar. Default: 0.001
    orthologous_identity_threshold: float, optional
        Minimal orthology value for considering two TFs orthologous. Default: 0.0
    **kwargs
        Extra parameters to pass to `ray.init()`.

    Returns
    ---------
    Dict
        A dictionary mapping each region set name to the object returned by `homer_ray` for it.

    References
    ---------
    Heinz S, Benner C, Spann N, Bertolino E et al. Simple Combinations of Lineage-Determining
    Transcription Factors Prime cis-Regulatory Elements Required for Macrophage and B Cell Identities.
    Mol Cell 2010 May 28;38(4):576-589. PMID: 20513432
    """
    # Configure logging to stdout (does nothing if logging is already configured)
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        handlers=[logging.StreamHandler(stream=sys.stdout)])
    # Create the output and bed directories (parents included, existing ones are fine)
    bed_dir = os.path.join(outdir, 'regions_bed')
    os.makedirs(bed_dir, exist_ok=True)
    # Create bed files for Homer
    bed_paths = {}
    for key, regions in region_sets.items():
        bed_path = os.path.join(bed_dir, key + '.bed')
        regions.Name = coord_to_region_names(regions)
        regions.to_bed(path=bed_path, keep=False, compression='infer', chain=False)
        bed_paths[key] = bed_path
    # Run Homer
    ray.init(num_cpus=n_cpu, **kwargs)
    # Remember the stream that is active right now (in a notebook this is not
    # sys.__stderr__) so exactly that stream is put back afterwards.
    previous_stderr = sys.stderr
    sys.stderr = null
    try:
        results = ray.get([homer_ray.remote(homer_path,
                                            bed_path,
                                            set_name,
                                            # Joined, not concatenated: `outdir + name` escapes
                                            # `outdir` whenever it lacks a trailing separator.
                                            os.path.join(outdir, set_name),
                                            genome,
                                            size,
                                            mask,
                                            denovo,
                                            length,
                                            meme_path,
                                            meme_collection_path,
                                            path_to_motif_annotations,
                                            annotation_version,
                                            cistrome_annotation,
                                            motif_similarity_fdr,
                                            orthologous_identity_threshold)
                           for set_name, bed_path in bed_paths.items()])
    finally:
        # Always stop ray and give stderr back, also when a worker raised.
        try:
            ray.shutdown()
        finally:
            sys.stderr = previous_stderr
    # ray.get returns results in submission order, i.e. the order of bed_paths.
    return dict(zip(bed_paths, results))
@ray.remote
def homer_ray(homer_path: str,
              bed_path: str,
              name: str,
              outdir: str,
              genome: str,
              size: str = 'given',
              mask: bool = True,
              denovo: bool = False,
              length: str = '8,10,12',
              meme_path: str = None,
              meme_collection_path: str = None,
              path_to_motif_annotations: str = None,
              annotation_version: str = 'v9',
              cistrome_annotation: List[str] = ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot'],
              motif_similarity_fdr: float = 0.001,
              orthologous_identity_threshold: float = 0.0):
    """
    Ray method to run Homer.

    Recreates `outdir` from scratch (any existing content is deleted), runs a full
    :class:`Homer` analysis in it and returns the resulting :class:`Homer` object.
    Parameters have the same meaning as in :class:`Homer`.
    """
    # Configure logging to stdout (does nothing if logging is already configured)
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        handlers=[logging.StreamHandler(stream=sys.stdout)])
    log = logging.getLogger('Homer')
    # Start from an empty output folder
    if os.path.exists(outdir):
        shutil.rmtree(outdir)
    os.makedirs(outdir)
    log.info('Running %s', name)
    Homer_res = Homer(homer_path,
                      bed_path,
                      name,
                      outdir,
                      genome,
                      size,
                      mask,
                      denovo,
                      length,
                      meme_path,
                      meme_collection_path,
                      path_to_motif_annotations,
                      annotation_version,
                      cistrome_annotation,
                      motif_similarity_fdr,
                      # Previously not forwarded, so the caller's value was silently
                      # replaced by the Homer default.
                      orthologous_identity_threshold)
    Homer_res.run()
    log.info('%s done!', name)
    return Homer_res
# Utils
## Show results
def homer_results(homer_dict, name, results='known'):
    """
    A function to show Homer results in jupyter notebooks.

    The HTML report is edited in place (logo dimensions are enlarged) before it is wrapped
    in an `IPython.display.HTML` object.

    Parameters
    ---------
    homer_dict: Dict
        A dictionary with one :class:`Homer` object per slot.
    name: str
        Dictionary key of the analysis result to show.
    results: str
        Whether to show known ('known') or de novo ('denovo') results. Default: 'known'

    Raises
    ---------
    ValueError
        If `results` is neither 'known' nor 'denovo'.
    """
    report_names = {'known': 'knownResults.html', 'denovo': 'homerResults.html'}
    # Fail with a clear message instead of hitting an unbound local further down.
    if results not in report_names:
        raise ValueError("results must be 'known' or 'denovo', got {!r}".format(results))
    file = os.path.join(homer_dict[name].outdir, report_names[results])
    inplace_change(file, 'width="505" height="50"', 'width="1010" height="200"')
    return HTML(file)