# Source code for pycistarget.motif_enrichment_homer

import glob
import logging
import os
import re
import shutil
import subprocess
import sys
from typing import List
from typing import Union, Dict, Sequence, Optional

from IPython.display import HTML
import pandas as pd
from pybiomart import Dataset
import pyranges as pr
import ray

from .utils import *

# Sink used to silence stderr while ray is running, so that ray does not print
# "Broken pipe" over and over. It is opened in text mode because it is assigned
# to sys.stderr, which must accept str; a binary handle raises TypeError on write.
_stderr = sys.stderr
null = open(os.devnull, 'w')

class Homer():
    """
    Homer class.
    :class:`Homer` contains Homer for motif enrichment analysis on sets of regions. 
    
    Attributes
    ---------
    homer_path: str
        Path to Homer bin folder.
    bed_path: str
        Path to bed file containing region set to be analyzed with Homer.
    name: str
        Analysis name.
    outdir: str
        Path to folder to output Homer results.
    genome: str
        Homer genome label to use.
    size: str, optional
        Fragment size to use for motif finding. Default: 'given' [uses the exact regions you give it]
    mask: bool, optional
        Whether to mask repeats or not. Default: True
    denovo : bool, optional
        Whether to infer overrepresented motifs de novo. Default: False
    len: str, optional
        Motif length values (constructor argument `length`). Default: 8,10,12
    meme_path: str, optional
        Path to meme bin folder. Meme will be used if given for motif annotation. Default: None
    meme_collection_path: str, optional
        Path to motif collection (in .cb format) to compare homer motifs with. Default: None
    path_to_motif_annotations: str, optional
        Path to motif annotations. If not provided, they will be downloaded from 
        https://resources.aertslab.org based on the specie name provided (only possible for mus_musculus,
        homo_sapiens and drosophila_melanogaster). Default: None
    annotation_version: str, optional
        Motif collection version. Default: v9
    cistrome_annotation: List, optional
        Annotation to use for forming cistromes. It can be 'Direct_annot' (direct evidence that the motif is 
        linked to that TF), 'Motif_similarity_annot' (based on tomtom motif similarity), 'Orthology_annot'
        (based on orthology with a TF that is directly linked to that motif) or 'Motif_similarity_and_Orthology_annot'.
        Default: ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot']
    motif_similarity_fdr: float, optional
        Minimal motif similarity value to consider two motifs similar. Default: 0.001
    orthologous_identity_threshold: float, optional
        Minimal orthology value for considering two TFs orthologous. Default: 0.0
    known_motifs: pd.DataFrame
        A dataframe containing known motif enrichment results.
    denovo_motifs: pd.DataFrame
        A dataframe containing de novo motif enrichment results.
    known_motif_hits: Dict
        A dictionary containing regions with motif hits for each known motif.
    denovo_motif_hits: Dict
        A dictionary containing regions with motif hits for each de novo motif.
    known_cistromes: Dict
        A dictionary containing regions with motif hits for each TF found with known motifs.
    denovo_cistromes: Dict
        A dictionary containing regions with motif hits for each TF found de novo.

    References
    ---------
    Heinz S, Benner C, Spann N, Bertolino E et al. Simple Combinations of Lineage-Determining 
    Transcription Factors Prime cis-Regulatory Elements Required for Macrophage and B Cell Identities.
    Mol Cell 2010 May 28;38(4):576-589. PMID: 20513432
    """
    # Annotation columns used when the caller does not pick any. Kept as a tuple and
    # copied per use so that instances never share (and cannot corrupt) one list.
    _DEFAULT_CISTROME_ANNOTATION = ('Direct_annot', 'Motif_similarity_annot', 'Orthology_annot',
                                    'Motif_similarity_and_Orthology_annot')
    # Genome-label fragment -> species name handed to `load_motif_annotations`.
    # The first matching fragment wins; the order keeps 'hg' ahead of 'dm' ahead of 'mm'.
    _GENOME_TO_SPECIES = (('hg', 'homo_sapiens'),
                          ('dm', 'drosophila_melanogaster'),
                          ('mm', 'mus_musculus'))
    # Species name -> Ensembl BioMart attribute holding the homolog of a human gene.
    _HOMOLOG_ATTRIBUTE = {'mus_musculus': 'mmusculus_homolog_associated_gene_name',
                          'drosophila_melanogaster': 'dmelanogaster_homolog_associated_gene_name'}

    def __init__(self,
                 homer_path: str,
                 bed_path: str,
                 name: str,
                 outdir: str,
                 genome: str,
                 size: str = 'given',
                 mask: bool = True,
                 denovo: bool = False,
                 length: str = '8,10,12',
                 meme_path: str = None,
                 meme_collection_path: str = None,
                 path_to_motif_annotations: str = None,
                 annotation_version: str = 'v9',
                 cistrome_annotation: List[str] = None,
                 motif_similarity_fdr: float = 0.001,
                 orthologous_identity_threshold: float = 0.0):
        """
        Initialize Homer class. 

        Parameters
        ---------
        homer_path: str
            Path to Homer bin folder.
        bed_path: str
            Path to bed file containing region set to be analyzed with Homer.
        name: str
            Analysis name.
        outdir: str
            Path to folder to output Homer results.
        genome: str
            Homer genome label to use.
        size: str, optional
            Fragment size to use for motif finding. Default: 'given' [uses the exact regions you give it]
        mask: bool, optional
            Whether to mask repeats or not. Default: True
        denovo : bool, optional
            Whether to infer overrepresented motifs de novo. Default: False
        length: str, optional
            Motif length values. Default: 8,10,12
        meme_path: str, optional
            Path to meme bin folder. Meme will be used if given for motif annotation. Default: None
        meme_collection_path: str, optional
            Path to motif collection (in .cb format) to compare homer motifs with. Default: None
        path_to_motif_annotations: str, optional
            Path to motif annotations. If not provided, they will be downloaded from 
            https://resources.aertslab.org based on the specie name provided (only possible for mus_musculus,
            homo_sapiens and drosophila_melanogaster). Default: None
        annotation_version: str, optional
            Motif collection version. Default: v9
        cistrome_annotation: List, optional
            Annotation to use for forming cistromes. It can be 'Direct_annot' (direct evidence that the motif is 
            linked to that TF), 'Motif_similarity_annot' (based on tomtom motif similarity), 'Orthology_annot'
            (based on orthology with a TF that is directly linked to that motif) or 'Motif_similarity_and_Orthology_annot'.
            Default (also used when None is given):
            ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot']
        motif_similarity_fdr: float, optional
            Minimal motif similarity value to consider two motifs similar. Default: 0.001
        orthologous_identity_threshold: float, optional
            Minimal orthology value for considering two TFs orthologous. Default: 0.0

        References
        ---------
        Heinz S, Benner C, Spann N, Bertolino E et al. Simple Combinations of Lineage-Determining 
        Transcription Factors Prime cis-Regulatory Elements Required for Macrophage and B Cell Identities.
        Mol Cell 2010 May 28;38(4):576-589. PMID: 20513432
        """
        self.homer_path = homer_path
        self.bed_path = bed_path
        self.genome = genome
        self.outdir = outdir
        self.size = size
        self.len = length
        self.mask = mask
        self.denovo = denovo
        self.name = name
        self.meme_path = meme_path
        self.meme_collection_path = meme_collection_path
        self.path_to_motif_annotations = path_to_motif_annotations
        self.annotation_version = annotation_version
        # Each instance gets its own list when the default is used.
        if cistrome_annotation is None:
            cistrome_annotation = list(self._DEFAULT_CISTROME_ANNOTATION)
        self.cistrome_annotation = cistrome_annotation
        self.motif_similarity_fdr = motif_similarity_fdr
        self.orthologous_identity_threshold = orthologous_identity_threshold
        # Results, filled in by run() and the individual steps it calls.
        self.known_motifs = None
        self.denovo_motifs = None
        self.known_motif_hits = None
        self.denovo_motif_hits = None
        self.known_cistromes = None
        self.denovo_cistromes = None

    @staticmethod
    def _get_logger():
        """Configure stdout logging (no-op once logging is configured) and return the 'Homer' logger."""
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            handlers=[logging.StreamHandler(stream=sys.stdout)])
        return logging.getLogger('Homer')

    @staticmethod
    def _run_command(argv):
        """
        Run an external command given as an argument list; raise RuntimeError if it fails.

        The command is executed without a shell, so paths containing spaces or shell
        metacharacters are passed through verbatim.
        """
        try:
            subprocess.check_output(args=argv, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            raise RuntimeError("command '{}' return with error (code {}): {}".format(
                ' '.join(argv), e.returncode, e.output)) from e
        except OSError as e:
            # Executable missing or not runnable: report it the same way as a failed command.
            raise RuntimeError("command '{}' could not be started: {}".format(' '.join(argv), e)) from e

    def _resolve_species(self):
        """
        Return the species name matching `self.genome`.

        Raises
        ---------
        ValueError
            If the genome label contains none of 'hg', 'dm' or 'mm'.
        """
        for fragment, species in self._GENOME_TO_SPECIES:
            if fragment in self.genome:
                return species
        raise ValueError("Cannot infer the species from genome '{}': expected a label containing "
                         "'hg', 'mm' or 'dm'".format(self.genome))

    def run(self):
        """
        Run Homer

        Runs `findMotifsGenome.pl`, loads the result tables that were produced, annotates
        the motifs, finds the motif hits and builds the cistromes.

        Raises
        ---------
        RuntimeError
            If the Homer command fails or cannot be started.
        """
        log = self._get_logger()

        argv = [os.path.join(self.homer_path, 'findMotifsGenome.pl'),
                str(self.bed_path), str(self.genome), str(self.outdir),
                '-preparsedDir', str(self.outdir),
                '-size', str(self.size),
                '-len', str(self.len)]
        if self.mask:
            argv.append('-mask')
        if not self.denovo:
            # Skip de novo discovery unless it was asked for.
            argv.append('-nomotif')
        argv.append('-keepFiles')

        log.info("Running Homer for %s with %s", self.name, ' '.join(argv))
        self._run_command(argv)

        # Missing or unreadable result tables are not fatal: the matching steps are skipped.
        try:
            self.known_motifs = self.load_known()
        except Exception:
            log.info('No known results found')
        if self.denovo:
            try:
                self.denovo_motifs = self.load_denovo()
            except Exception:
                log.info('No de novo results found')

        log.info("Annotating motifs for " + self.name)
        self.add_motif_annotation_homer()
        log.info("Finding motif hits for " + self.name)
        self.find_motif_hits(n_cpu=1)
        log.info("Getting cistromes for " + self.name)
        self.get_cistromes(self.cistrome_annotation)

    def load_known(self):
        """
        Load known motif enrichment results from file.

        Returns
        ---------
        pd.DataFrame
            Content of `knownResults.txt` in the output folder.
        """
        return pd.read_csv(os.path.join(self.outdir, 'knownResults.txt'), sep='\t')

    def load_denovo(self):
        """
        Load de novo motif enrichment results from file.

        Reads the first table of `homerResults.html`, keeps columns 7, 2, 3, 4, 5 and 6 (in that
        order), cuts the first kept column at 'More', and also saves the table as `homerResults.txt`.

        Returns
        ---------
        pd.DataFrame
            The de novo result table.
        """
        html_path = os.path.join(self.outdir, 'homerResults.html')
        # Copy the selection so the in-place edit below works on an independent frame.
        denovo = pd.read_html(html_path, header=0)[0].iloc[:, [7, 2, 3, 4, 5, 6]].copy()
        denovo.iloc[:, 0] = [x.split('More')[0] for x in denovo.iloc[:, 0]]
        denovo.to_csv(os.path.join(self.outdir, 'homerResults.txt'), sep='\t', index=False)
        return denovo

    def add_motif_annotation_homer(self):
        """
        Add motif annotations (based on Homer, cisTarget and meme if specified)

        Raises
        ---------
        ValueError
            If motifs have to be annotated and the species cannot be inferred from the genome label.
        """
        log = self._get_logger()
        if self.known_motifs is not None and self.known_motifs.shape[0] != 0:
            log.info('Annotating known motifs')
            self._annotate_known_motifs()
        if self.denovo_motifs is not None and self.denovo_motifs.shape[0] != 0:
            self._annotate_denovo_motifs(log)

    def _load_ctx_motif_annotation(self, species):
        """Load the cisTarget motif annotation for `species` with this analysis' settings."""
        return load_motif_annotations(species,
                                      version=self.annotation_version,
                                      fname=self.path_to_motif_annotations,
                                      motif_similarity_fdr=self.motif_similarity_fdr,
                                      orthologous_identity_threshold=self.orthologous_identity_threshold)

    @staticmethod
    def _read_tsv_skipping_bad_lines(path):
        """Read a tab-separated file, skipping malformed lines, on both old and new pandas."""
        try:
            return pd.read_csv(path, sep='\t', on_bad_lines='skip')
        except TypeError:
            # pandas < 1.3 only knows the older spelling of this option.
            return pd.read_csv(path, sep='\t', error_bad_lines=False)

    def _load_homer_motif_table(self):
        """
        Load Homer's own motif table as a two-column frame named ['Name', 'Symbol'].

        Columns 1 and 11 of the table are used. `table.txt` is tried before `motifTable.txt`.
        The folder derived from `os.path.dirname(homer_path)` is tried first; the folder next
        to the normalised `homer_path` is tried as well so that a bin path given without a
        trailing slash also resolves (assumes the `motifs` folder sits beside the bin folder).
        """
        extras = os.path.join('..', 'motifs/extras')
        folders = []
        for base in (os.path.dirname(self.homer_path), os.path.normpath(self.homer_path)):
            folder = os.path.abspath(os.path.join(base, extras))
            if folder not in folders:
                folders.append(folder)
        candidates = [os.path.join(folder, fname)
                      for folder in folders
                      for fname in ('table.txt', 'motifTable.txt')]

        last_error = None
        for path in candidates:
            if not os.path.exists(path):
                continue
            try:
                table = self._read_tsv_skipping_bad_lines(path).iloc[:, [1, 11]].dropna()
            except Exception as err:
                # An unusable table is not fatal as long as another candidate can be read.
                last_error = err
                continue
            table.columns = ['Name', 'Symbol']
            return table
        raise FileNotFoundError('No usable Homer motif table found; tried: ' + ', '.join(candidates)) from last_error

    def _convert_symbols_to_homologs(self, homer_motif_annotation, species):
        """Replace the human gene symbols of the Homer table by their homologs in `species`."""
        dataset = Dataset(name='hsapiens_gene_ensembl',
                          host='http://www.ensembl.org')
        human2specie = dataset.query(attributes=['external_gene_name', self._HOMOLOG_ATTRIBUTE[species]])
        human2specie.index = human2specie['Gene name']
        # Check that the TF has homolog (set built once instead of per element).
        genes_with_entry = set(human2specie.index)
        TF_names = [x for x in homer_motif_annotation.iloc[:, 1].tolist() if x in genes_with_entry]
        human2specie = human2specie.loc[TF_names, :]
        human2specie.columns = ['Symbol', 'Homolog']
        df = pd.merge(homer_motif_annotation, human2specie, on='Symbol', how='left')
        # Keep the motif name and the homolog symbol.
        converted = df.iloc[:, [0, 2]].copy()
        converted.columns = ['Name', 'Symbol']
        return converted

    def _annotate_known_motifs(self):
        """Add cisTarget and Homer TF annotations to `self.known_motifs`."""
        species = self._resolve_species()
        # Prepare cistarget annotation
        ctx_motif_annotation = self._load_ctx_motif_annotation(species)
        motifs = self.known_motifs
        homer_motifs = 'homer__' + motifs['Consensus'] + '_' + [x.split('(')[0] for x in motifs['Motif Name']]
        motifs['MotifID'] = homer_motifs
        annotated_ids = set(ctx_motif_annotation.index)
        matched_ids = {x for x in homer_motifs if x in annotated_ids}
        ctx_motif_annotation = ctx_motif_annotation.loc[list(matched_ids)].reset_index()

        # Prepare homer annotation
        homer_motif_annotation = self._load_homer_motif_table()
        # If not human, convert by homology
        if species != 'homo_sapiens':
            homer_motif_annotation = self._convert_symbols_to_homologs(homer_motif_annotation, species)

        # We first bind the cisTarget annotation
        motifs = pd.merge(motifs, ctx_motif_annotation, on='MotifID', how='left')
        # We now bind the Homer annotation
        homer_motif_annotation.columns = ['Motif Name', 'Homer_annot']
        motifs = pd.merge(motifs, homer_motif_annotation, on='Motif Name', how='left')
        # If Homer_annot is not in Direct_annot we will add it
        direct_values = motifs.Direct_annot.tolist()
        homer_values = motifs.Homer_annot.tolist()
        motifs.Direct_annot = [str(direct) + ', ' + str(homer) if str(homer) not in str(direct) else direct
                               for direct, homer in zip(direct_values, homer_values)]
        # Missing values were stringified as 'nan' above; strip them from the joined text.
        motifs.Direct_annot = motifs.Direct_annot.replace('nan, ', '', regex=True)
        motifs.Direct_annot = motifs.Direct_annot.replace(', nan', '', regex=True)
        motifs = motifs.drop(['MotifID', 'Homer_annot'], axis=1)
        self.known_motifs = motifs

    def _annotate_denovo_motifs(self, log):
        """Annotate `self.denovo_motifs` through their tomtom match in the given motif collection."""
        if self.meme_path is None:
            log.info('Parameter meme_path is not provided. Skipping annotation of de novo motifs')
            return
        if self.meme_collection_path is None:
            log.info('Parameter meme_collection_path is not provided. Skipping annotation of de novo motifs')
            return
        # Resolved here (not only while annotating known motifs) so de novo-only runs work.
        species = self._resolve_species()
        # Find closest match for denovo motifs in cistarget database (as meme)
        log.info('Comparing de novo motifs with given motif collection with tomtom')
        homer_motif_paths = glob.glob(os.path.join(self.outdir, 'homerResults', 'motif*[0-9$].motif'))
        # Only the file name is inspected, so an output folder containing 'similar' is harmless.
        homer_motif_paths = [x for x in homer_motif_paths if 'similar' not in os.path.basename(x)]
        tomtom_pd = pd.concat([tomtom(x, self.meme_path, self.meme_collection_path) for x in homer_motif_paths])
        ctx_motif_annotation = self._load_ctx_motif_annotation(species)
        annotated_ids = set(ctx_motif_annotation.index)
        matched_ids = {x for x in tomtom_pd.iloc[:, 1].tolist() if x in annotated_ids}
        ctx_motif_annotation = ctx_motif_annotation.loc[list(matched_ids)].reset_index()
        ctx_motif_annotation = ctx_motif_annotation.rename(columns={'MotifID': 'Best Match/Tomtom'})
        # Bind cisTarget annotation
        tomtom_pd = pd.merge(tomtom_pd, ctx_motif_annotation, on='Best Match/Tomtom', how='left')
        self.denovo_motifs = pd.merge(self.denovo_motifs, tomtom_pd, on='Best Match/Details', how='left')

    def find_motif_hits(self, n_cpu=1):
        """
        Find motif hits with `homer2 find`
        
        Parameters
        ---------
        n_cpu: int
            Number of cores to use.

        Raises
        ---------
        RuntimeError
            If `homer2 find` fails or cannot be started.
        """
        log = self._get_logger()

        if self.known_motifs is not None and self.known_motifs.shape[0] != 0:
            log.info('Retrieving enriched regions per known motif')
            self.known_motif_hits = self._scan_motif_hits('knownResults', n_cpu, strip_best_guess=False)
        if self.denovo_motifs is not None and self.denovo_motifs.shape[0] != 0:
            log.info('Retrieving enriched regions per de novo motif')
            denovo_motif_hits = self._scan_motif_hits('homerResults', n_cpu, strip_best_guess=True)
            # Motifs whose name starts with a digit are dropped.
            self.denovo_motif_hits = {k: v for k, v in denovo_motif_hits.items() if not k[0].isdigit()}

    def _scan_motif_hits(self, results_name, n_cpu, strip_best_guess):
        """
        Merge the motif files of one result folder, scan the target sequences and group the hits.

        Parameters
        ---------
        results_name: str
            Result folder inside the output folder ('knownResults' or 'homerResults').
        n_cpu: int
            Number of cores to use.
        strip_best_guess: bool
            Whether to cut everything from the first tab of a line up to 'BestGuess:' in the merged file.

        Returns
        ---------
        Dict
            For each value of column 3 of the hits file, the unique values of column 0.
        """
        motif_dir = os.path.join(self.outdir, results_name)
        merged_path = os.path.join(motif_dir, 'all_motifs.motif')
        # Remove a stale merged file first so it is not merged into itself.
        if os.path.exists(merged_path):
            os.remove(merged_path)
        chunks = []
        for motif_file in glob.glob(os.path.join(motif_dir, '*.motif')):
            with open(motif_file, 'rb') as handle:
                chunks.append(handle.read())
        if chunks:
            merged = b''.join(chunks)
            if strip_best_guess:
                merged = re.sub(rb'\t.*BestGuess:', b'\t', merged)
            with open(merged_path, 'wb') as handle:
                handle.write(merged)

        hits_path = os.path.join(self.outdir, results_name + '_motif_hits.bed')
        self._run_command([os.path.join(self.homer_path, 'homer2'), 'find',
                           '-s', os.path.join(self.outdir, 'targetgiven.seq'),
                           '-m', merged_path,
                           '-o', hits_path,
                           '-p', str(n_cpu)])
        motif_hits = pd.read_csv(hits_path, sep='\t', header=None)
        return motif_hits.groupby(3)[0].apply(lambda g: list(set(g.values.tolist()))).to_dict()

    def get_cistromes(self, annotation: List[str] = None):
        """
        Format cistromes per TF
        
        Parameters
        ---------
        annotation: List, optional
            Annotation to use for forming cistromes. It can be 'Direct_annot' (direct evidence that the motif is 
            linked to that TF), 'Motif_similarity_annot' (based on tomtom motif similarity), 'Orthology_annot'
            (based on orthology with a TF that is directly linked to that motif) or 'Motif_similarity_and_Orthology_annot'.
            Default (also used when None is given):
            ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot']

        """
        if annotation is None:
            annotation = list(self._DEFAULT_CISTROME_ANNOTATION)
        # The region-count suffix differs between known (' (Nr)') and de novo ('_(Nr)') keys.
        if self.known_motif_hits is not None:
            self.known_cistromes = self._build_cistromes(self.known_motif_hits, self.known_motifs,
                                                         'Motif Name', annotation, ' (')
        if self.denovo_motif_hits is not None:
            self.denovo_cistromes = self._build_cistromes(self.denovo_motif_hits, self.denovo_motifs,
                                                          'Best Match/Details', annotation, '_(')

    @staticmethod
    def _build_cistromes(motif_hits, motifs, motif_column, annotation, count_prefix):
        """
        Build the cistrome dictionary for one set of motif hits.

        Direct cistromes are keyed by TF name, extended ones by TF name + '_extended';
        every key is suffixed with the number of regions in the cistrome.
        """
        cistromes = {}
        if 'Direct_annot' in annotation:
            for tf in get_TF_list(motifs, annotation=['Direct_annot']):
                tf_motifs = get_motifs_per_TF(motifs, tf, motif_column=motif_column, annotation=['Direct_annot'])
                cistromes[tf] = get_cistrome_per_TF(motif_hits, tf_motifs)
        # Extended cistromes are built whenever anything beyond direct annotation is requested.
        if 'Direct_annot' not in annotation or len(annotation) > 1:
            for tf in get_TF_list(motifs, annotation=annotation):
                tf_motifs = get_motifs_per_TF(motifs, tf, motif_column=motif_column, annotation=annotation)
                cistromes[tf + '_extended'] = get_cistrome_per_TF(motif_hits, tf_motifs)
        return {key + count_prefix + str(len(regions)) + 'r)': regions for key, regions in cistromes.items()}
 
# Run Homer           
def run_homer(homer_path: str,
                             region_sets: Dict[str, pr.PyRanges],
                             outdir: str,
                             genome: str,
                             size: str = 'given',
                             mask: bool = True,
                             denovo: bool = False,
                             length: str = '8,10,12',
                             n_cpu: int = 1,
                             meme_path: str = None,
                             meme_collection_path: str = None,
                             path_to_motif_annotations: str = None,
                             annotation_version: str = 'v9',
                             cistrome_annotation: List[str] = ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot'],
                             motif_similarity_fdr: float = 0.001,
                             orthologous_identity_threshold: float = 0.0,
                             **kwargs):
    """
    Run Homer 

    Each region set is written as a bed file under `<outdir>/regions_bed`, analyzed in its
    own ray task, and its results are written to `<outdir>/<region set name>`.

    Parameters
    ---------
    homer_path: str
        Path to Homer bin folder.
    region_sets: Dict
        A dictionary of PyRanges containing region coordinates for the region sets to be analyzed.
        The `Name` column of each PyRanges is overwritten with the region names.
    outdir: str
        Path to folder to output Homer results.
    genome: str
        Homer genome label to use.
    size: str, optional
        Fragment size to use for motif finding. Default: 'given' [uses the exact regions you give it]
    mask: bool, optional
        Whether to mask repeats or not. Default: True
    denovo : bool, optional
        Whether to infer overrepresented motifs de novo. Default: False
    length: str, optional
        Motif length values. Default: 8,10,12
    n_cpu: int
        Number of cores to use.
    meme_path: str, optional
        Path to meme bin folder. Meme will be used if given for motif annotation. Default: None
    meme_collection_path: str, optional
        Path to motif collection (in .cb format) to compare homer motifs with. Default: None
    path_to_motif_annotations: str, optional
        Path to motif annotations. If not provided, they will be downloaded from 
        https://resources.aertslab.org based on the specie name provided (only possible for mus_musculus,
        homo_sapiens and drosophila_melanogaster). Default: None
    annotation_version: str, optional
        Motif collection version. Default: v9
    cistrome_annotation: List, optional
        Annotation to use for forming cistromes. It can be 'Direct_annot' (direct evidence that the motif is 
        linked to that TF), 'Motif_similarity_annot' (based on tomtom motif similarity), 'Orthology_annot'
        (based on orthology with a TF that is directly linked to that motif) or 'Motif_similarity_and_Orthology_annot'.
        Default: ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot']
    motif_similarity_fdr: float, optional
        Minimal motif similarity value to consider two motifs similar. Default: 0.001
    orthologous_identity_threshold: float, optional
        Minimal orthology value for considering two TFs orthologous. Default: 0.0
    **kwargs
        Extra parameters to pass to `ray.init()`.

    Returns
    ---------
    Dict
        The result of each ray task, keyed by region set name.

    References
    ---------
    Heinz S, Benner C, Spann N, Bertolino E et al. Simple Combinations of Lineage-Determining 
    Transcription Factors Prime cis-Regulatory Elements Required for Macrophage and B Cell Identities.
    Mol Cell 2010 May 28;38(4):576-589. PMID: 20513432
    """
    # Configure logging to stdout (no-op when logging is already configured)
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        handlers=[logging.StreamHandler(stream=sys.stdout)])
    # Create output and bed directories (including missing parents)
    bed_dir = os.path.join(outdir, 'regions_bed')
    os.makedirs(bed_dir, exist_ok=True)
    # Create bed files for Homer
    bed_paths = {}
    for key in region_sets.keys():
        bed_path = os.path.join(bed_dir, key + '.bed')
        region_sets[key].Name = coord_to_region_names(region_sets[key])
        region_sets[key].to_bed(path=bed_path, keep=False, compression='infer', chain=False)
        bed_paths[key] = bed_path
    names = list(bed_paths.keys())
    # Run Homer
    ray.init(num_cpus=n_cpu, **kwargs)
    # Silence stderr while ray runs; remember the active stream (which may not be
    # sys.__stderr__, e.g. in a notebook) so exactly that one is put back.
    previous_stderr = sys.stderr
    sys.stderr = null
    try:
        results = ray.get([homer_ray.remote(homer_path,
                                            bed_paths[name],
                                            name,
                                            # Joined properly so outdir works with or without a trailing slash
                                            os.path.join(outdir, name),
                                            genome,
                                            size,
                                            mask,
                                            denovo,
                                            length,
                                            meme_path,
                                            meme_collection_path,
                                            path_to_motif_annotations,
                                            annotation_version,
                                            cistrome_annotation,
                                            motif_similarity_fdr,
                                            orthologous_identity_threshold) for name in names])
    finally:
        # Always release ray and restore stderr, also when a task raised.
        ray.shutdown()
        sys.stderr = previous_stderr
    return dict(zip(names, results))

@ray.remote
def homer_ray(homer_path: str,
              bed_path: str,
              name: str,
              outdir: str,
              genome: str,
              size: str = 'given',
              mask: bool = True,
              denovo: bool = False,
              length: str = '8,10,12',
              meme_path: str = None,
              meme_collection_path: str = None,
              path_to_motif_annotations: str = None,
              annotation_version: str = 'v9',
              cistrome_annotation: List[str] = ['Direct_annot', 'Motif_similarity_annot', 'Orthology_annot', 'Motif_similarity_and_Orthology_annot'],
              motif_similarity_fdr: float = 0.001,
              orthologous_identity_threshold: float = 0.0):
    """
    Ray method to run Homer.

    Recreates `outdir` from scratch (any existing content is deleted), runs a :class:`Homer`
    analysis with the given settings and returns the :class:`Homer` object. The parameters
    are passed on to :class:`Homer` under the same names.
    """
    # Configure logging to stdout and fetch the 'Homer' logger
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        handlers=[logging.StreamHandler(stream=sys.stdout)])
    log = logging.getLogger('Homer')

    # Start from an empty output folder
    if os.path.exists(outdir):
        shutil.rmtree(outdir)
    os.makedirs(outdir)

    log.info('Running ' + name)
    # Keyword arguments make sure every setting reaches the right parameter, including
    # orthologous_identity_threshold.
    Homer_res = Homer(homer_path=homer_path,
                      bed_path=bed_path,
                      name=name,
                      outdir=outdir,
                      genome=genome,
                      size=size,
                      mask=mask,
                      denovo=denovo,
                      length=length,
                      meme_path=meme_path,
                      meme_collection_path=meme_collection_path,
                      path_to_motif_annotations=path_to_motif_annotations,
                      annotation_version=annotation_version,
                      cistrome_annotation=cistrome_annotation,
                      motif_similarity_fdr=motif_similarity_fdr,
                      orthologous_identity_threshold=orthologous_identity_threshold)
    Homer_res.run()
    log.info(name + ' done!')
    return Homer_res
            
# Utils
## Show results
def homer_results(homer_dict, name, results='known'):
    """
    A function to show Homer results in jupyter notebooks.

    The result page is edited in place (the 505x50 image size is replaced by 1010x200)
    before it is displayed.
    
    Parameters
    ---------
    homer_dict: Dict
        A dictionary with one :class:`Homer` object per slot.
    name: str
        Dictionary key of the analysis result to show.
    results: str
        Whether to show known ('known') or de novo ('denovo') results. Default: 'known'

    Raises
    ---------
    ValueError
        If `results` is neither 'known' nor 'denovo'.
    """
    html_names = {'known': 'knownResults.html', 'denovo': 'homerResults.html'}
    if results not in html_names:
        raise ValueError("results must be 'known' or 'denovo', got {!r}".format(results))
    html_path = os.path.join(homer_dict[name].outdir, html_names[results])
    inplace_change(html_path, 'width="505" height="50"', 'width="1010" height="200"')
    return HTML(html_path)