Skip to content
Snippets Groups Projects
subset_scores.py 2.82 KiB
#!/usr/bin/env python

import argparse
from pathlib import Path

def parse_args():
    parser = argparse.ArgumentParser(description='Subset the scores to a given list of interactions')

    optionalArgs = parser._action_groups.pop()

    requiredArgs = parser.add_argument_group("required arguments")
    requiredArgs.add_argument('-s', '--scores-file',
                              dest='scores_file',
                              type=lambda p: Path(p).resolve(strict=True),
                              required=True,
                              help="The output of calculate_all_scores.py"
                              )
    requiredArgs.add_argument('-i', '--input-ints',
                              dest='input_ints',
                              type=lambda p: Path(p).resolve(strict=True),
                              required=True,
                              help="A tsv file containing interaction mappings between ncbi and pvogs")
    requiredArgs.add_argument('-o', '--output-file',
                              dest='outfile',
                              required=True,
                              type=lambda p: Path(p).resolve(),
                              help="File path to write the results in")

    parser._action_groups.append(optionalArgs)

    return parser.parse_args()

def parse_pvogs_interactions(interactions_fp):
    interactions = []
    uniques = 0
    all_in = 0
    with open(interactions_fp, 'r') as fp:
        for line in fp:
            all_in += 1
            fields = line.split('\t')
            interaction = tuple(sorted([fields[1].strip(), fields[3].strip()]))
            if interaction[0] == interaction[1]:
                print("Skipping: {} (self)".format(interaction))
                
            elif interaction not in interactions:
                interactions.append(interaction)
                uniques +=1
            else:
                print("Skipping: {} (duplicate)".format(interaction))
    print("Parsed {} / {} total input interactions".format(uniques, all_in))
    return interactions

def subset_all_scores(all_scores_fp, interactions_list, interactions_scores_fp):
    counter = 0 
    with open(all_scores_fp, 'r') as fin, open(interactions_scores_fp, 'w') as fout:
        header = fin.readline()
        fout.write(header)
        for line in fin:
            fields = line.split('\t')
            interaction = tuple(sorted([fields[0].strip(), fields[1].strip()]))
            if interaction in interactions_list:
                fout.write(line)
                counter += 1
    return counter

def main():
    args = parse_args()
    ints_list = parse_pvogs_interactions(args.input_ints)
    total = subset_all_scores(args.scores_file, ints_list, args.outfile)
    print("{} interaction scores written to file {}".format(total, str(args.outfile)))

if __name__ == '__main__':
    main()