-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpsmc_for_vcf.py
More file actions
76 lines (62 loc) · 2.44 KB
/
psmc_for_vcf.py
File metadata and controls
76 lines (62 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import sys
import time
# example command:
# python psmc_for_vcf.py /vol/storage/pipeline/Fregetta_grallaria/output.vcf -o /vol/storage/pipeline/Fregetta_grallaria/psmc/ -n Fregetta_grallaria
# get starting time of program
program_start_time = time.time()
if not len(sys.argv) == 6:
print("Bad Inputs. Command is: python psmc_for_vcf.py [VCF_FILE] -o [OUTPUT_DIRECTORY] -n [NAME_FOR_FILES]")
exit()
# path of .vcf
file_path = sys.argv[1]
# output directory
output_dir = sys.argv[3]
# name for files
file_name = sys.argv[5]
# sys.argv[0] is the command itself
if not output_dir[-1:] == "/":
output_dir = output_dir + "/"
# check if folder exists and if not, create it
if not os.path.exists(output_dir):
os.makedirs(output_dir)
#
#
# TODO: Test if this works with a bam file, like bwa.sorted.bam
#
# where `diploid.fq.gz' is typically the whole-genome diploid consensus sequence
# of one human individual, which can be generated by, for example:
#
# samtools mpileup -C50 -uf ref.fa aln.bam | bcftools view -c - \
# | vcfutils.pl vcf2fq -d 10 -D 100 | gzip > diploid.fq.gz
#
# if needed, unzip .vcf file and remember to rezip it at the end
wasZipped = False
if file_path[-3:] == ".gz":
wasZipped = True
print("Unzipping .vcf file...")
os.system("gunzip " + file_path)
file_path = file_path[:-3]
# make consensus file
print("Creating consensus file...")
os.system("vcfutils.pl vcf2fq -d 10 -D 100 " + file_path + " | gzip > " + output_dir + "consensus.fq")
# psmc commands
print("Now using command fq2psmcfa")
# !!! NOTE !!! if -q20 doesn't work, try -q10
first_command = "/vol/storage/psmc-0.6.5/utils/fq2psmcfa -q20 consensus.fq.gz > " + output_dir + file_name + ".psmcfa"
print(first_command)
os.system(first_command)
print("Now using command psmc")
parameter = "4+25*2+4+6"
second_command = "/vol/storage/psmc-0.6.5/psmc -N25 -t15 -r5 -p " + parameter + " -o " + output_dir + file_name + ".psmc " + output_dir + file_name + ".psmcfa"
print(second_command)
os.system(second_command)
print("Now using command psmc2history-pl")
third_command = "/vol/storage/psmc-0.6.5/utils/psmc2history.pl " + output_dir + file_name + ".psmc | /vol/storage/psmc-0.6.5/utils/history2ms.pl > " + output_dir + file_name + "_ms-cmd.sh"
print(third_command)
os.system(third_command)
if wasZipped:
print("Rezipping .vcf file...")
os.system("gzip " + file_path)
run_time = time.time() - program_start_time
print("\nFinished! It took " + str(run_time / 3600) + " hours to complete")