This is still beta functionality and is intended for use in local installations only!
#!/usr/bin/python
"""Import data into VariantDB at the end of an analysis.

Steps:
  1. Upload the samples to the VDB-ftp folder (files are copied on the
     local filesystem into ``ftp_path``).
  2. Call the VariantDB API to import the data.
  3. Poll the API until the import job is no longer 'Running'.

All print calls use the single-argument parenthesised form and the urllib2
import has a fallback, so this module parses under Python 2.6+ and Python 3.
"""

# load modules
import sys
import json
import urllib
try:
    import urllib2
except ImportError:  # Python 3: urlopen lives in urllib.request
    import urllib.request as urllib2
import getopt
import os.path
import csv
import re
import shutil
import time

## SET LOCAL INSTALLATIONS HERE:
vdb_url = "http://143.169.238.105/variantdb/api/"

## GLOBAL VARS
## The path to the ftp structure has to be locally available (might be smb/nfs/... mounted)
ftp_path = "/galaxy/galaxy-ftp/"
#ftp_path = "/home/cnvbeta/ftp-data/"
# api-imports are placed in fake user folder: "VariantDB_API"
ftp_path = ftp_path+"VariantDB_API"
apikey = ''

# Number of tab-separated columns a sample sheet line must have
# (name, gender, vcf path, bam path, store flag, format).
_SHEET_COLUMNS = 6


def _fetch_or_exit(url, failure_message):
    """Fetch a JSON page; on failure print failure_message and exit(2).

    fetch_json_page() reports a failed request by printing the url and
    calling sys.exit(2), so the failure shows up here as SystemExit.
    """
    try:
        return fetch_json_page(url)
    except SystemExit:
        print(failure_message)
        sys.exit(2)


def _write_text(path, text):
    """Write text to path (truncating) and make the file mode 0777."""
    with open(path, 'w') as handle:
        handle.write(text)
    os.chmod(path, 0o777)


def _copy_data(source, destination):
    """Copy source to destination and make the copy mode 0777."""
    shutil.copyfile(source, destination)
    os.chmod(destination, 0o777)


def main():
    """Upload the samples from the sample sheet and run a VariantDB import.

    Reads the options from sys.argv (see Usage()). Always terminates the
    process: exit status 0 once polling of the import job ends or when there
    is no data to import, exit status 2 on any failure.
    """
    #############
    ## PREPARE ##
    #############
    # parse commandline
    optlist, _ = getArgs(sys.argv[1:])

    # if no api key provided : exit
    apikey = optlist.get('k')
    if not apikey:
        print('No API key provided.')
        Usage(2)

    # check correctness of API key.
    # NOTE(review): the original compared the answer to '1' inside a
    # try/except, which can never fail, so the key was never validated here.
    # The code suggests '1' means "valid"; confirm the exact JSON type the
    # API returns (both the string '1' and the number 1 are accepted).
    answer = fetch_json_page(vdb_url + 'CheckApiKey?apiKey=' + apikey)
    if answer not in ('1', 1):
        print(answer)
        print("Invalid API Key provided.")
        print("Log in on VariantDB and check the key under User-Settings (click on your name)")
        Usage(2)

    # get user details of API (check if admin user, get email for FTP-subfolder)
    user_details = _fetch_or_exit(
        vdb_url + 'GetUserDetails?apiKey=' + apikey,
        "Failed to retrieve user details for API-Key.")

    ## As what user should I run this ?
    runas = SetRunAs(optlist, user_details)

    # (re)create the upload folder; the project name is reduced to word
    # characters because it becomes part of a folder name and of a URL.
    optlist['p'] = re.sub(r"[^\w]", '_', optlist['p'])
    folder = "%s_%s" % (runas, optlist['p'])
    upload_dir = ftp_path + '/' + folder
    try:
        if os.path.exists(upload_dir):
            shutil.rmtree(upload_dir)
        os.mkdir(upload_dir)
    except OSError:
        print("Could not create folder %s" % upload_dir)
        sys.exit(2)
    os.chmod(upload_dir, 0o777)

    # put userid in place
    _write_text(upload_dir + '/uid.txt', "%s" % runas)
    # put projectname in place
    _write_text(upload_dir + '/project.name.txt', optlist['p'])

    ###################
    ## 1. UPLOAD DATA #
    ###################
    try:
        sheet = open(optlist['s'], 'r')
    except IOError:
        print("Could not open file '" + optlist['s'] + "' for reading")
        sys.exit(2)

    names = []
    genders = []
    formats = []
    dataidx = 0
    with sheet:
        for line in csv.reader(sheet, delimiter='\t'):
            if len(line) == 0:
                continue
            print("sample: %s" % (line[0]))
            # a short line would otherwise fail halfway through the upload
            if len(line) < _SHEET_COLUMNS:
                print("Sample sheet line has %d columns, expected %d" % (len(line), _SHEET_COLUMNS))
                print(" => Sample skipped")
                continue
            sample, gender, vcf, bam, store, data_format = line[:_SHEET_COLUMNS]
            # check validity vcf should be file.
            if not os.path.isfile(vcf):
                print("Specified VCF file does not exist:" + vcf)
                print(" => Sample skipped")
                continue
            # increment counter: accepted samples are numbered from 1
            dataidx += 1
            names.append("name" + str(dataidx) + "==" + sample + '\n')
            genders.append("gender" + str(dataidx) + "==" + gender + '\n')
            formats.append("format" + str(dataidx) + "==" + data_format + '\n')
            # add store
            _write_text(upload_dir + '/store.' + str(dataidx) + '.txt', store)
            # copy vcf
            _copy_data(vcf, upload_dir + '/data' + str(dataidx) + '.vcf')
            # copy bam (optional: only when the path points to a file)
            if os.path.isfile(bam):
                _copy_data(bam, upload_dir + '/data' + str(dataidx) + '.bam')

    # write samples, genders, formats.
    _write_text(upload_dir + '/names.txt', ''.join(names))
    _write_text(upload_dir + '/genders.txt', ''.join(genders))
    _write_text(upload_dir + '/formats.txt', ''.join(formats))

    if dataidx == 0:
        print("No data to import")
        sys.exit(0)

    #########################
    ## 2. RUN IMPORT BY API #
    #########################
    answer = _fetch_or_exit(
        vdb_url + 'ImportData/' + folder + '/' + str(dataidx) + '?apiKey=' + apikey,
        "Failed to start import on VariantDB by API.")
    # without a job key there is nothing to poll for
    if not isinstance(answer, dict) or 'job_key' not in answer:
        print("Import was not started by VariantDB. API response:")
        print(answer)
        sys.exit(2)
    if answer.get('result') == 'Started':
        print("Import started, job id is %s" % (answer['job_key']))

    ###################################
    ## 3. WAIT FOR IMPORT TO FINISH  ##
    ###################################
    status_url = vdb_url + 'GetStatus/Import/%s?apiKey=%s' % (answer['job_key'], apikey)
    print("Waiting for import to finish...")
    while True:
        status = _fetch_or_exit(status_url, "Failed to get import status.")['status']
        if status != 'Running':
            break
        # only wait when another poll is actually needed
        time.sleep(15)
    print("Import finished. Status: %s" % status)

    ###########
    ## CLEAN ##
    ###########
    # the 'ftp'-folder is cleaned by import api routine.
    sys.exit(0)


def SetRunAs(optlist, user_details):
    """Return the id of the VariantDB user the import should run as.

    optlist      : option dict from getArgs(); 'k' (api key) is required,
                   'u' (email to import as) is optional.
    user_details : details of the api-key owner; the keys 'id', 'email' and
                   'level' are read.

    Without -u, or when -u equals the api-key owner's email, the owner's id
    is returned. Otherwise the owner needs level >= 3 and the id of the
    account registered for the given email is returned. Exits with status 2
    when the owner lacks permission, the lookup fails or the email is unknown.
    """
    apikey = optlist['k']
    ## no email provided, run as apiKey user
    if 'u' not in optlist:
        return user_details['id']
    requested = optlist['u']
    # api and provided are the same
    if requested == user_details['email']:
        return user_details['id']
    # check for permissions for running as different user.
    if user_details['level'] < 3:
        print("provided email does not match api-user email.")
        print(" => This is only allowed for admin users (which you are not).")
        print(" => provided: %s ; api-user: %s" % (requested, user_details['email']))
        sys.exit(2)
    # sufficient permissions to run as different user: get its details.
    details = _fetch_or_exit(
        vdb_url + 'GetUserDetails/' + requested + '?apiKey=' + apikey,
        "Failed to retrieve user details for API-Key.")
    # any answer without a usable id is treated as "not registered"
    if isinstance(details, dict) and details.get('id'):
        print("Importing to account of %s %s " % (details['FirstName'], details['LastName']))
        return details['id']
    print("Provided email is not registered at VariantDB.")
    sys.exit(2)


def getArgs(args):
    """Parse the command line options.

    args : list of argument strings (typically sys.argv[1:]).

    Returns (optlist, remaining_args) where optlist maps the option letter
    (without dash) to its value. Recognised options: -k, -p, -s, -u, -h.
    -h prints the help and exits with status 0; an unknown option or a
    missing/empty -p or -s prints the help and exits with status 2.
    The presence of -k is checked by the caller.
    """
    try:
        opts, args = getopt.getopt(args, 'k:p:s:u:h')
    except getopt.GetoptError as err:
        print(str(err))
        Usage(2)
    optlist = dict()
    for opt, arg in opts:
        optlist[opt[1:]] = arg
    # help is handled first so that "-h" alone is not reported as an error
    if 'h' in optlist:
        Usage()
    if 'p' not in optlist or optlist['p'] == '':
        print("Missing argument : -p")
        Usage(2)
    if 's' not in optlist or optlist['s'] == '':
        print("Missing argument : -s")
        Usage(2)
    return (optlist, args)


def Usage(exit_code=0):
    """Print the help text and exit the process with exit_code (default 0)."""
    # print help
    print("\n\nUsage: python Import_To_VariantDB.py -k ")
    print(" Default: Import samples provided on ftp server")
    print(" Mandatory: -k : api-key of administrator user (can import for others), or of user (import into this user account)")
    print(" Mandatory: -p : Project name. (string).")
    print(" Mandatory: -s : Sample Sheet (path).")
    print(" Optional : -u : Import as user. This is only allowed for admin users on VariantDB. (email)")
    print("\n")
    print("Note: ")
    print(" Format of SampleSheet file (tab seperated, no header):")
    print(" - sample name")
    print(" - gender (Male/Female/[undef])")
    print(" - path_to_VCF")
    print(" - path_to_BAM")
    print(" - store data in VariantDB ([0]/1)")
    print(" - formats:")
    print(" UG : Unified Genotyper")
    print(" HC : Haplotype Caller")
    print(" VS : Varscan")
    print(" MT : MuTect")
    print(" 23 : 23 and Me, converted to VCF")
    print(" IT : Ion Torrent Variant Caller")
    print(" ")
    sys.exit(exit_code)


def fetch_json_page(url):
    """Fetch url and return the decoded JSON document.

    On any failure (request or JSON decoding) the url is printed and the
    process exits with status 2.
    """
    try:
        response = urllib2.urlopen(url)
        try:
            return json.load(response)
        finally:
            response.close()
    except Exception:
        print('Fetching api repsonse failed for following url:')
        print(url)
        sys.exit(2)


if __name__ == "__main__":
    main()
Format of SampleSheet file (tab separated, no header):