Documentation : API-based import

1. General

This feature is still in beta and is intended for use in local installations only!

2. Prerequisites

  • The galaxy-ftp server must be available as a local mount point.
  • The ftp folder must be writable by the user executing the script.

3. The script

Code
#!/usr/bin/python



## import data into VariantDB at the end of an analysis.

## 1. upload samples to VDB-ftp
## 2. Call API to import data.


# load modules
import sys
import json
import urllib
import urllib2
import getopt 
import os.path
import csv
import re
import shutil
import time

## LOCAL INSTALLATION SETTINGS: adapt these values to your own setup.
# Base url of the VariantDB API. Endpoint names are appended directly, so keep the trailing slash.
vdb_url = "http://143.169.238.105/variantdb/api/"

## GLOBAL VARS
# Root of the ftp structure. It must be reachable as a local path (an smb/nfs/... mount is fine).
# Alternative root that was used before:
#ftp_path = "/home/cnvbeta/ftp-data/"
# API imports are staged below the folder of a fake user called "VariantDB_API".
ftp_path = "/galaxy/galaxy-ftp/" + "VariantDB_API"
# Module-level default for the API key; main() works with the value passed by -k.
apikey = ''

def main() :
	"""Stage the samples of a sample sheet on the ftp mount and import them by API.

	Steps:
	  1. parse the command line, validate the API key and resolve the user to import for
	  2. (re)create the staging folder <ftp_path>/<userid>_<project> and fill it
	  3. start the import through the VariantDB API
	  4. poll the import job until its status is no longer 'Running'

	The function always ends the process through sys.exit(): status 0 when the
	import finished or when there was nothing to import, status 2 on failures.
	"""
	#############
	## PREPARE ##
	#############
	# parse commandline
	optlist,args = getArgs(sys.argv[1:])
	# if no api key provided : exit (Usage() terminates the script)
	if 'k' not in optlist:
		print('No API key provided.')
		Usage()
	apikey = optlist['k']

	# check correctness of API key.
	# NOTE(review): the key is accepted when the API answers 1 or "1"; the previous
	# code evaluated `answer == '1'` without acting on the result, so invalid keys
	# were never rejected. Confirm the exact answer of CheckApiKey on your server.
	answer = fetch_json_page(vdb_url + 'CheckApiKey?apiKey='+apikey)
	if answer not in (1, '1'):
		print(answer)
		print("Invalid API Key provided.")
		print("Log in on VariantDB and check the key under User-Settings (click on your name)")
		Usage()

	# get user details of API (check if admin user, get email for FTP-subfolder)
	# fetch_json_page() reports its own failures through sys.exit(), so SystemExit
	# is caught as well to keep the step-specific message.
	try:
		user_details = fetch_json_page(vdb_url + 'GetUserDetails?apiKey='+apikey)
	except (Exception, SystemExit):
		print("Failed to retrieve user details for API-Key.")
		sys.exit(2)

	## As what user should I run this ?
	runas = SetRunAs(optlist,user_details)

	# the project name becomes part of a folder name: replace all non-word characters.
	project = re.sub(r"[^\w]",'_',optlist['p'])
	target_dir = ftp_path+'/'+runas+'_'+project

	# check if ftp_location is writable: start from a fresh, empty staging folder.
	try:
		if os.path.exists(target_dir):
			shutil.rmtree(target_dir)
		os.mkdir(target_dir)
	except OSError:
		print("Could not create folder %s/%s_%s" % (ftp_path,runas,project))
		sys.exit(2)
	os.chmod(target_dir,0o777)

	# put userid in place
	_write_shared_file(target_dir+'/uid.txt',runas)
	# put projectname in place
	_write_shared_file(target_dir+'/project.name.txt',project)

	###################
	## 1. UPLOAD DATA #
	###################
	dataidx = _stage_samples(optlist['s'],target_dir)
	if dataidx == 0:
		print("No data to import")
		sys.exit(0)

	#########################
	## 2. RUN IMPORT BY API #
	#########################
	try:
		answer = fetch_json_page(vdb_url + 'ImportData/'+runas+'_'+project+'/'+str(dataidx)+'?apiKey='+apikey)
	except (Exception, SystemExit):
		print("Failed to start import on VariantDB by API.")
		sys.exit(2)
	# without a job key there is nothing to wait for.
	if not isinstance(answer,dict) or 'job_key' not in answer:
		print("Failed to start import on VariantDB by API.")
		print(answer)
		sys.exit(2)
	jobKey = answer['job_key']
	if answer.get('result') == 'Started':
		print("Import started, job id is %s" %(jobKey))

	###################################
	## 3. WAIT FOR  IMPORT TO FINISH ##
	###################################
	print("Waiting for import to finish...")
	while True:
		try:
			answer = fetch_json_page(vdb_url + 'GetStatus/Import/'+jobKey+'?apiKey='+apikey)
			status = answer['status']
		except (Exception, SystemExit):
			print("Failed to get import status.")
			sys.exit(2)
		if status != 'Running':
			break
		# only wait when another poll is needed.
		time.sleep(15)

	print("Import finished. Status: "+status)

	###########
	## CLEAN ##
	###########
	# the 'ftp'-folder is cleaned by import api routine.
	sys.exit(0)


def _write_shared_file(path,content):
	"""Write content to path and set the file mode to 0777."""
	handle = open(path,'w')
	try:
		handle.write(content)
	finally:
		handle.close()
	os.chmod(path,0o777)


def _stage_samples(sheet_path,target_dir):
	"""Copy the samples listed in the tab separated sample sheet into target_dir.

	Expected columns per row: sample name, gender, path to VCF, path to BAM,
	store flag, format. Rows with fewer than six columns or with a missing VCF
	file are reported and skipped. Every accepted sample gets the next index i
	(starting at 1) and produces data<i>.vcf, store.<i>.txt and, when the BAM
	file exists, data<i>.bam. names.txt, genders.txt and formats.txt are written
	once all rows are processed, also when no sample was accepted.

	Exits with status 2 when the sample sheet cannot be opened.
	Returns the number of staged samples.
	"""
	try:
		sheet = open(sheet_path,'r')
	except (IOError, OSError):
		print("Could not open file '"+sheet_path+"' for reading")
		sys.exit(2)
	names = []
	genders = []
	formats = []
	dataidx = 0
	try:
		for line in csv.reader(sheet,delimiter='\t'):
			if len(line) == 0:
				continue
			print("sample: %s" % (line[0]))
			# all six columns are read below: skip incomplete rows instead of crashing.
			if len(line) < 6:
				print("Sample sheet line has %d column(s), 6 are required" % (len(line)))
				print("  => Sample skipped")
				continue
			sample_name, gender, vcf, bam, store, vcf_format = line[:6]
			# check validity vcf should be file.
			if not os.path.isfile(vcf):
				print("Specified VCF file does not exist:"+vcf)
				print("  => Sample skipped")
				continue
			# increment counter
			dataidx += 1
			idx = str(dataidx)
			names.append("name"+idx+"=="+sample_name+'\n')
			genders.append("gender"+idx+"=="+gender+'\n')
			formats.append("format"+idx+"=="+vcf_format+'\n')
			# add store
			_write_shared_file(target_dir+"/store."+idx+".txt",store)
			# copy vcf
			shutil.copyfile(vcf,target_dir+'/data'+idx+'.vcf')
			os.chmod(target_dir+'/data'+idx+'.vcf',0o777)
			# copy bam (optional: only when the file exists)
			if os.path.isfile(bam):
				shutil.copyfile(bam,target_dir+'/data'+idx+'.bam')
				os.chmod(target_dir+'/data'+idx+'.bam',0o777)
	finally:
		sheet.close()
	_write_shared_file(target_dir+'/names.txt',"".join(names))
	_write_shared_file(target_dir+'/genders.txt',"".join(genders))
	_write_shared_file(target_dir+'/formats.txt',"".join(formats))
	return dataidx

	
def SetRunAs(optlist,user_details):
	"""Return the id of the VariantDB user the import has to run for.

	optlist      : parsed command line options; 'k' (api key) is required,
	               'u' (email of the target user) is optional.
	user_details : details of the api-key owner; 'id' is always read, 'email'
	               and 'level' are read when -u is given.

	Without -u, or when -u equals the email of the api-key owner, the id of the
	api-key owner is returned. Importing for somebody else requires a level of
	at least 3; the details of that user are then fetched through the API.
	Exits with status 2 when the permissions are insufficient, when the lookup
	fails or when the lookup does not return a user id.
	"""
	apikey = optlist['k']
	## no email provided, run as apiKey user
	if 'u' not in optlist:
		return(user_details['id'])
	requested = optlist['u']
	# api and provided are the same
	if requested == user_details['email']:
		return(user_details['id'])

	# check for permissions for running as different user.
	# The level is converted explicitly: it comes from a JSON answer and a string
	# would never compare as smaller than 3 on Python 2. Unparsable => no admin.
	try:
		level = int(user_details['level'])
	except (TypeError, ValueError):
		level = 0
	if level < 3:
		print("provided email does not match api-user email.")
		print("  => This is only allowed for admin users (which you are not).")
		print("  => provided: %s ; api-user: %s" % (requested,user_details['email']))
		sys.exit(2)

	# sufficient permissions to run as different user: get its details.
	# fetch_json_page() reports its own failures through sys.exit(), so SystemExit
	# is caught as well to keep the step-specific message.
	try:
		details = fetch_json_page(vdb_url + 'GetUserDetails/'+requested+'?apiKey='+apikey)
	except (Exception, SystemExit):
		print("Failed to retrieve user details for API-Key.")
		sys.exit(2)

	if isinstance(details,dict) and details.get('id'):
		print("Importing to account of %s %s " % (details['FirstName'], details['LastName']))
		return(details['id'])
	print("Provided email is not registered at VariantDB.")
	sys.exit(2)

def getArgs(args):  
	"""Parse the command line arguments.

	Recognised options:
	  -k : apikey (its presence is checked by the caller)
	  -p : project name (mandatory, non-empty)
	  -s : sample sheet (mandatory, non-empty)
	  -u : email of the user to import for (optional)
	  -h : show the help

	Usage() is called for -h, for unknown or incomplete options and for a
	missing mandatory option. Returns the tuple (optlist, args): optlist maps
	the option letters (without dash) to their values, args holds the remaining
	positional arguments.
	"""
	try:
		opts, args = getopt.getopt(args, 'k:p:s:u:h')
	except getopt.GetoptError as err:
		# unknown option or missing option value: show the reason and the help.
		print(str(err))
		Usage()
	optlist = dict((opt[1:], arg) for opt, arg in opts)
	# a help request wins over the checks for missing arguments.
	if 'h' in optlist:
		Usage()
	for mandatory in ('p', 's'):
		if not optlist.get(mandatory):
			print("Missing argument : -"+mandatory)
			Usage()
	return(optlist,args)

def Usage():
	"""Print the command line help to stdout and end the script with exit status 0."""
	# The synopsis names every option; it used to stop after a bare "-k".
	help_lines = (
		"\n\nUsage: python Import_To_VariantDB.py -k APIKEY -p PROJECT -s SAMPLESHEET [-u EMAIL]",
		" Default: Import samples provided on ftp server",
		" Mandatory: -k : api-key of administrator user (can import for others), or of user (import into this user account)",
		" Mandatory: -p : Project name.  (string).",
		" Mandatory: -s : Sample Sheet (path).",
		" Optional : -u : Import as user. This is only allowed for admin users on VariantDB. (email)",
		"\n",
		"Note: ",
		" Format of SampleSheet file (tab seperated, no header):",
		"   - sample name",
		"   - gender (Male/Female/[undef])",
		"   - path_to_VCF",
		"   - path_to_BAM",
		"   - store data in VariantDB ([0]/1)",
		"   - formats:",
		"       UG : Unified Genotyper",
		"       HC : Haplotype Caller",
		"       VS : Varscan",
		"       MT : MuTect",
		"       23 : 23 and Me, converted to VCF",
		"       IT : Ion Torrent Variant Caller",
		" ",
	)
	for help_line in help_lines:
		print(help_line)
	sys.exit(0)


def fetch_json_page(url):
    """Fetch url and return its body decoded from JSON.

    When the page cannot be fetched or decoded, a message and the url are
    printed and the script ends through sys.exit(2); nothing is returned then.
    """
    try:
        response = urllib2.urlopen(url)
        try:
            # decode while the connection is open, release it in every case.
            return json.load(response)
        finally:
            response.close()
    except Exception:
        print('Fetching api repsonse failed for following url:')
        print(url)
        sys.exit(2)

# Start the import only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
	main()

4. Usage

Parameters:
  • Mandatory: -k : api-key of administrator user (can import for others), or of user (import into this user account)
  • Mandatory: -p : Project name. (string).
  • Mandatory: -s : Sample Sheet (path).
  • Optional : -u : Import as user. This is only allowed for admin users on VariantDB. (email)

Format of the SampleSheet file (tab-separated, no header):

  • sample name
  • gender (Male/Female/[undef])
  • path_to_VCF
  • path_to_BAM
  • store data in VariantDB ([0]/1)
  • formats:
    • UG : Unified Genotyper
    • HC : Haplotype Caller
    • VS : Varscan
    • MT : MuTect
    • 23 : 23 and Me, converted to VCF
    • IT : Ion Torrent Variant Caller