1 Data

Data directory: /project2/xinhe/yanyul/deep_variant/yanyu/deep_brain/data

Copy the Noonan hNSC ATAC-seq bigBed annotations over from the ASD project's epigenomic annotations:

$ cp ../ASD/other_annotation/epigenomic_annotation/Noonan_hNSC_*bb deep_variant/yanyu/deep_brain/data/

2 bigBed to BED

Convert the bigBed peak files to BED format:

$ bash process_ATACseq_from_Noonan.sh
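
The script itself is not reproduced here; a minimal sketch of the bigBed-to-BED conversion it performs, using the pyBigWig package and assuming the Noonan_hNSC_*_peaks.bed naming seen in the next step (the real script may differ):

# Hypothetical sketch of the bigBed -> BED conversion done by
# process_ATACseq_from_Noonan.sh (file and output names are assumptions).
import glob
import pyBigWig

for bb_path in glob.glob("data/Noonan_hNSC_*.bb"):
    bed_path = bb_path.replace(".bb", "_peaks.bed")
    bb = pyBigWig.open(bb_path)
    with open(bed_path, "w") as out:
        for chrom, length in bb.chroms().items():
            # entries() returns (start, end, rest) tuples for bigBed files
            for start, end, rest in bb.entries(chrom, 0, length) or []:
                out.write(f"{chrom}\t{start}\t{end}\n")
    bb.close()

UCSC's bigBedToBed utility would do the same conversion directly on the command line.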

3 Label DeepSEA sequences with ATAC-seq signal

Label each DeepSEA interval with the ATAC-seq peak files listed in data/0628_ATACseq_list.txt:

$ cd ../
$ python my_scripts/wrapper_label_intervals.py data/allTFs.pos.withID.bed data/0628_ATACseq_list.txt test
awk: cmd. line:1: (FILENAME=- FNR=4097) fatal: print to "standard output" failed (Broken pipe)
cat: write error: Broken pipe
mkdir test/0628_ATACseq_list.txt
>>> working on data/Noonan_hNSC_50_peaks.bed
>>> >>> sort data/Noonan_hNSC_50_peaks.bed
>>> >>> checking data/Noonan_hNSC_50_peaks.bed
>>> >>> number of columns data/Noonan_hNSC_50_peaks.bed
awk: cmd. line:1: (FILENAME=- FNR=2731) fatal: print to "standard output" failed (Broken pipe)
cat: write error: Broken pipe
>>> >>> intersecting data/Noonan_hNSC_50_peaks.bed
>>> previous = 124826, after = 124826
>>> working on data/Noonan_hNSC_P15-1_peaks.bed
>>> >>> sort data/Noonan_hNSC_P15-1_peaks.bed
>>> >>> checking data/Noonan_hNSC_P15-1_peaks.bed
>>> >>> number of columns data/Noonan_hNSC_P15-1_peaks.bed
awk: cmd. line:1: (FILENAME=- FNR=2731) fatal: print to "standard output" failed (Broken pipe)
cat: write error: Broken pipe
>>> >>> intersecting data/Noonan_hNSC_P15-1_peaks.bed
>>> previous = 84105, after = 84105
>>> working on data/Noonan_hNSC_P5-1_peaks.bed
>>> >>> sort data/Noonan_hNSC_P5-1_peaks.bed
>>> >>> checking data/Noonan_hNSC_P5-1_peaks.bed
>>> >>> number of columns data/Noonan_hNSC_P5-1_peaks.bed
awk: cmd. line:1: (FILENAME=- FNR=2731) fatal: print to "standard output" failed (Broken pipe)
cat: write error: Broken pipe
>>> >>> intersecting data/Noonan_hNSC_P5-1_peaks.bed
>>> previous = 67160, after = 67160
>>> working on data/Noonan_hNSC_P5-2_peaks.bed
>>> >>> sort data/Noonan_hNSC_P5-2_peaks.bed
>>> >>> checking data/Noonan_hNSC_P5-2_peaks.bed
>>> >>> number of columns data/Noonan_hNSC_P5-2_peaks.bed
awk: cmd. line:1: (FILENAME=- FNR=2731) fatal: print to "standard output" failed (Broken pipe)
cat: write error: Broken pipe
>>> >>> intersecting data/Noonan_hNSC_P5-2_peaks.bed
>>> previous = 86563, after = 86563
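
For each peak file the wrapper sorts the BED, checks its column count, and intersects it with the DeepSEA intervals; the interval count is unchanged (previous = after), so every DeepSEA interval receives a label. The repeated "Broken pipe" messages appear to come from awk/cat output being piped into a reader that stops early (e.g. head) during the column check, and look harmless. A minimal sketch of the per-file step, assuming bedtools is available and the .intersect output naming seen in section 4.1 (the real script's internals may differ):

# Hypothetical sketch of the per-file labeling done by
# my_scripts/wrapper_label_intervals.py (names and output format are assumptions).
import subprocess

deepsea_bed = "data/allTFs.pos.withID.bed"   # DeepSEA intervals with IDs

def label_intervals(peak_bed):
    """Mark each DeepSEA interval by its overlap with one ATAC-seq peak file."""
    sorted_bed = peak_bed + ".sorted"
    with open(sorted_bed, "w") as out:
        subprocess.run(["sort", "-k1,1", "-k2,2n", peak_bed], stdout=out, check=True)
    # bedtools intersect -c appends the number of overlapping peaks to each interval
    out_bed = peak_bed + ".intersect"
    with open(out_bed, "w") as out:
        subprocess.run(
            ["bedtools", "intersect", "-c", "-a", deepsea_bed, "-b", sorted_bed],
            stdout=out, check=True)
    return out_bed

with open("data/0628_ATACseq_list.txt") as lst:
    for line in lst:
        label_intervals(line.strip())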

4 Merge labels and prepare input files for training

4.1 Merge all labels

List the per-file intersect outputs and merge them into a single label matrix:

$ cd /project2/xinhe/yanyul/deep_variant/yanyu/deep_brain
$ mkdir -p test/0629_ATACseq_merged
$ cd data
$ ls Noonan_h*intersect > ../test/0629_ATACseq_merged/merge.txt
$ cd ..
$ python my_scripts/merge_labels.py test/0628_ATACseq_list.txt/label.hdf5 test/0629_ATACseq_merged/merge.txt test/0629_ATACseq_merged/
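
merge_labels.py combines the per-file labels into a single matrix with one column per ATAC-seq file, in the order listed in merge.txt, written to label_merge.hdf5 as the dataset "merged" (2608182 rows x 4 columns, per the dumps below). A minimal sketch, assuming label.hdf5 stores one 0/1 vector per intersect file keyed by its file name (the key layout is a guess):

# Hypothetical sketch of my_scripts/merge_labels.py: stack per-file labels
# column-wise into a single (n_intervals, n_files) "merged" matrix.
import sys
import numpy as np
import h5py

label_h5, merge_list, out_dir = sys.argv[1], sys.argv[2], sys.argv[3]

with open(merge_list) as f:
    names = [line.strip() for line in f if line.strip()]

with h5py.File(label_h5, "r") as fin:
    # assumes one dataset per intersect file, keyed by its file name
    columns = [np.asarray(fin[name]).reshape(-1) for name in names]

merged = np.stack(columns, axis=1).astype(np.float64)   # (n_intervals, n_files)

with h5py.File(out_dir.rstrip("/") + "/label_merge.hdf5", "w") as fout:
    fout.create_dataset("merged", data=merged)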

4.2 Combine labels with sequences

4.2.1 Training

Running the script with only the two HDF5 files shows their structure; the dataset names and row range for the full call below are chosen from this output.

$ python my_scripts/add_y_label_and_rename.py my_train/DeepSEA_train_12__with_label.hdf5 test/0629_ATACseq_merged/label_merge.hdf5
HDF5 "my_train/DeepSEA_train_12__with_label.hdf5" {
GROUP "/" {
   DATASET "traindata" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 4400000, 4 ) / ( 4400000, 4 ) }
   }
   DATASET "trainxdata" {
      DATATYPE  H5T_IEEE_F32LE
      DATASPACE  SIMPLE { ( 4400000, 925 ) / ( 4400000, 925 ) }
   }
}
}
HDF5 "test/0629_ATACseq_merged/label_merge.hdf5" {
GROUP "/" {
   DATASET "merged" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 2608182, 4 ) / ( 2608182, 4 ) }
   }
}
}
$ python my_scripts/add_y_label_and_rename.py my_train/DeepSEA_train_12__with_label.hdf5 test/0629_ATACseq_merged/label_merge.hdf5 trainxdata merged 1 2200000 test/0629_ATACseq_merged/
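
The positional arguments name the sequence dataset (trainxdata), the label dataset (merged), a 1-based inclusive row range of merged labels, and the output directory. The training slice 1-2200000 is half of the 4400000 training examples, presumably because each DeepSEA training sequence is stored twice (forward and reverse-complement strand), so one label row serves both copies. A minimal sketch of what this step might do, with the strand duplication and output naming as assumptions rather than the script's actual code:

# Hypothetical sketch of my_scripts/add_y_label_and_rename.py
# (the duplication across strand copies is an assumption).
import os
import sys
import numpy as np
import h5py

seq_h5, label_h5 = sys.argv[1], sys.argv[2]
x_name, y_name = sys.argv[3], sys.argv[4]        # e.g. trainxdata, merged
start, end = int(sys.argv[5]), int(sys.argv[6])  # 1-based, inclusive
out_dir = sys.argv[7]

with h5py.File(label_h5, "r") as f:
    labels = np.asarray(f[y_name][start - 1:end, :])   # selected label rows

with h5py.File(seq_h5, "r") as f:
    x = np.asarray(f[x_name])

# If the sequence file holds forward + reverse-complement copies,
# repeat the labels so both copies of a sequence share one label row.
if x.shape[0] == 2 * labels.shape[0]:
    labels = np.tile(labels, (2, 1))

out_path = os.path.join(
    out_dir, os.path.basename(seq_h5).replace(".hdf5", "_with_label.hdf5"))
with h5py.File(out_path, "w") as f:
    f.create_dataset("trainxdata", data=x)
    f.create_dataset("traindata", data=labels)

The test and validation calls below use the slices 2309367-2536878 (227512 rows) and 2200001-2204000 (4000 rows), again half of the 455024 test and 8000 validation sequences.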

4.2.2 Testing

$ python my_scripts/add_y_label_and_rename.py my_train/DeepSEA_test_12__with_label.hdf5 test/0629_ATACseq_merged/label_merge.hdf5
HDF5 "my_train/DeepSEA_test_12__with_label.hdf5" {
GROUP "/" {
   DATASET "traindata" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 455024, 4 ) / ( 455024, 4 ) }
   }
   DATASET "trainxdata" {
      DATATYPE  H5T_IEEE_F32LE
      DATASPACE  SIMPLE { ( 455024, 925 ) / ( 455024, 925 ) }
   }
}
}
HDF5 "test/0629_ATACseq_merged/label_merge.hdf5" {
GROUP "/" {
   DATASET "merged" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 2608182, 4 ) / ( 2608182, 4 ) }
   }
}
}
$ python my_scripts/add_y_label_and_rename.py my_train/DeepSEA_test_12__with_label.hdf5 test/0629_ATACseq_merged/label_merge.hdf5 trainxdata merged 2309367 2536878 test/0629_ATACseq_merged/

4.2.3 Validation

$ python my_scripts/add_y_label_and_rename.py my_train/DeepSEA_valid_12__with_label.hdf5 test/0629_ATACseq_merged/label_merge.hdf5
HDF5 "my_train/DeepSEA_valid_12__with_label.hdf5" {
GROUP "/" {
   DATASET "traindata" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 8000, 4 ) / ( 8000, 4 ) }
   }
   DATASET "trainxdata" {
      DATATYPE  H5T_IEEE_F32LE
      DATASPACE  SIMPLE { ( 8000, 925 ) / ( 8000, 925 ) }
   }
}
}
HDF5 "test/0629_ATACseq_merged/label_merge.hdf5" {
GROUP "/" {
   DATASET "merged" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 2608182, 4 ) / ( 2608182, 4 ) }
   }
}
}
$ python my_scripts/add_y_label_and_rename.py my_train/DeepSEA_valid_12__with_label.hdf5 test/0629_ATACseq_merged/label_merge.hdf5 trainxdata merged 2200001 2204000 test/0629_ATACseq_merged/

4.2.4 Summary

$ h5dump -A -H test/0629_ATACseq_merged/*with_label.hdf5
HDF5 "test/0629_ATACseq_merged/DeepSEA_test_12__with_label_with_label.hdf5" {
GROUP "/" {
   DATASET "traindata" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 455024, 4 ) / ( 455024, 4 ) }
   }
   DATASET "trainxdata" {
      DATATYPE  H5T_IEEE_F32LE
      DATASPACE  SIMPLE { ( 455024, 925 ) / ( 455024, 925 ) }
   }
}
}
HDF5 "test/0629_ATACseq_merged/DeepSEA_train_12__with_label_with_label.hdf5" {
GROUP "/" {
   DATASET "traindata" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 4400000, 4 ) / ( 4400000, 4 ) }
   }
   DATASET "trainxdata" {
      DATATYPE  H5T_IEEE_F32LE
      DATASPACE  SIMPLE { ( 4400000, 925 ) / ( 4400000, 925 ) }
   }
}
}
HDF5 "test/0629_ATACseq_merged/DeepSEA_valid_12__with_label_with_label.hdf5" {
GROUP "/" {
   DATASET "traindata" {
      DATATYPE  H5T_IEEE_F64LE
      DATASPACE  SIMPLE { ( 8000, 4 ) / ( 8000, 4 ) }
   }
   DATASET "trainxdata" {
      DATATYPE  H5T_IEEE_F32LE
      DATASPACE  SIMPLE { ( 8000, 925 ) / ( 8000, 925 ) }
   }
}
}
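
A quick ad-hoc check (not part of the pipeline) that sequences and labels line up within each split:

# Ad-hoc check: sequence and label row counts must match within each split.
import glob
import h5py

for path in sorted(glob.glob("test/0629_ATACseq_merged/*with_label_with_label.hdf5")):
    with h5py.File(path, "r") as f:
        x, y = f["trainxdata"], f["traindata"]
        assert x.shape[0] == y.shape[0], path
        print(path, "sequences:", x.shape, "labels:", y.shape)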