#!/bin/sh
## Pathway Commons.
# This is where it all starts.
# Start with a blind search of Ubiquitin and NF-kB Alternate.
# Browse through pathways and download the interesting ones.
# Open the record page of pathway 517194 in the browser for inspection.
# The URLs are quoted because they contain '?', which the shell otherwise
# treats as a pathname-expansion (glob) character.
firefox 'http://www.pathwaycommons.org/pc/record2.do?id=517194'
# Fetch the BioPAX export of each pathway selected while browsing.
wget 'http://www.pathwaycommons.org/pc/downloadBioPax.do?id=517194'
wget 'http://www.pathwaycommons.org/pc/downloadBioPax.do?id=486346'
wget 'http://www.pathwaycommons.org/pc/downloadBioPax.do?id=486368'
# Finally
# Download the Alternate Nf-kB, Ubiquitin, TRAF pathways
## STRING DB
## Cytoscape
# This study is resource intensive. Raise the JVM memory limits, either in
# Cytoscape.vmoptions or on the command line as done here.
# -Xmx sets the maximum heap size and -Xss the thread stack size; the value is
# attached directly to the flag. ("-DXmx 5G" would only define a system
# property named "Xmx" and make java treat "5G" as the class to launch.)
# NOTE(review): "cytoscape" is passed as the main class name; confirm that it
# resolves on the classpath of the local Cytoscape installation.
java -Xmx5G -Xss1G cytoscape
# Start with a blind search using the Agilent Literature Search add-on.
# Put in similar queries as provided for Pathway Commons.
# Take the new results and go back to Pathway Commons to find respective pathways.
# Download the biopax.xml files.
# Load the networks in Cytoscape.
# Finally
## NCBI
## MUSCLE Alignment
# First pass: align seq.fasta so that false positives, if there are any, can
# be spotted and pruned. The alignment goes to seq.afa, a tree to tree.phy and
# the log to sq.log.
muscle \
  -in ./seq.fasta \
  -out seq.afa \
  -tree1 tree.phy \
  -anchors \
  -log sq.log
# Same alignment again, written as HTML (seq.afa.html) for easy viewing.
# Only suitable for small sequence sets.
muscle \
  -in ./seq.fasta \
  -out seq.afa.html \
  -tree1 tree.phy \
  -anchors \
  -html \
  -log sq.log
# Refine the first-pass alignment; whatever muscle prints on stdout is kept
# in sq.m.ref.out.
muscle -in seq.afa -out refined.afa -refine \
  > sq.m.ref.out
# Render the dendrogram as an SVG figure.
perl treedraw.pl \
  > fig7.svg
# Finally
# If some sequences look like clear outliers in the alignment and dendrogram, prune them and move on to the next step.
## CLUSTAL MSA
# Build the multiple sequence alignment with Clustal Omega and write it in
# Stockholm format (sq.st) so that HMMER can consume it. Side outputs named on
# the command line: distance matrix (seqDist.50), guide tree (seqTree.dnd),
# clustering (seqCluster.cls), posteriors (seq.pos) and the log (seq.log).
# --force lets existing output files be overwritten.
clustalo \
  -i seq.fasta \
  --distmat-out="seqDist.50" \
  --guidetree-out="seqTree.dnd" \
  --full \
  --cluster-size=3 \
  --clustering-out="seqCluster.cls" \
  --trans=3 \
  --posterior-out="seq.pos" \
  --percent-id \
  --outfile=sq.st \
  --outfmt=st \
  --resno \
  --output-order=tree-order \
  --iter=1000 \
  --threads=4 \
  --log="seq.log" \
  -v \
  --force
# Draw the dendrogram and compare it with the one obtained from MUSCLE.
# NOTE(review): this writes fig7.svg, the same file name the MUSCLE step uses,
# so the earlier figure is replaced; keep a copy of it first if both are
# needed side by side.
perl treedraw.pl \
  > fig7.svg
# Finally
# Match the results with the MUSCLE output. If the results are too different, see if some parameters need to be changed.
# Take sq.st alignment file and move on to next step.
## HMMER Search
# Location of the Swiss-Prot sequence database searched below. Defaults to the
# path this workflow was originally run with; export SWISSPROT_DB before
# running the script to point it at a different copy.
SWISSPROT_DB="${SWISSPROT_DB:-/run/media/aamijaninaa/fedora/home/bio/fromHome/NewFolder/database/swissprot}"
# Build the HMM profile (sq.hmm) from the Stockholm alignment sq.st.
hmmbuild sq.hmm sq.st
# Search the Swiss-Prot database with the profile; the report goes to sq.out.
hmmsearch ./sq.hmm "$SWISSPROT_DB" > sq.out
# Iterative, PSI-BLAST-like search with jackhmmer, using sq.st as the query;
# the report goes to sq.psi.pfam.out.
jackhmmer ./sq.st "$SWISSPROT_DB" > sq.psi.pfam.out
# Finally
# Study the sq.out and search for matches having E-Value between e-60 and 0, Bias = 0 and Score > 300
# Study the sq.psi.pfam.out and search for matches with similar parameters.
# Extend the search while studying the MSA, picking up sequences that show similarity around the motifs of interest.
## RCSB Search
# Note the biological units of interest from the previous steps.
# If duplicate structures are found in the PDB database, look into Remark 100 for the date of entry and select the latest entry.
# Study Remark 350 for structure details.
# Study Remark 465, 470, 500 for missing residues and atoms and peculiar torsion angles.
# Fetch the PDB files for the selected structures.
# The script is run without arguments: the list of required sequences has to
# be edited inside downloadPDB.pl itself before this step.
perl downloadPDB.pl
# Finally
# Verify that all the sequences requested were downloaded.
# Sometimes the file size varies. Make sure that the complete PDB file was downloaded in such cases.
## MODELLER
# After deciding which sequences to consider for modelling and downloading their PDB structures, move on to Modeller
# Iteration 1: Take only the target sequences and perform all the steps in @Structure Alignment
# Iteration 2: Take the structures reported in build_profile.log and append them to the end of existing seq.fasta and perform @Structure Alignment one more time.
# Iteration 3: Decide final templates and model.
# Structure Alignment
# Running build_prof.py will, according to the workflow notes:
# 1a. Convert the input FASTA sequence into PIR format.
# 1b. Compress the PDB95 database. Further runs may skip step 1b.
# 2a. Read the binary sequence database.
# 2b. Read the PIR format alignment file.
# 3a. Create a profile alignment.
# 3b. Scan the SWISSPROT sequence db to pick homologous sequences.
# 3c. Create sq.pir MSA.
# NOTE(review): step 3c names the output sq.pir while the follow-up note
# below refers to seq.pir; confirm the actual file name written by the script.
python3 build_prof.py
# Finally
# The seq.pir file obtained is a structure alignment of the input sequences. That is why it complains that some sequences are distant.
# Match that structure alignment with the sequence alignment files obtained from MUSCLE and CLUSTAL.
# Study the differences. If they can be explained, fine; otherwise repeat everything.
# Study the dendrogram and select a template.
## Intermission
# At this point, you have a structurally homologous subset of templates of interest.
# Repeat everything to see if there is anything else that could be considered.
# Model Building
# Running build_model.py will, according to the workflow notes:
# - Read in the template structures and add them to the alignment.
# - Create a structure alignment of the templates with the target model.
# - Refine the loops and create the final model.
# - Evaluate the structures using the DOPE score.
python3 build_model.py
# Finally
# Study the DOPE Graph and DOPE Scores.
# Select a final model
## Manual Homology Modelling
# Take the best hit based on GA and DOPE scores and model it manually in
# Swiss PDB and PyMOL. The command below opens best_hit.pdb in PyMOL; the file
# must already exist under that name in the current directory.
pymol best_hit.pdb