# --- Directories: creating them and moving around ---
# NOTE: the later steps ('cd' on its own followed by 'cd bioc3301') only
# work if this walkthrough is started from your home directory.

# Make a new directory for the course
mkdir bioc3301
# Print the working directory (i.e. where we are now)
pwd
# List what is in the current directory:
# plain, long format, long format sorted by time, and the same reversed
ls
ls -l
ls -lt
ls -ltr
# Step into the directory we just made
cd bioc3301
pwd
ls
# Also show hidden entries (names that start with '.')
ls -a
# '.' means the current directory, so this leaves us where we are
cd .
pwd
# '..' means the parent directory, so this takes us up one level
cd ..
pwd
cd bioc3301
# 'cd' with no argument takes you to your home directory
cd
pwd
cd bioc3301
ls
# Download the archive of example files into the current directory
# (--remote-name, short form -O, saves it under its remote name: pdbs.tgz)
curl --remote-name http://www.bioinf.org.uk/teaching/bioc3301/pdbs.tgz
ls -l
# Rename a file with mv ...
mv pdbs.tgz files.tgz
ls
# ... and rename it back again
mv files.tgz pdbs.tgz
ls
# Extract a gzip-compressed tar archive
# (x = extract, v = list each file, z = gunzip, f = read the named file)
tar -xvzf pdbs.tgz
ls
# Go into the pdbs directory
cd pdbs
ls
# Print the contents of a file to the terminal
cat pdb9aat.ent
ls
# Given several files, cat prints them one after another (concatenates them)
cat pdb9aat.ent pdb9abp.ent
# Redirect that output into a new file instead of the terminal
cat pdb9aat.ent pdb9abp.ent >both.pdb
# Long listing with human-readable file sizes (KB, MB, etc)
ls -lh
# Remove a file
rm both.pdb
# Join every .ent file in this directory into a single file
cat *.ent > all.pdb
ls -lh
rm all.pdb
# Look at a file a page at a time
# (space to move forward a page; b to move back a page;
#  up/down arrows to move a line at a time; q to quit)
less pdb9aat.ent
# Copy a file (not the way we usually do it - normally you would use cp)
cat pdb9aat.ent > copy.pdb
# Compare the file sizes to show the copy is the same size.
# 'ls -l' is spelled out here because the 'll' alias is only set up
# further on in this walkthrough.
ls -l
# Prove that the files are identical (diff prints nothing when they match)
diff pdb9aat.ent copy.pdb
# -i asks for confirmation before deleting
rm -i copy.pdb
# Save the first 20 lines of each of two files
head -n 20 pdb9aat.ent > pdb9aat.top
head -n 20 pdb9abp.ent > pdb9abp.top
# Show where they differ (*.top picks up the .top files just created)
diff *.top
# Write the differences to a file and page through it
diff *.top > differences.txt
less differences.txt
# Or skip the intermediate file:
# pipe the output of diff straight into less
diff *.top | less
# Make 'rm' an alias for 'rm -i'
# (so that every delete asks for confirmation first)
alias rm='rm -i'
rm differences.txt
# Move to the bioc3301 directory inside your home directory ('~' means home)
cd ~/bioc3301
# With no arguments cat copies whatever you type back to the screen
# (press Ctrl-D to finish)
cat
# Redirecting that into a file is a quick way to create one
# - type: This is a test
#   then press Ctrl-D
cat > test.dat
# Check the result
ls
cat test.dat
# Use the same method to add the useful aliases to .bashrc in the home
# directory. '>>' appends to the file, so an existing .bashrc is kept
# rather than overwritten (the file is created if it does not exist yet).
# - enter: alias rm='rm -i'
#          alias ll='ls -l'
#   then Ctrl-D
cd
cat >>.bashrc
# Look at what you created
cat .bashrc
# Create a 'symbolic link' (a shortcut) named .profile pointing at .bashrc
# (if .profile already exists, ln reports an error and leaves it untouched)
ln -s .bashrc .profile
# This file (.bashrc or .profile) will be executed whenever you run git-bash
# To run it now:
source .bashrc
# See the aliases you created
alias ll
alias rm
# A plain ls does not show the .bashrc or .profile files because their
# names start with '.' - you need to add -a
ls -ltr
ls -ltra
# The previous steps left us in the home directory; go back to the course
# directory, which is where pdbs/ and pdbs.tgz live
cd ~/bioc3301
# Create and remove a directory
mkdir foo
ls
rmdir foo
ls
# rmdir only removes empty directories - pdbs still contains files,
# so this won't work
rmdir pdbs
# To remove a directory and everything inside it you need to do this:
# (-r = recurse into the directory, -f = never prompt; if the 'rm' alias
#  is active, the later -f overrides its -i)
rm -rf pdbs
# Re-extract the sample directory
tar xzvf pdbs.tgz
cd pdbs
# Pull out every line that contains 'ATOM'
grep ATOM pdb9aat.ent
grep ATOM pdb9aat.ent | less
# Only lines that begin with 'ATOM' (^ anchors the match to the line start)
grep '^ATOM' pdb9aat.ent | less
# Count them (wc prints the number of lines, words and bytes)
grep '^ATOM' pdb9aat.ent | wc
# Narrow down to the C-alpha atoms, then count those
grep '^ATOM' pdb9aat.ent | grep ' CA '
grep '^ATOM' pdb9aat.ent | grep ' CA ' | wc
# The same count, this time feeding the file to grep through a pipe
cat pdb9aat.ent | grep '^ATOM' | grep ' CA ' | wc
# The same again across every .ent file
# (wc -l prints just the line count)
cat *.ent | grep '^ATOM' | grep ' CA ' | wc
cat *.ent | grep '^ATOM' | grep ' CA ' | wc -l
# Now we want to know the amino acids:
# awk prints the 4th whitespace-separated field of each line
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}'
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}' | sort
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}' | sort | less
# sort -u keeps one copy of each value: the amino acids present in this protein
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}' | sort -u
# How many distinct ones are there - are all 20 used?
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}' | sort -u | wc -l
#
# The following shows how you can do something across a set of files.
# In reality we would be using some more complex program - here we are
# just counting lines as a demonstration. We want a 'wordcount' file
# for each .ent file
#
# Count lines in all .ent files
wc *.ent
wc -l *.ent
# Store info for one file
wc -l pdb9aat.ent > pdb9aat.wc
cat pdb9aat.wc
# Remove what we did
rm *.wc
# Run wc on each .ent file in turn.
# "$file" is in double quotes so that a filename containing spaces is
# still passed to wc as a single argument.
for file in *.ent; do
  wc -l "$file"
done
# Do the same but save each count in a file named after its input
# (e.g. pdb9aat.ent -> pdb9aat.ent.wc)
for file in *.ent; do
  wc -l "$file" > "$file.wc"
done
cat pdb9aat.ent.wc
rm *.wc
# Use the basename command to extract the main part of a filename
# removing the path
basename pdb9aat.ent
basename /disk1/localhome/localuser/bioc3301/pdbs/pdb9aat.ent
# Given a suffix as a second argument, basename strips that as well
basename pdb9aat.ent .ent
# Use this to create better filenames (pdb9aat.ent -> pdb9aat.wc).
# Note - $( ... ) runs the command inside it and substitutes its output,
# so the result of basename becomes part of the filename. (Older scripts
# use `backticks` for the same thing; $( ) is easier to read and to nest.)
# The double quotes keep filenames containing spaces in one piece.
for file in *.ent; do
  wc -l "$file" > "$(basename "$file" .ent).wc"
done
ls