# Walkthrough of basic shell commands using a set of example PDB files.
# Several steps read from the keyboard (cat, less, rm -i), so the commands
# are meant to be entered one at a time at an interactive prompt.

# Create a directory
mkdir bioc3301

# See what directory we are in
pwd

# See a list of files and directories in our current directory
ls
ls -l
ls -lt
ls -ltr

# Change to the directory we created
cd bioc3301/
pwd
ls

# List ALL files (including hidden ones)
ls -a
cd .
pwd

# Move up a directory
cd ..
pwd
cd bioc3301/

# Go to your home directory
cd
pwd
cd bioc3301/
ls

# Grab the example files
curl -O http://www.bioinf.org.uk/teaching/bioc3301/pdbs.tgz
ls -l

# Change a file name
mv pdbs.tgz files.tgz
ls
mv files.tgz pdbs.tgz
ls

# Unpack a gzipped tar file
tar xvzf pdbs.tgz
ls
cd pdbs
ls

# See the contents of a file
cat pdb9aat.ent
ls

# Use cat to concatenate files
cat pdb9aat.ent pdb9abp.ent
cat pdb9aat.ent pdb9abp.ent > both.pdb

# List files with human-readable file sizes (KB, MB, etc)
ls -lh

# Delete a file
rm both.pdb

# Concatenate all .ent files
cat *.ent > all.pdb
ls -lh
rm all.pdb

# Look at a file a page at a time
# (space to move forward a page; b to move back a page;
# up/down arrows to move a line at a time; q to quit)
less pdb9aat.ent

# Copy a file (not the way we usually do it)
cat pdb9aat.ent > copy.pdb

# Compare the file sizes to show the copy is the same size
# (plain 'ls -l' is used here because the 'll' alias is only created
# in the .bashrc step further down and may not exist yet)
ls -l

# Prove that the files are identical
diff pdb9aat.ent copy.pdb
rm -i copy.pdb

# Get the first 20 lines from two files
head -n 20 pdb9aat.ent > pdb9aat.top
head -n 20 pdb9abp.ent > pdb9abp.top

# Find the differences between them
# (*.top is expected to match exactly the two files created above;
# diff needs exactly two operands)
diff *.top

# Put the differences in a file
diff *.top > differences.txt
less differences.txt

# Look at the differences directly using a pipe into less
# rather than via a file
diff *.top | less

# Create an alias so typing 'rm' is the same as 'rm -i'
# (prompts to check you want to delete it)
alias rm='rm -i'
rm differences.txt

# Go to a sub directory of your home directory
cd ~/bioc3301

# Use cat simply to echo what you type (Ctrl-D to end)
cat

# Use cat to create a file
# - enter: This is a test
#   then Ctrl-D
cat > test.dat

# See what you have created
ls
cat test.dat

# Use the same method to create a file containing the useful aliases
# in the home directory
# - enter: alias rm='rm -i'
#          alias ll='ls -l'
#   then Ctrl-D
# NOTE: '>' replaces the contents of any existing .bashrc; use '>>'
# instead if you already have one and want to keep it.
cd
cat > .bashrc

# Look at what you created
cat .bashrc

# Create a 'symbolic link' (a shortcut)
ln -s .bashrc .profile

# This file (.bashrc or .profile) will be executed whenever you run git-bash
# To run it now:
source .bashrc

# See the aliases you created
alias ll
alias rm

# Doing ls - you can't see the .bashrc or .profile files. You need to add -a
ls -ltr
ls -ltra

# Return to the course directory: the pdbs directory and pdbs.tgz were
# created in ~/bioc3301, but the steps above left us in the home directory
cd ~/bioc3301

# Create and remove a directory
mkdir foo
ls
rmdir foo
ls

# You can't remove a directory that contains any files - this won't work
rmdir pdbs

# You need to do it like this:
rm -rf pdbs

# Re-extract the sample directory
tar xzvf pdbs.tgz
cd pdbs

# Extract the lines containing 'ATOM'
grep ATOM pdb9aat.ent
grep ATOM pdb9aat.ent | less

# Extract lines that start with 'ATOM'
# (the pattern is quoted so the shell passes '^' to grep untouched)
grep '^ATOM' pdb9aat.ent | less

# See how many there are
grep '^ATOM' pdb9aat.ent | wc

# Find the C-alpha atoms and how many there are
grep '^ATOM' pdb9aat.ent | grep ' CA '
grep '^ATOM' pdb9aat.ent | grep ' CA ' | wc

# Alternative way of doing the same thing by piping into grep
cat pdb9aat.ent | grep '^ATOM' | grep ' CA ' | wc

# Do the same but with all the .ent files
cat *.ent | grep '^ATOM' | grep ' CA ' | wc
cat *.ent | grep '^ATOM' | grep ' CA ' | wc -l

# Now we want to know the amino acids
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}'
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}' | sort
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}' | sort | less

# Unique list of amino acids present in this protein
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}' | sort -u

# Count them - are all 20 used?
grep '^ATOM' pdb9aat.ent | grep ' CA ' | awk '{print $4}' | sort -u | wc -l

#
# The following shows how you can do something across a set of files.
# In reality we would be using some more complex program - here we are
# just counting lines as a demonstration. We want a 'wordcount' file
# for each .ent file
#

# Count lines in all .ent files
wc *.ent
wc -l *.ent

# Store info for one file
wc -l pdb9aat.ent > pdb9aat.wc
cat pdb9aat.wc

# Remove what we did
rm *.wc

# Run wc on each .ent file in turn
# ("$file" is quoted so names containing spaces stay a single argument)
for file in *.ent; do
  wc -l "$file"
done

# Do the same but save in a file
for file in *.ent; do
  wc -l "$file" > "$file.wc"
done
cat pdb9aat.ent.wc
rm *.wc

# Use the basename command to extract the main part of a filename
# removing the path
basename pdb9aat.ent
basename /disk1/localhome/localuser/bioc3301/pdbs/pdb9aat.ent

# Use basename to strip the file extension as well
basename pdb9aat.ent .ent

# Use this to create better filenames
# Note - wrap a command in $( ) to use its output as part of the
# command line (here, as the name of the output file)
for file in *.ent; do
  wc -l "$file" > "$(basename "$file" .ent).wc"
done
ls