<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type='text/xsl' href='/hyperscope/src/client/lib/hs/xslt/hyperscope.xsl'?>
<opml version="1.0">
  <head>
    <title>Basic Algorithms Notes</title>
    <expansionState>0,45,48,55,57,58,61,63,69,70,72,87,89,95,96,101,105,108,109,110,115,119,122,124,127,129,131,133,136,138,140,146,147,148,153,154,155,160,161,163,165,167,168,170,200,201,202,204,208,209,211,213,215,218,223,224,229,230,232,236,238,240,245,247,252,255,256,258,261,265,271,272,275,278,283,286,288,290,291,296,299,300,304,306,309,310,314,321,328,330,332,334,335,337,339,346,350,351,357,358,360,362,364,365,368,370,377,380,384,388,389,398,401,405,409,411,415,418,419,422,423,429,432,438,439,440,444,446,447,452,453,461,464,465,467,469,470,471,473,474,478,479,482,483,487,489,492,493,494,498,499,504,505,510,512,516,517,520,522,523,524,526,528,530,532,538,540,541,544,547,549,552,556,572,573,576,578,580,582,584,587,598,606,610,614,616,618,621,622,623,625,633,634,636,638,642,646,655,657,658,662,668,671,675,679,680,683,684,686,689,691,693,698,699,701,703,707,709,716,717</expansionState>
  </head>
  <body>
    <outline text="Data Structures &amp; Algorithms">
      <outline text="Performance/Complexity">
        <outline text="Performance">
          <outline text="The amount of CPU/memory/disk usage"/>
        </outline>
        <outline text="Complexity">
          <outline text="Definition">
            <outline text="How well an algorithm scales"/>
            <outline text="How an algorithm behaves as the size of the problem increases"/>
          </outline>
          <outline text="Big O Notation">
            <outline text="Order of magnitude of the number of operations required to perform a function"/>
            <outline text="Growth relative to the size of the problem"/>
            <outline text="Common orders in order from best to worst">
              <outline text="O(1)">
                <outline text="Constant time"/>
                <outline text="Performance isn't affected by the size of the problem"/>
                <outline text="Could still be slow, just stays slow"/>
                <outline text="Example">
                  <outline text="Array lookup"/>
                  <outline text="Grabbing something from memory"/>
                </outline>
              </outline>
              <outline text="O(log N)">
                <outline text="logarithmic time"/>
                <outline text="Usually requires your algorithm to somehow discard large portions of input set, so algorithms of this kind usually have searching of some kind"/>
                <outline text="Usually log base 2"/>
                <outline text="Trick">
                  <outline text="To calculate log base 2 of a number, work out number of binary digits needed to store that number"/>
                  <outline text="Example">
                    <outline text="log base 2 of 300 = 9, as it takes 9 binary digits to express the number 300"/>
                  </outline>
                </outline>
              </outline>
              <outline text="O(N log N)">
                <outline text="proportional to size of problem and the logarithmic time"/>
              </outline>
              <outline text="O(N)">
                <outline text="linear time"/>
                <outline text="Example">
                  <outline text="As you add more customers in grocery line, time for each one stays the same"/>
                </outline>
              </outline>
              <outline text="O(N^2)">
                <outline text="quadratic time"/>
                <outline text="Example">
                  <outline text="Every person in a group meets every other person"/>
                </outline>
              </outline>
              <outline text="O(N!)">
                <outline text="factorial time"/>
              </outline>
            </outline>
          </outline>
        </outline>
      </outline>
      <outline text="Data Structures">
        <outline text="Lists">
          <outline text="Ordered collection of elements supporting random access to each element"/>
          <outline text="Preserve insertion order"/>
          <outline text="Array Lists">
            <outline text="Uses array to hold values"/>
            <outline text="Pros">
              <outline text="Implementation very simple"/>
              <outline text="Fastest implementation for indexed and sequential access"/>
            </outline>
            <outline text="Cons">
              <outline text="Each time you insert element, you have to shift any higher elements one position through physical copying"/>
              <outline text="Same issue with deletion"/>
              <outline text="Dynamically increasing size of list requires creating new array and copying elements over"/>
              <outline text="So, performance for insertions and deletions not as good"/>
            </outline>
          </outline>
          <outline text="Linked Lists">
            <outline text="Chain of elements in which each element has a reference to the next (and optionally previous) element"/>
            <outline text="Doubly-linked list goes in both directions">
              <outline text="Can access either end with equal performance"/>
            </outline>
            <outline text="Pros">
              <outline text="Insertion and Deletion are much simpler than array list">
                <outline text="Only have to update elements to and from the elements around the affected element"/>
                <outline text="So cost is almost neglible"/>
              </outline>
            </outline>
            <outline text="Cons">
              <outline text="For large lists, traversal time can be a performance issue"/>
            </outline>
            <outline text="Sentinel trick">
              <outline text="Also called Null Object Pattern/Null Object"/>
              <outline text="Technique for simplifying an algorithm by adding a special object to one or both ends of a data structure to avoid writing special code that handles boundary conditions"/>
              <outline text="Allows you to avoid having to check for 'null' all the time"/>
              <outline text="Next and Previous fields of sentinel point to start and end of linked list"/>
              <outline text="First and last elements themselves reference the sentinel object as their previous and next elements, respectively"/>
            </outline>
            <outline text="Trick for increasing getting a particular element's performance">
              <outline text="If desired position falls in first half of list">
                <outline text="Start searching from beginning"/>
              </outline>
              <outline text="Otherwise">
                <outline text="Start searching from end"/>
              </outline>
              <outline text="Cuts actual performance time in half"/>
            </outline>
          </outline>
        </outline>
        <outline text="Queues">
          <outline text="You only ever get the head of a queue">
            <outline text="The head of the queue differs based on the kind of queue you are dealing with"/>
          </outline>
          <outline text="Producer">
            <outline text="Anything that stores data in a queue"/>
          </outline>
          <outline text="Consumer">
            <outline text="Anything that retrieves data from a queue"/>
          </outline>
          <outline text="Bounded Queues">
            <outline text="Have limits placed on the number of items that can be held at any one time"/>
          </outline>
          <outline text="Unbounded Queues">
            <outline text="Free to grow in size"/>
          </outline>
          <outline text="Queue operations">
            <outline text="enqueue">
              <outline text="store a value in the queue"/>
            </outline>
            <outline text="dequeue">
              <outline text="retrieve the value from the head of the queue"/>
            </outline>
          </outline>
          <outline text="First In First Out (FIFO)">
            <outline text="Order of retrieval same as order of insertion"/>
            <outline text="First value to go in is first value to come out"/>
            <outline text="Calling dequeue() will always give you the element that has been waiting longest"/>
            <outline text="Implementation">
              <outline text="Using a list as underlying storage is best option">
                <outline text="Linked list better than array list since size will probably grow and shrink"/>
              </outline>
            </outline>
          </outline>
          <outline text="Blocking Queues">
            <outline text="Useful in multi-threaded environment"/>
            <outline text="All access to data correct synchronized"/>
            <outline text="A blocking queue">
              <outline text="When attempt is made to store item beyond queue limit, queue will block until space is available"/>
              <outline text="If dequeue() is called and nothing available, will block thread until item is available"/>
              <outline text="Good for pool of workers who want tasks to perform"/>
            </outline>
            <outline text="Implementation">
              <outline text="In general, keep same external API as standard Queue, just wrap it with a synchronized version that internally composes non-blocking version"/>
              <outline text="Internally, use mutual exclusion semaphor (mutex) (a common object) as synchronization point for all operations to block on"/>
              <outline text="Java">
                <outline text="syncronized(_mutex)">
                  <outline text="while we are at max size">
                    <outline text="try{ _mutex.wait() } catch(){}"/>
                  </outline>
                  <outline text="_queue.enqueue(value)"/>
                  <outline text="_mutex.notifyAll()"/>
                </outline>
              </outline>
            </outline>
          </outline>
          <outline text="Priority Queues">
            <outline text="Special type of queue that provides access to the largest element contained within it (could be smallest)">
              <outline text="More general form of queue"/>
              <outline text="FIFO queue could use comparator that is based on time since insertion"/>
              <outline text="LIFO queue could use comparator that is based on time of insertion"/>
            </outline>
            <outline text="Unsorted List Priority Queue">
              <outline text="Keep as unsorted list"/>
              <outline text="Just search through for largest item each time"/>
              <outline text="Enqueue">
                <outline text="O(1)"/>
              </outline>
              <outline text="Dequeue">
                <outline text="O(n)"/>
              </outline>
              <outline text="Good if not too many calls to dequeue, since so simple"/>
            </outline>
            <outline text="Sorted List Priority Queue">
              <outline text="Keep list in sorted order so always ready to get largest"/>
              <outline text="Deuque will be very fast">
                <outline text="Just removes last item from the list"/>
              </outline>
              <outline text="Enqueue does all the hard work">
                <outline text="Can just use insertion sort, because insertion sort works well on list that is mostly sorted already and we are just adding one item"/>
              </outline>
            </outline>
            <outline text="Heap-ordered Priority Queue">
              <outline text="Heaps">
                <outline text="A binary tree structure in which each element is larger than its two children - known as the heap condition"/>
                <outline text="The largest item is sitting at top of tree">
                  <outline text="other elements are not sorted and are not of interest"/>
                </outline>
                <outline text="Trick for storing heaps">
                  <outline text="Logically a tree structure, but we store as a list"/>
                  <outline text="To navigate tree structure stored as list (we numbered each node, starting from 0 at root):">
                    <outline text="The left child of the item at index X is at index (2X + 1)"/>
                    <outline text="The right child of the item at index X is at index (2X + 2)"/>
                    <outline text="The parent of the item at index X is at index ((X - 1) / 2); item 0 has no parent"/>
                  </outline>
                </outline>
              </outline>
              <outline text="Adding new items">
                <outline text="Just add to end"/>
                <outline text="Heap condition no longer applies though">
                  <outline text="Must swim element to the top to restore">
                    <outline text="Swimming">
                      <outline text="Exchanging an item with its parent if the parent is smaller, and continuing until the top of the heap is reached or a parent is found that is equal to or larger than the item doing the swimming"/>
                    </outline>
                  </outline>
                </outline>
              </outline>
              <outline text="Deleting largest item">
                <outline text="Just grab root"/>
                <outline text="Swap in value at bottom right of the tree (i.e. last position of list)"/>
                <outline text="Heap condition now violated">
                  <outline text="Must sink value down">
                    <outline text="Sinking">
                      <outline text="Process of repeatedly exchanging an item with the larger of its children until the heap condition is restored or the bottom of the tree is reached"/>
                    </outline>
                  </outline>
                </outline>
              </outline>
              <outline text="Adds and removes items from queue in O(log N)"/>
            </outline>
          </outline>
        </outline>
        <outline text="Stacks">
          <outline text="A stack is a list with access restricted to one end"/>
          <outline text="Operations">
            <outline text="Pop">
              <outline text="Deletes and retrieves top-most element"/>
            </outline>
            <outline text="Push">
              <outline text="Pushes a new element on to the stack"/>
            </outline>
            <outline text="Peek">
              <outline text="Returns but does not delete top of the stack"/>
            </outline>
          </outline>
          <outline text="Implementation">
            <outline text="Just use a list">
              <outline text="Compose using a LinkedList implementation"/>
            </outline>
          </outline>
          <outline text="Some use cases">
            <outline text=" MRU (Most Recently Used) list"/>
            <outline text="parsing programming languages"/>
            <outline text="Undo-Redo"/>
            <outline text="Tower of Hanoi"/>
            <outline text="RPN (Reverse Polish Notation) calculator"/>
            <outline text="reversing list of values"/>
            <outline text="XML processing"/>
            <outline text="screen flow management (back/forward button in browser)"/>
          </outline>
        </outline>
        <outline text="Trees">
          <outline text="Binary Search Trees">
            <outline text="Binary search algorithms suffer when lots of adds or deletes"/>
            <outline text="Can achieve O(log N) search time of binary search tree while being easy to add and delete things"/>
            <outline text="Leaf nodes">
              <outline text="Have no children"/>
            </outline>
            <outline text="Binary tree has two children">
              <outline text="All children to left must have smaller values"/>
              <outline text="All children to right must have larger values"/>
            </outline>
            <outline text="Average search time is proportional to its height: O(h)">
              <outline text="For balanced tree the height is O(log N)"/>
              <outline text="If unbalanced, can lead to worst-case search time of O(N)"/>
            </outline>
            <outline text="Minimum">
              <outline text="Node with smallest value"/>
              <outline text="Follow left links starting from root to find smallest value"/>
              <outline text="Leftmost node in the tree"/>
            </outline>
            <outline text="Maximum">
              <outline text="Node with largest vlue"/>
              <outline text="Follow right-hand links from root to find largest value"/>
              <outline text="Rightmost node in the tree"/>
            </outline>
            <outline text="Successor">
              <outline text="One with the next largest value in the tree">
                <outline text="Finding">
                  <outline text="Two cases">
                    <outline text="If node has right child, then successor is minimum of that"/>
                    <outline text="If node has no right child, search back up the tree until you find first &quot;right-hand turn&quot;">
                      <outline text="Keep looking up tree until you find a node that is the left-child, then use its parent"/>
                    </outline>
                  </outline>
                </outline>
              </outline>
            </outline>
            <outline text="Predecessor">
              <outline text="Node with the next smallest value"/>
              <outline text="Finding">
                <outline text="Two cases">
                  <outline text="If node has left child, then take its maximum"/>
                  <outline text="If node has no left child, work up the tree until you find a left-hand turn">
                    <outline text="Keep looking up tree until you find a node that is the right-child, then use its left-most child if present, or just it if no left-most child"/>
                  </outline>
                </outline>
              </outline>
            </outline>
            <outline text="Search">
              <outline text="Start at root node and follow links left or right as appropriate (whether value is smaller or larger, respectively) until you find appropriate value or none"/>
            </outline>
            <outline text="Insertion">
              <outline text="If search returns nothing, then add to tree as root node"/>
              <outline text="If nonrandom data added, then tree will degenerate into a linked list, and height of the tree, and average search time, because O(N)"/>
              <outline text="Must use a tree that does re-balancing">
                <outline text="Red-Black trees"/>
                <outline text="AVL trees"/>
                <outline text="Splay trees"/>
                <outline text="B-Trees"/>
              </outline>
            </outline>
            <outline text="Deletion">
              <outline text="three cases to consider. Node to be deleted will reflect one of the following:">
                <outline text="No children (a leaf), in which case just remove it"/>
                <outline text="One child, in which case you replace deleted node with its child"/>
                <outline text="Two children, in which case you swap the node with its successor and try again with case 1 or 2"/>
              </outline>
              <outline text="Can unbalance a tree"/>
            </outline>
            <outline text="In-order Traversal">
              <outline text="Visits the values in a binary search tree in sorted order">
                <outline text="Useful for printing out or processing values in order"/>
                <outline text="Recursive algorithm">
                  <outline text="Traverse the left subtree of the node"/>
                  <outline text="Visit node itself"/>
                  <outline text="Traverse right subtree"/>
                </outline>
                <outline text="Iterative algorithm">
                  <outline text="Start with the minimum of the tree and visit it and each successor node until there are no more"/>
                </outline>
              </outline>
            </outline>
            <outline text="Pre-order Traversal">
              <outline text="First visits the root node, then each of the subtrees"/>
              <outline text="Recursive algorithm">
                <outline text="Visit the node itself"/>
                <outline text="Traverse left subtree"/>
                <outline text="Traverse right subtree"/>
              </outline>
              <outline text="Iterative algorithm complex and requires stack"/>
            </outline>
            <outline text="Post-order Traversal">
              <outline text="Visits the root node after each of the subtrees"/>
              <outline text="Recursive Algorithm">
                <outline text="Traverse left subtree"/>
                <outline text="Traverse right subtree"/>
                <outline text="Visit the node itself"/>
              </outline>
              <outline text="Iterative algorithm complex and requires stack"/>
            </outline>
            <outline text="Balancing">
              <outline text="Balancing restores desired characteristics of tree"/>
              <outline text="Difficult thing is detecting imbalance in first place"/>
              <outline text="AVL Tree">
                <outline text="Track the height of each subtree">
                  <outline text="If two siblings ever differ in height by more than one, tree has become unbalanced"/>
                </outline>
                <outline text="To correct">
                  <outline text="rotating nodes to remove the imbalance"/>
                  <outline text="Working up tree from inserted/deleted node to the root, rotating nodes as necessary"/>
                  <outline text="Four different types of rotation, single or double rotation, left/right version">
                    <outline text="Special table not reproduced here on which to do"/>
                  </outline>
                </outline>
              </outline>
              <outline text="Red-Black Tree"/>
            </outline>
          </outline>
          <outline text="Ternary Search Trees">
            <outline text="Overview">
              <outline text="Specific for storing and retrieving strings"/>
              <outline text="Like a binary search tree, so each node has reference to smaller and larger values"/>
              <outline text="However, each node doesn't have entire value, just one letter"/>
              <outline text="Also has a reference, hence ternary, to a subtree of nodes containing any letters that follow it in the word"/>
              <outline text="A node must indicate whether it is a word ending, so we can differentiate partial from full real matches"/>
            </outline>
            <outline text="Performance">
              <outline text="Compares fewer individual character comparisons with an equivalent binary search tree">
                <outline text="Because words that share common prefix are compressed"/>
              </outline>
              <outline text="Efficient at quickly discarding words that don't exist in the tree, because terminates if no matching prefix found"/>
              <outline text="If size of alphabet is M (such as A-Z), then to find a word of length M results in O(N log M).">
                <outline text="In practice, performance much better due to common prefixes and Zipfs law"/>
              </outline>
              <outline text="Unbalanced tree devolves to O(NM) comparisons"/>
            </outline>
            <outline text="Searching for a word">
              <outline text="Perform a binary search starting with the first letter of the word for which you are searching"/>
              <outline text="Start at root and follow links left and right as appropriate"/>
              <outline text="Once you find a matching node, move down one level to its reference child and start again, this time with the second letter"/>
              <outline text="Continues until you find all the letters, in which case you have a match, or run out of nodes"/>
            </outline>
            <outline text="Inserting a word">
              <outline text="Add extra leaf nodes for any letters that don't already exist"/>
              <outline text="Store word endings if a letter is an ending"/>
            </outline>
            <outline text="Prefix searching">
              <outline text="Ternary search trees useful for finding all words with a common prefix"/>
              <outline text="Perform standard search just using prefix">
                <outline text="Then perform in-order traversal looking for every end-of-word marker"/>
                <outline text="In order traversal on Ternary search tree">
                  <outline text="Having found last node in prefix">
                    <outline text="Traverse left subtree of the node"/>
                    <outline text="Visit the node itself"/>
                    <outline text="Traverse the node's reference children"/>
                    <outline text="Traverse the right subtree"/>
                  </outline>
                </outline>
              </outline>
            </outline>
            <outline text="Pattern matching">
              <outline text="Can be used to match wildcard patterns in a word"/>
              <outline text="Each time you come across a wild card, rather than looking for a node with a matching letter you instead visit each node as if it was a match"/>
            </outline>
          </outline>
          <outline text="B-Trees">
            <outline text="Overview">
              <outline text="Great if data exists on disk with large indexes"/>
              <outline text="For managing indexes on secondary storage"/>
              <outline text="Contains nodes. Each node contains not one, but multiple keys, up to some defined maximum, usually determined by the size of a disk block"/>
              <outline text="Keys in node are stored in sorted order, with an associated child node holding keys that sort lower than it">
                <outline text="Every non-leaf node containing k keys must have k+1 children"/>
              </outline>
            </outline>
            <outline text="Performance">
              <outline text="Height is lower of a B-Tree, so fewer node traversals and therefore fewer disk I/Os"/>
              <outline text="Order of magnitude better than binary search tree"/>
            </outline>
            <outline text="Lookups">
              <outline text="search must make a choice between multiple children">
                <outline text="Start at root node"/>
                <outline text="Scan through key list, on each key doing similar routine as binary search tree"/>
                <outline text="Follow link to children from key entry if appropriate"/>
              </outline>
            </outline>
            <outline text="Inserting">
              <outline text="Start at root and search all the way down until you reach a leaf node"/>
              <outline text="Insert value in order"/>
              <outline text="If key threshold is exceeded (it becomes full), split it into two condes, each containing half the keys from the original"/>
              <outline text="The &quot;middle&quot; key from the original node is then moved up to the parent and inserted in order with a reference to the newly created node"/>
              <outline text="Repeat if new parent node also full"/>
              <outline text="Whenever the root node is split, a new node is created and becomes the new root"/>
            </outline>
            <outline text="Deletion">
              <outline text="Complex with lots of edge cases - not delved into here"/>
            </outline>
          </outline>
        </outline>
        <outline text="Hashing">
          <outline text="O(1) lookup"/>
          <outline text="hash function takes a value and produced a hash value">
            <outline text="used to locate position within hash table"/>
          </outline>
          <outline text="collisions - different values hashing to same address">
            <outline text="Can increase address space to avoid collisions">
              <outline text="Increased amount of wasted space"/>
            </outline>
            <outline text="Prime numbers for hash table size better than non-primes">
              <outline text="Choose prime close to hash table size"/>
            </outline>
            <outline text="perfect hashing algorithm - one that produces no collisions at all">
              <outline text="very hard to find"/>
            </outline>
            <outline text="CRC is a good hash algorithm">
              <outline text="Add each letter -- working value is multiplied by 31 before eaching each letter">
                <outline text="elvis - ((E * 31 + L) * 31 + V) * 31 + I) * 31 + S)"/>
              </outline>
              <outline text="Take remainder after dividing hash value by the table size (the modulus) - use a close prime - generates final address">
                <outline text="ELVIS % 11"/>
              </outline>
            </outline>
          </outline>
          <outline text="Linear Probing">
            <outline text="Collisions still occur"/>
            <outline text="On detecting a collision, searches linearly for the next available slot and uses that"/>
            <outline text="If reach end, just wrap before start"/>
            <outline text="If table full, must resize and just use linear search"/>
            <outline text="Works well for sparsly populated hash tables"/>
            <outline text="As table members increase though, searching and performance degrade from O(1) to O(N)"/>
          </outline>
          <outline text="Bucketing">
            <outline text="Use buckets to store more than one item"/>
            <outline text="Each bucket holds zero or more items that hash to the same value"/>
            <outline text="Tension between number of buckets, performance, and space taken up"/>
            <outline text="When to resize number of buckets?">
              <outline text="monitor load factor">
                <outline text="ratio of stored values to available buckets"/>
              </outline>
              <outline text="set a threshold load factor that will trigger a resize"/>
              <outline text="Load factor of 0.75 or 75% is good tradeoff between space and time."/>
            </outline>
          </outline>
        </outline>
        <outline text="Sets">
          <outline text="Collection that hold only distinct values; guarantees that an item will not be added more than once"/>
          <outline text="Set operations between two sets">
            <outline text="Union">
              <outline text="Another set containing all values from both (no duplicates)"/>
            </outline>
            <outline text="Intersection">
              <outline text="Values in common"/>
            </outline>
            <outline text="Difference">
              <outline text="Values that are not in common"/>
            </outline>
          </outline>
          <outline text="List Set">
            <outline text="Use a simple list">
              <outline text="Linked list"/>
            </outline>
            <outline text="Simple, add and delete perform in O(N), okay for small amounts of items"/>
          </outline>
          <outline text="Hash Set">
            <outline text="Lots of values and ordering not important, then this is good choice"/>
          </outline>
          <outline text="Tree Set">
            <outline text="Guarantees ordering of data while keeping set semantics"/>
            <outline text="O(log N)"/>
          </outline>
        </outline>
        <outline text="Maps">
          <outline text="Also known as dictionaries, lookup tables, associative arrays"/>
          <outline text="Mapping between key and value"/>
          <outline text="Each key/value pair is an entry"/>
          <outline text="List Map">
            <outline text="Just use a list; simple, but not efficient"/>
            <outline text="O(N)"/>
          </outline>
          <outline text="Hash Map">
            <outline text="Use a hashtable"/>
            <outline text="Each bucket is just a list map"/>
            <outline text="If good hash function, O(1) can be achieved"/>
          </outline>
          <outline text="Tree Map">
            <outline text="When you want a predictable ordering of the keys"/>
            <outline text="Backed by a binary search tree"/>
            <outline text="O(log N)"/>
          </outline>
        </outline>
      </outline>
      <outline text="Algorithms">
        <outline text="Sorting">
          <outline text="All sorting algorithms composed of">
            <outline text="Comparing items to determine whether they are out of order"/>
            <outline text="Moving items into sorted position"/>
            <outline text="Advantages and disadvantages of sorting algorithms are based on how many times these fundamental operations need to be performed and how expensive these operations are in performance terms"/>
          </outline>
          <outline text="Stability">
            <outline text="If a sorting algorithm maintains the relative order of items with a common sort key, it is said to be a stable algorithm"/>
          </outline>
          <outline text="Basic">
            <outline text="Bubble Sort">
              <outline text="Performance">
                <outline text="O(N^2)"/>
                <outline text="Best case is O(N) if list is already sorted"/>
              </outline>
              <outline text="Algorithm">
                <outline text="Repeatedly step through the list"/>
                <outline text="Compare two items at a time and swap them if they are in the wrong order"/>
                <outline text="Pass through list is repeated until no swaps are needed, which means list is sorted"/>
              </outline>
              <outline text="Analysis">
                <outline text="Pretty awful - never use in practice"/>
                <outline text="Stable"/>
              </outline>
            </outline>
            <outline text="Selection Sort">
              <outline text="Performance">
                <outline text="O(N^2)"/>
              </outline>
              <outline text="Algorithm">
                <outline text="Find the minimum value in the list"/>
                <outline text="Swap it with the value in the first position"/>
                <outline text="Repeat the steps above for remainder of the list (starting at the second position)"/>
              </outline>
              <outline text="Analysis">
                <outline text="Pretty awful - never use in practice"/>
                <outline text="Not stable"/>
              </outline>
            </outline>
            <outline text="Insertion Sort">
              <outline text="Performance">
                <outline text="O(N^2), but see Analysis section below for real-world performance"/>
                <outline text="Best case is O(N) if list is already sorted"/>
              </outline>
              <outline text="Algorithm">
                <outline text="Maintain two parts of your list to sort">
                  <outline text="On the left is the sorted portion"/>
                  <outline text="On the right is the unsorted portion"/>
                </outline>
                <outline text="Starting in the unsorted portion, grab the first value"/>
                <outline text="Find the area in the sorted section where it belongs and insert it"/>
                <outline text="Repeat at beginning of unsorted section now"/>
              </outline>
              <outline text="Analysis">
                <outline text="Simple to implement"/>
                <outline text="Efficient on (quite) small data sets"/>
                <outline text="Efficient on data sets which are already substantially sorted: it runs in O(n + d) time, where d is the number of inversions">
                  <outline text="Therefore good for adding something new to a dataset that is already sorted"/>
                </outline>
                <outline text="More efficient in practice than most other simple O(N^2) algorithms such as selection sort or bubble sort: the average time is (N^2)/4 and it is linear in the best case "/>
                <outline text="Stable"/>
                <outline text="In-place (only requires a constant amount O(1) of extra memory space)"/>
                <outline text="It is an online algorithm, in that it can sort a list as it receives it."/>
              </outline>
              <outline text="Implementation">
                <outline text="Pseudocode/Java">
                  <outline text="sort(List list)">
                    <outline text="List results = new LinkedList()"/>
                    <outline text="Iterator it = list.iterator()"/>
                    <outline text="Object current"/>
                    <outline text="for(current = it.first(); it.hasNext(); current = it.next()) ">
                      <outline text="int slot = results.size()"/>
                      <outline text="while(slot &gt; 0)">
                        <outline text="if current comes before (is smaller) than results.get(slot - 1)">
                          <outline text="break"/>
                        </outline>
                        <outline text="slot--"/>
                      </outline>
                      <outline text="results.insert(slot, current)"/>
                    </outline>
                  </outline>
                  <outline text="We scan from end of the list in terms of the already sorted portion. Big advantage when it comes to sorting already sorted or near-sorted lists. Also maintains stability."/>
                </outline>
              </outline>
            </outline>
          </outline>
          <outline text="Advanced">
            <outline text="Shellsort">
              <outline text="Idea">
                <outline text="Arrange data sequence in two-dimensional array"/>
                <outline text="Sort the columns of the array"/>
                <outline text="Repeat steps over and over with narrower array (i.e. less columns)"/>
                <outline text="Continue until just one column"/>
              </outline>
              <outline text="Implementation">
                <outline text="We don't actually have two-dimensional array"/>
                <outline text="Instead, one-dimensional array and just have a 'step' value to grab every k-th element, which simulates a column">
                  <outline text="Also known as a 'gap'"/>
                </outline>
                <outline text="Then, do an insertion sort on this 'column'"/>
                <outline text="Sensitive to choosing 'gap' value">
                  <outline text="Best known is 1, 4, 10, 23, 57, 132, 301, 701">
                    <outline text="Start with larger gaps, than work down"/>
                  </outline>
                  <outline text="You can compute these yourself">
                    <outline text="nextgap = round(gap * 2.3)"/>
                  </outline>
                  <outline text="Pseudocode (Java)">
                    <outline text="public static void shellSort(int[] a)">
                      <outline text="for (int increment = a.length / 2; increment &gt; 0; increment = (increment == 2 ? 1 : (int) Math.round(increment / 2.2)))">
                        <outline text="// could just delegate to an insertionSort method that takes a gap factor"/>
                        <outline text="for (int i = increment; i &lt; a.length; i++)">
                          <outline text="for (int j = i; j &gt;= increment &amp;&amp; a[j - increment] &gt; a[j]; j -= increment)">
                            <outline text="int temp = a[j];"/>
                            <outline text="a[j] = a[j - increment];"/>
                            <outline text="a[j - increment] = temp;&#10;"/>
                          </outline>
                        </outline>
                      </outline>
                    </outline>
                  </outline>
                </outline>
              </outline>
              <outline text="Performance">
                <outline text="Worst case O(N log^2 N)">
                  <outline text="Worse than optimal comparison sort, which is O(N log N)"/>
                </outline>
                <outline text="Not stable"/>
              </outline>
            </outline>
            <outline text="Quicksort">
              <outline text="Idea/Algorithm">
                <outline text="Divide and conquer approach to divide a list into two lists"/>
                <outline text="Pick an element, called a pivot, from the list"/>
                <outline text="Reorder the list so that all elements which are less than the pivot come before the pivot and so that all elements greater than the pivot come after it (equal values can go either way). After this partitioning, the pivot is in its final position. This is called the partition operation."/>
                <outline text="Recursively sort the sub-list of lesser elements and the sub-list of greater elements">
                  <outline text="Base case is sub-lists with 0 or 1 elements"/>
                </outline>
                <outline text="Issue is that the above requires O(N) of an extra array">
                  <outline text="Can seriously impact performance in real-world due to extra memory usage"/>
                </outline>
                <outline text="An in-place version is possible"/>
              </outline>
              <outline text="Implementation of in-place version">
                <outline text="Pseudocode">
                  <outline text="function partition(array, left, right, pivotIndex)">
                    <outline text="pivotValue := array[pivotIndex]"/>
                    <outline text="swap( array, pivotIndex, right) // Move pivot to end"/>
                    <outline text="storeIndex := left - 1"/>
                    <outline text="for i  from  left to right-1">
                      <outline text="if array[i] &lt;= pivotValue">
                        <outline text="storeIndex := storeIndex + 1"/>
                        <outline text="swap( array, storeIndex, i)"/>
                      </outline>
                    </outline>
                    <outline text="swap( array, right, storeIndex+1) // Move pivot to its final place"/>
                    <outline text="return storeIndex+1"/>
                  </outline>
                  <outline text="function quicksort(array, left, right)">
                    <outline text="if right &gt; left">
                      <outline text="select a pivot index (e.g. pivotIndex := left)"/>
                      <outline text="pivotNewIndex := partition(array, left, right, pivotIndex)"/>
                      <outline text="quicksort(array, left, pivotNewIndex-1)"/>
                      <outline text="quicksort(array, pivotNewIndex+1, right)"/>
                    </outline>
                  </outline>
                </outline>
              </outline>
              <outline text="Performance">
                <outline text="Average case is O(N log N)"/>
                <outline text="Can devolve to a worse case of O(N^2) though">
                  <outline text="Choosing the pivot point is the most important thing"/>
                </outline>
                <outline text="In real world, on real architectures, fastest comparison versus other O(N log N) algorithms"/>
                <outline text="Not stable"/>
              </outline>
            </outline>
            <outline text="Mergesort">
              <outline text="Idea">
                <outline text="Divide the unsorted list into two sublists of about half the size"/>
                <outline text="Divide each of the two sublists recursively until we have list sizes of length 1, in which case the list itself is returned"/>
                <outline text="Merge the two sublists back into one sorted list">
                  <outline text="Most work happens in merge"/>
                </outline>
              </outline>
              <outline text="Implementation">
                <outline text="Pseudocode">
                  <outline text="function mergesort(m)">
                    <outline text="var list left, right, result"/>
                    <outline text=" if length(m) ≤ 1">
                      <outline text="return m"/>
                    </outline>
                    <outline text="else">
                      <outline text="var middle = length(m) / 2"/>
                      <outline text="for each x in m up to middle">
                        <outline text="add x to left"/>
                      </outline>
                      <outline text="for each x in m after middle">
                        <outline text="add x to right"/>
                      </outline>
                      <outline text="left = mergesort(left)"/>
                      <outline text="right = mergesort(right)"/>
                      <outline text="result = merge(left, right)"/>
                    </outline>
                    <outline text="return result"/>
                  </outline>
                </outline>
                <outline text="function merge(left,right)">
                  <outline text="var list result"/>
                  <outline text="while length(left) &gt; 0 and length(right) &gt; 0">
                    <outline text="if first(left) ≤ first(right)">
                      <outline text="append first(left) to result"/>
                      <outline text="left = rest(left)"/>
                    </outline>
                    <outline text="else">
                      <outline text="append first(right) to result"/>
                      <outline text="right = rest(right)"/>
                    </outline>
                  </outline>
                  <outline text="if length(left) &gt; 0 ">
                    <outline text="append rest(left) to result"/>
                  </outline>
                  <outline text="if length(right) &gt; 0 ">
                    <outline text="append rest(right) to result"/>
                  </outline>
                  <outline text="return result"/>
                </outline>
              </outline>
              <outline text="Performance">
                <outline text="O(N log N)"/>
                <outline text="stable"/>
              </outline>
            </outline>
          </outline>
        </outline>
        <outline text="Searching/Matching">
          <outline text="Binary searching">
            <outline text="Technique for locating items in a sorted list"/>
            <outline text="Runs in O(log N) time because data is sorted"/>
            <outline text="Basic technique">
              <outline text="Continually divide data in half, progressively narrowing down the search space until you find a match or there are no more items to process"/>
            </outline>
            <outline text="Algorithm">
              <outline text="Start in the middle of the list"/>
              <outline text="Compare the search key with the item at the current location"/>
              <outline text="If search key is current location, return"/>
              <outline text="If search key is less than the item at current location, then value must be in left half - divide the list and use left half, repeating"/>
              <outline text="If search key is greater than the item at current location, value must be in right half - divide the list and use that, repeating"/>
            </outline>
            <outline text="Recursive Binary Searcher">
              <outline text="Just algorithm above"/>
            </outline>
            <outline text="Iterative Binary Searcher">
              <outline text="Recursive search code, for each time it recurses, does little more than modify one of the upper and lower index variables"/>
              <outline text="This means you can get rid of recursion and use a while loop, simply modifying the upper and lower index variables"/>
              <outline text="Pseudocode">
                <outline text="function search(List list, Object key)">
                  <outline text="lowerIndex = 0"/>
                  <outline text="upperIndex = list.size() - 1"/>
                  <outline text="while (lowerIndex &lt;= upperIndex)">
                    <outline text="index = lowerIndex + (upperIndex - lowerIndex) / 2"/>
                    <outline text="if(key == list.get(index)">
                      <outline text="return value"/>
                    </outline>
                    <outline text="else if (key is less than list.get(index))">
                      <outline text="upperIndex = index - 1"/>
                    </outline>
                    <outline text="else">
                      <outline text="lowerIndex = index + 1"/>
                    </outline>
                  </outline>
                  <outline text="if we didnt find anything">
                    <outline text="return null"/>
                  </outline>
                  <outline text="else return index"/>
                </outline>
              </outline>
              <outline text="Optimization">
                <outline text="Continue searching only while you belive there is a chance that the search key might exist"/>
                <outline text="When reach point in the list where the search key would sort before the current item, terminate loop"/>
              </outline>
            </outline>
            <outline text="Performance">
              <outline text="Binary search is O(log N), while linear search is O(N)"/>
              <outline text="Actual performance is excellent as long as you are using a data structure that performs index-based lookup"/>
              <outline text="If using linked list, continual traversal of list kills you"/>
            </outline>
            <outline text="Binary Insertion">
              <outline text="Maintains data in sorted order"/>
              <outline text="Don't want to perform sort after every insertion"/>
              <outline text="Only difference with binary searching is that you find position and then insert"/>
              <outline text="Performance is O(log N!) - (notice the factorial)">
                <outline text="Because you want to be using index-based data structure, must recopy everything to right for insertion, which can become overhead on large datasets"/>
              </outline>
            </outline>
          </outline>
          <outline text="String searching">
            <outline text="Find one strings within another"/>
            <outline text="Brute Force">
              <outline text="Just scan through text, at each position looking for target word"/>
              <outline text="Simple"/>
              <outline text="O(NM), where N is length of text and M is length of pattern"/>
            </outline>
            <outline text="Boyer-Moore Algorithm">
              <outline text="Many moves in brute-force are redundant"/>
              <outline text="Can skip through string faster because it doesn't even have characters in pattern"/>
              <outline text="How many places to skip when you find a mismatch?">
                <outline text="Each time you encounter a failed match, you search the pattern for the last (rightmost) occurence of the offending character and proceed according to the bad-character heuristic:">
                  <outline text="If the character exists within the pattern, you shift right enough places to align the character in the pattern with the one in the text"/>
                  <outline text="If the character doesn't exist within the pattern, you shift right enough places to move just beyond it"/>
                  <outline text="Whenever above two propose negative shift, just move right one before returning to Boyer-Moore algorithm"/>
                </outline>
              </outline>
              <outline text="Performance">
                <outline text="If text does not have pattern, then entire length of pattern can be skipped each time, leading to best-case running time of O(N/M), where N is length of text and M is length of pattern"/>
                <outline text="In actuality performs twice or four times faster than brute force version">
                  <outline text="The longer the pattern, the better the performance"/>
                </outline>
              </outline>
              <outline text="Implementation">
                <outline text="Create a last occurrence table (only good for ASCII -- for larger languages not good) so you don't have to scan pattern each time for location">
                  <outline text="Have an array of 26 positions (alphabet), fill each one with -1 by default; for a given position, representing that letter, put the location in the pattern of its rightmost location if present"/>
                </outline>
                <outline text="For search method">
                  <outline text="Scan text from left to right (s)">
                    <outline text="For the letter in the text, scan the pattern from right to left to find portion that doesn't match (i)">
                      <outline text="If we made it to the far left, we have a match -- return"/>
                    </outline>
                    <outline text="Else, nice trick:">
                      <outline text="s += Match.max(i - lastOccurence[c], 1)"/>
                      <outline text="s is the index in the text to search"/>
                      <outline text="i is location of last mismatch"/>
                      <outline text="lastOccurence[] is the lookup table"/>
                      <outline text="Automatically encapsulates rules of algorithm in that one line"/>
                    </outline>
                  </outline>
                </outline>
              </outline>
            </outline>
            <outline text="String Match Iterator"/>
          </outline>
          <outline text="String matching">
            <outline text="Soundex">
              <outline text="Phonetic encoding algorithm">
                <outline text="Takes strings and converts similar sounding words into same encoded value"/>
              </outline>
              <outline text="Others">
                <outline text="Metaphone and Double-Metaphone"/>
              </outline>
              <outline text="Algorithm (English only)">
                <outline text="Process string from left to write"/>
                <outline text="Apply transformation to each letter"/>
                <outline text="Produces 4-letter code of form LDDD, where L is a letter and D is a digit from 0 to 6"/>
                <outline text="Rules:">
                  <outline text="All characters processed as upper case"/>
                  <outline text="Always use first letter"/>
                  <outline text="Drop all other characters if they are A, E, I, O, U, H, W, Y"/>
                  <outline text="Translate remaining letters as follows:">
                    <outline text="B, F, P, V = 1"/>
                    <outline text="C, G, J, K, Q, S, X, Z = 2"/>
                    <outline text="D and T = 3"/>
                    <outline text="L = 4"/>
                    <outline text="M and N = 5"/>
                    <outline text="R = 6"/>
                  </outline>
                  <outline text="Drop consecutive letters having same code"/>
                  <outline text="Pad with zeros if necessary"/>
                </outline>
              </outline>
              <outline text="Performance">
                <outline text="O(N)"/>
              </outline>
              <outline text="Implementation">
                <outline text="Have a CHARACTER_MAP that maps the rules above that we can quickly use">
                  <outline text="char[] CHARACTER_MAP = &quot;01230120022455012523010202&quot;.toCharArray()"/>
                  <outline text="Useful for quick lookup"/>
                </outline>
                <outline text="Scan through string, map first letter to result, then do lookup for each letter -- don't copy over matches that are the same number; pad with zeros if not length of 4"/>
              </outline>
            </outline>
            <outline text="Levenshtein Word Distance">
              <outline text="Soundex not good at finding large typing errors"/>
              <outline text="Levenshtein Word Distance also known as edit distance"/>
              <outline text="Compares words for similarity by calculating the smallest number of insertions, deletions, and substitutions to transform one string into another"/>
              <outline text="Choose some limit, such as 4, below which the distance between the two words is short enough to consider"/>
              <outline text="Each operation is assigned a cost, and the smallest distance is the set of changes with the smallest total cost"/>
              <outline text="Make a table">
                <outline text="One word on top (target word), one along the left (source word)"/>
                <outline text="Have cell 0 at 0,0 - run the numbers along the first row and column as well"/>
                <outline text="Calculate values for cells in the grid. Value for each cell from following formula:">
                  <outline text="min(left diagonal + substitution cost, above + delete cost, left + insert cost)"/>
                </outline>
                <outline text="Cost for insertion and deletion always one, but cost for substitution is only one when the source and target characters don't match"/>
                <outline text="Value in bottom-right cell of grid shows minimum distance"/>
                <outline text="Now you can walk from upper-left to bottom-right to get edit sequence">
                  <outline text="Move diagonally is a substitution"/>
                  <outline text="Move right an insertion"/>
                  <outline text="Move down a deletion"/>
                </outline>
              </outline>
              <outline text="Performance">
                <outline text="O(MN), where M is source and target is N">
                  <outline text="Not really good enough for a spell checker"/>
                </outline>
              </outline>
            </outline>
          </outline>
        </outline>
        <outline text="Computational Geometry">
          <outline text="Basic geometry">
            <outline text="Pythagoras' theorem">
              <outline text="a^2 + b^2 = c^2"/>
            </outline>
            <outline text="Formula for line">
              <outline text="y = mx + b"/>
              <outline text="m is slope, b is point at which line cuts y axis"/>
            </outline>
            <outline text="Slope">
              <outline text="How steep it is"/>
              <outline text="Rise">
                <outline text="Vertical distance (y-axis)"/>
              </outline>
              <outline text="Travel">
                <outline text="Horizontal distance (x-axis)"/>
              </outline>
              <outline text="Slope is ratio of rise to travel"/>
              <outline text="Slopes can be negative"/>
              <outline text="Vertical line has slope of infinity"/>
            </outline>
          </outline>
          <outline text="Finding intersection point">
            <outline text="Line one">
              <outline text="y = mx + b"/>
            </outline>
            <outline text="Line two">
              <outline text="y = nx + c"/>
            </outline>
            <outline text="Equation">
              <outline text="x = (c - b) / (m - n)"/>
            </outline>
            <outline text="Solve, then plug x back in to get y"/>
            <outline text="If one of the lines is vertical, then you can just plug y in directly to other formula"/>
          </outline>
          <outline text="Finding closest pair of points">
            <outline text="Dumb approach is O(N^2) - compare every point with other point"/>
            <outline text="Plane Sweep algorithm better">
              <outline text="Considers each point in order from left to right in the coordinate system"/>
              <outline text="Make a single pass, or sweep, across the two-dimensional plane containing the points, remembering the currently known smallest distance and the two points that are separated by that minimum distance"/>
              <outline text="The point being considered as sweep progress is placed at right edge of a rectangular box called the drag net"/>
              <outline text="Width of drag net has a width of d, which is length of shortest known distance -- width is behind box being considered"/>
              <outline text="Now we check distance of point on sweep line with all other points under the drag net to see if they make a smaller distance"/>
              <outline text="If closer pair found, we proceed with smaller drag net until every point processed"/>
            </outline>
            <outline text="Implementation">
              <outline text="Points need to be sorted acording to their coordinates">
                <outline text="place them in a set, so no duplicates, and if same x then consider y when sorting (just make a custom comparator)"/>
              </outline>
              <outline text="Then scan with algorithm"/>
            </outline>
          </outline>
        </outline>
      </outline>
    </outline>
  </body>
</opml>
