集合的一个关键的特点就是不能存放重复的元素,二分搜索树是一个非常好的实现集合的底层数据结构
1、二分搜索树实现集合:
set接口
package Set;public interface Set<E> {void add(E e);boolean contains(E e);void remove(E e);int getSize();boolean isEmpty();
}
BST.class
package Set;import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;public class BST<E extends Comparable<E>> {private class Node{public E e;public Node left, right;public Node(E e){this.e = e;left = null;right = null;}}private Node root;private int size;public BST(){root = null;size = 0;}public int size(){return size;}public boolean isEmpty(){return size == 0;}// 向二分搜索树中添加新的元素epublic void add(E e){root = add(root, e);}// 向以node为根的二分搜索树中插入元素e,递归算法// 返回插入新节点后二分搜索树的根private Node add(Node node, E e){if(node == null){size ++;return new Node(e);}if(e.compareTo(node.e) < 0)node.left = add(node.left, e);else if(e.compareTo(node.e) > 0)node.right = add(node.right, e);return node;}// 看二分搜索树中是否包含元素epublic boolean contains(E e){return contains(root, e);}// 看以node为根的二分搜索树中是否包含元素e, 递归算法private boolean contains(Node node, E e){if(node == null)return false;if(e.compareTo(node.e) == 0)return true;else if(e.compareTo(node.e) < 0)return contains(node.left, e);else // e.compareTo(node.e) > 0return contains(node.right, e);}// 二分搜索树的前序遍历public void preOrder(){preOrder(root);}// 前序遍历以node为根的二分搜索树, 递归算法private void preOrder(Node node){if(node == null)return;System.out.println(node.e);preOrder(node.left);preOrder(node.right);}// 二分搜索树的非递归前序遍历public void preOrderNR(){Stack<Node> stack = new Stack<>();stack.push(root);while(!stack.isEmpty()){Node cur = stack.pop();System.out.println(cur.e);if(cur.right != null)stack.push(cur.right);if(cur.left != null)stack.push(cur.left);}}// 二分搜索树的中序遍历public void inOrder(){inOrder(root);}// 中序遍历以node为根的二分搜索树, 递归算法private void inOrder(Node node){if(node == null)return;inOrder(node.left);System.out.println(node.e);inOrder(node.right);}// 二分搜索树的后序遍历public void postOrder(){postOrder(root);}// 后序遍历以node为根的二分搜索树, 递归算法private void postOrder(Node node){if(node == null)return;postOrder(node.left);postOrder(node.right);System.out.println(node.e);}// 二分搜索树的层序遍历public void levelOrder(){Queue<Node> q = new LinkedList<>();q.add(root);while(!q.isEmpty()){Node cur = q.remove();System.out.println(cur.e);if(cur.left != null)q.add(cur.left);if(cur.right != null)q.add(cur.right);}}// 寻找二分搜索树的最小元素public E minimum(){if(size == 0)throw new IllegalArgumentException("BST is empty!");return minimum(root).e;}// 返回以node为根的二分搜索树的最小值所在的节点private Node minimum(Node node){if(node.left == null)return node;return minimum(node.left);}// 寻找二分搜索树的最大元素public E maximum(){if(size == 0)throw new IllegalArgumentException("BST is empty");return maximum(root).e;}// 返回以node为根的二分搜索树的最大值所在的节点private Node maximum(Node node){if(node.right == null)return node;return maximum(node.right);}// 从二分搜索树中删除最小值所在节点, 返回最小值public E removeMin(){E ret = minimum();root = removeMin(root);return ret;}// 删除掉以node为根的二分搜索树中的最小节点// 返回删除节点后新的二分搜索树的根private Node removeMin(Node node){if(node.left == null){Node rightNode = node.right;node.right = null;size --;return rightNode;}node.left = removeMin(node.left);return node;}// 从二分搜索树中删除最大值所在节点public E removeMax(){E ret = maximum();root = removeMax(root);return ret;}// 删除掉以node为根的二分搜索树中的最大节点// 返回删除节点后新的二分搜索树的根private Node removeMax(Node node){if(node.right == null){Node leftNode = node.left;node.left = null;size --;return leftNode;}node.right = removeMax(node.right);return node;}// 从二分搜索树中删除元素为e的节点public void remove(E e){root = remove(root, e);}// 删除掉以node为根的二分搜索树中值为e的节点, 递归算法// 返回删除节点后新的二分搜索树的根private Node remove(Node node, E e){if( node == null )return null;if( e.compareTo(node.e) < 0 ){node.left = remove(node.left , e);return node;}else if(e.compareTo(node.e) > 0 ){node.right = remove(node.right, e);return node;}else{ // e.compareTo(node.e) == 0// 待删除节点左子树为空的情况if(node.left == null){Node rightNode = node.right;node.right = null;size --;return rightNode;}// 待删除节点右子树为空的情况if(node.right == null){Node leftNode = node.left;node.left = null;size --;return leftNode;}// 待删除节点左右子树均不为空的情况// 找到比待删除节点大的最小节点, 即待删除节点右子树的最小节点// 用这个节点顶替待删除节点的位置Node successor = minimum(node.right);successor.right = removeMin(node.right);successor.left = node.left;node.left = node.right = null;return successor;}}@Overridepublic String toString(){StringBuilder res = new StringBuilder();generateBSTString(root, 0, res);return res.toString();}// 生成以node为根节点,深度为depth的描述二叉树的字符串private void generateBSTString(Node node, int depth, StringBuilder res){if(node == null){res.append(generateDepthString(depth) + "null\n");return;}res.append(generateDepthString(depth) + node.e +"\n");generateBSTString(node.left, depth + 1, res);generateBSTString(node.right, depth + 1, res);}private String generateDepthString(int depth){StringBuilder res = new StringBuilder();for(int i = 0 ; i < depth ; i ++)res.append("--");return res.toString();}
}
BSTSet.class
package Set;public class BSTSet<E extends Comparable<E>> implements Set<E> {private BST<E> bst;public BSTSet() {bst = new BST<E>();}@Overridepublic int getSize() {return bst.size();}@Overridepublic boolean isEmpty() {return bst.isEmpty();}@Overridepublic void add(E e) {bst.add(e);}@Overridepublic boolean contains(E e) {return bst.contains(e);}@Overridepublic void remove(E e) {bst.remove(e);}
}
例:词汇量统计 相同的词汇量只统计一次
实现原理:
我们先读取《傲慢与偏见》这本书的所有单词,然后将它存放在一个数组里,这个数组里面的单词是包括重复的,然后我们再建一个以二分搜索树实现的集合Set,遍历数组,将里面的所有单词再放在集合set中,因为二分搜索树不允许包括重复的元素,所以这时就可以实现相同的统计量只统计一次了。
FileOperation.class
package Set;import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Scanner;// 文件相关操作
public class FileOperation {// 读取文件名称为filename中的内容,并将其中包含的所有词语放进words中public static boolean readFile(String filename, ArrayList<String> words) {if (filename == null || words == null) {System.out.println("filename is null or words is null");return false;}// 文件读取Scanner scanner;try {File file = new File(filename);if (file.exists()) {FileInputStream fis = new FileInputStream(file);scanner = new Scanner(new BufferedInputStream(fis), "UTF-8");scanner.useLocale(Locale.ENGLISH);} elsereturn false;} catch (IOException ioe) {System.out.println("Cannot open " + filename);return false;}// 简单分词// 这个分词方式相对简陋, 没有考虑很多文本处理中的特殊问题// 在这里只做demo展示用if (scanner.hasNextLine()) {String contents = scanner.useDelimiter("\\A").next();int start = firstCharacterIndex(contents, 0);for (int i = start + 1; i <= contents.length(); )if (i == contents.length() || !Character.isLetter(contents.charAt(i))) {String word = contents.substring(start, i).toLowerCase();words.add(word);start = firstCharacterIndex(contents, i);i = start + 1;} elsei++;}return true;}// 寻找字符串s中,从start的位置开始的第一个字母字符的位置private static int firstCharacterIndex(String s, int start) {for (int i = start; i < s.length(); i++)if (Character.isLetter(s.charAt(i)))return i;return s.length();}
}
测试Main.class
package Set;import java.util.ArrayList;public class Main {public static void main(String[] args) {System.out.println("Pride and Prejudice");ArrayList<String> words1 = new ArrayList<>();if (FileOperation.readFile("pride-and-prejudice.txt", words1)) {System.out.println("Total words 包含重复的: " + words1.size());BSTSet<String> set1 = new BSTSet<String>();for (String word : words1) {set1.add(word);}System.out.println("Total different words: " + set1.getSize());}System.out.println();System.out.println("A Tale of Two Cities");ArrayList<String> words2 = new ArrayList<>();if (FileOperation.readFile("a-tale-of-two-cities.txt", words2)) {System.out.println("Total words: " + words2.size());BSTSet<String> set2 = new BSTSet<String>();for (String word : words2)set2.add(word);System.out.println("Total different words: " + set2.getSize());}}
}
测试结果:
2、链表实现集合:
LinkedList.class
package Set;public class LinkedList<E> {private class Node{public E e;public Node next;public Node(E e, Node next){this.e = e;this.next = next;}public Node(E e){this(e, null);}public Node(){this(null, null);}@Overridepublic String toString(){return e.toString();}}private Node dummyHead;private int size;public LinkedList(){dummyHead = new Node();size = 0;}// 获取链表中的元素个数public int getSize(){return size;}// 返回链表是否为空public boolean isEmpty(){return size == 0;}// 在链表的index(0-based)位置添加新的元素e// 在链表中不是一个常用的操作,练习用:)public void add(int index, E e){if(index < 0 || index > size)throw new IllegalArgumentException("Add failed. Illegal index.");Node prev = dummyHead;for(int i = 0 ; i < index ; i ++)prev = prev.next;prev.next = new Node(e, prev.next);size ++;}// 在链表头添加新的元素epublic void addFirst(E e){add(0, e);}// 在链表末尾添加新的元素epublic void addLast(E e){add(size, e);}// 获得链表的第index(0-based)个位置的元素// 在链表中不是一个常用的操作,练习用:)public E get(int index){if(index < 0 || index >= size)throw new IllegalArgumentException("Get failed. Illegal index.");Node cur = dummyHead.next;for(int i = 0 ; i < index ; i ++)cur = cur.next;return cur.e;}// 获得链表的第一个元素public E getFirst(){return get(0);}// 获得链表的最后一个元素public E getLast(){return get(size - 1);}// 修改链表的第index(0-based)个位置的元素为e// 在链表中不是一个常用的操作,练习用:)public void set(int index, E e){if(index < 0 || index >= size)throw new IllegalArgumentException("Set failed. Illegal index.");Node cur = dummyHead.next;for(int i = 0 ; i < index ; i ++)cur = cur.next;cur.e = e;}// 查找链表中是否有元素epublic boolean contains(E e){Node cur = dummyHead.next;while(cur != null){if(cur.e.equals(e))return true;cur = cur.next;}return false;}// 从链表中删除index(0-based)位置的元素, 返回删除的元素// 在链表中不是一个常用的操作,练习用:)public E remove(int index){if(index < 0 || index >= size)throw new IllegalArgumentException("Remove failed. Index is illegal.");Node prev = dummyHead;for(int i = 0 ; i < index ; i ++)prev = prev.next;Node retNode = prev.next;prev.next = retNode.next;retNode.next = null;size --;return retNode.e;}// 从链表中删除第一个元素, 返回删除的元素public E removeFirst(){return remove(0);}// 从链表中删除最后一个元素, 返回删除的元素public E removeLast(){return remove(size - 1);}// 从链表中删除元素epublic void removeElement(E e){Node prev = dummyHead;while(prev.next != null){if(prev.next.e.equals(e))break;prev = prev.next;}if(prev.next != null){Node delNode = prev.next;prev.next = delNode.next;delNode.next = null;size --;}}@Overridepublic String toString(){StringBuilder res = new StringBuilder();Node cur = dummyHead.next;while(cur != null){res.append(cur + "->");cur = cur.next;}res.append("NULL");return res.toString();}
}
LinkedListSet.class
package Set;import java.util.ArrayList;public class LinkedListSet<E> implements Set<E> {private LinkedList<E> list;public LinkedListSet() {list = new LinkedList<E>();}@Overridepublic int getSize() {return list.getSize();}@Overridepublic boolean isEmpty() {return list.isEmpty();}@Overridepublic void add(E e) {if (!list.contains(e))list.addFirst(e);}@Overridepublic boolean contains(E e) {return list.contains(e);}@Overridepublic void remove(E e) {list.removeElement(e);}public static void main(String[] args) {System.out.println("Pride and Prejudice");ArrayList<String> words1 = new ArrayList<>();if (FileOperation.readFile("pride-and-prejudice.txt", words1)) {System.out.println("Total words: " + words1.size());LinkedListSet<String> set1 = new LinkedListSet<String>();for (String word : words1)set1.add(word);System.out.println("Total different words: " + set1.getSize());}System.out.println();System.out.println("A Tale of Two Cities");ArrayList<String> words2 = new ArrayList<>();if (FileOperation.readFile("a-tale-of-two-cities.txt", words2)) {System.out.println("Total words: " + words2.size());LinkedListSet<String> set2 = new LinkedListSet<String>();for (String word : words2)set2.add(word);System.out.println("Total different words: " + set2.getSize());}}
}
3、两种实现方式时间复杂度的对比
package Set;import java.util.ArrayList;//时间复杂度对比
public class TimeComplexityofSet {private static double testSet(Set<String> set, String filename){long startTime = System.nanoTime();System.out.println(filename);ArrayList<String> words = new ArrayList<>();if(FileOperation.readFile(filename, words)) {System.out.println("Total words: " + words.size());for (String word : words)set.add(word);System.out.println("Total different words: " + set.getSize());}long endTime = System.nanoTime();return (endTime - startTime) / 1000000000.0;}public static void main(String[] args) {String filename = "pride-and-prejudice.txt";BSTSet<String> bstSet = new BSTSet<String>();double time1 = testSet(bstSet, filename);System.out.println("BST Set: " + time1 + " s");System.out.println();LinkedListSet<String> linkedListSet = new LinkedListSet<String>();double time2 = testSet(linkedListSet, filename);System.out.println("Linked List Set: " + time2 + " s");}
}
通过测试可以看出二分搜索树是高效的
二分搜索树的最坏情况,这样的二分搜索树和链表是无异的。换句话来说,二分搜索树可能退化成链表。如下: