开发者

Getting a list of words from a Trie

开发者 https://www.devze.com 2022-12-29 20:44 出处:网络
I\'m looking to use the following code to not check whether there is a word matching in the Trie but to return a list all words beginning with the prefix inputted by the user. Can someone point me in

I'm looking to use the following code to not check whether there is a word matching in the Trie but to return a list all words beginning with the prefix inputted by the user. Can someone point me in the right direction? I can't get it working at all.....

public boolean search(String s)
{
    Node current = root;
    System.out.println("\nSearching for string: "+s);

    while(current != null)
    {
        for(int i=0;i<s.length();i++)
        {               
            if(current.child[(int)(s.charAt(i)-'a')] == null)
            {
                System.out.print开发者_如何学运维ln("Cannot find string: "+s);
                return false;
            }
            else
            {
                current = current.child[(int)(s.charAt(i)-'a')];
                System.out.println("Found character: "+ current.content);
            }
        }
        // If we are here, the string exists.
        // But to ensure unwanted substrings are not found:

        if (current.marker == true)
        {
            System.out.println("Found string: "+s);
            return true;
        }
        else
        {
            System.out.println("Cannot find string: "+s +"(only present as a substring)");
            return false;
        }
    }

    return false; 
}

}


I faced this problem while trying to make a text auto-complete module. I solved the problem by making a Trie in which each node contains it's parent node as well as children. First I searched for the node starting at the input prefix. Then I applied a Traversal on the Trie that explores all the nodes of the sub-tree with it's root as the prefix node. whenever a leaf node is encountered, it means that the end of a word starting from input prefix has been found. Starting from that leaf node I iterate through the parent nodes getting parent of parent, and reach the root of the subtree. While doing so I kept adding the keys of nodes in a stack. In the end I took the prefix and started appended it by popping the stack. I kept on saving the words in an ArrayList. At the end of the traversal I get all the words starting from the input prefix. Here is the code with usage example:

class TrieNode
{
    char c;
    TrieNode parent;
    HashMap<Character, TrieNode> children = new HashMap<Character, TrieNode>();
    boolean isLeaf;

    public TrieNode() {}
    public TrieNode(char c){this.c = c;}
}

-

public class Trie
{
    private TrieNode root;
    ArrayList<String> words; 
    TrieNode prefixRoot;
    String curPrefix;

    public Trie()
    {
        root = new TrieNode();
        words  = new ArrayList<String>();
    }

    // Inserts a word into the trie.
    public void insert(String word) 
    {
        HashMap<Character, TrieNode> children = root.children;

        TrieNode crntparent;

        crntparent = root;

        //cur children parent = root

        for(int i=0; i<word.length(); i++)
        {
            char c = word.charAt(i);

            TrieNode t;
            if(children.containsKey(c)){ t = children.get(c);}
            else
            {
            t = new TrieNode(c);
            t.parent = crntparent;
            children.put(c, t);
            }

            children = t.children;
            crntparent = t;

            //set leaf node
            if(i==word.length()-1)
                t.isLeaf = true;    
        }
    }

    // Returns if the word is in the trie.
    public boolean search(String word)
    {
        TrieNode t = searchNode(word);
        if(t != null && t.isLeaf){return true;}
        else{return false;}
    }

    // Returns if there is any word in the trie
    // that starts with the given prefix.
    public boolean startsWith(String prefix) 
    {
        if(searchNode(prefix) == null) {return false;}
        else{return true;}
    }

    public TrieNode searchNode(String str)
    {
        Map<Character, TrieNode> children = root.children; 
        TrieNode t = null;
        for(int i=0; i<str.length(); i++)
        {
            char c = str.charAt(i);
            if(children.containsKey(c))
            {
                t = children.get(c);
                children = t.children;
            }
            else{return null;}
        }

        prefixRoot = t;
        curPrefix = str;
        words.clear();
        return t;
    }


    ///////////////////////////


  void wordsFinderTraversal(TrieNode node, int offset) 
  {
        //  print(node, offset);

        if(node.isLeaf==true)
        {
          //println("leaf node found");

          TrieNode altair;
          altair = node;

          Stack<String> hstack = new Stack<String>(); 

          while(altair != prefixRoot)
          {
            //println(altair.c);
            hstack.push( Character.toString(altair.c) );
            altair = altair.parent;
          }

          String wrd = curPrefix;

          while(hstack.empty()==false)
          {
            wrd = wrd + hstack.pop();
          }

          //println(wrd);
          words.add(wrd);

        }

         Set<Character> kset = node.children.keySet();
         //println(node.c); println(node.isLeaf);println(kset);
         Iterator itr = kset.iterator();
         ArrayList<Character> aloc = new ArrayList<Character>();

       while(itr.hasNext())
       {
        Character ch = (Character)itr.next();  
        aloc.add(ch);
        //println(ch);
       } 

     // here you can play with the order of the children

       for( int i=0;i<aloc.size();i++)
       {
        wordsFinderTraversal(node.children.get(aloc.get(i)), offset + 2);
       } 

  }


 void displayFoundWords()
 {
   println("_______________");
  for(int i=0;i<words.size();i++)
  {
    println(words.get(i));
  } 
  println("________________");

 }



}//

Example

Trie prefixTree;

prefixTree = new Trie();  

  prefixTree.insert("GOING");
  prefixTree.insert("GONG");
  prefixTree.insert("PAKISTAN");
  prefixTree.insert("SHANGHAI");
  prefixTree.insert("GONDAL");
  prefixTree.insert("GODAY");
  prefixTree.insert("GODZILLA");

  if( prefixTree.startsWith("GO")==true)
  {
    TrieNode tn = prefixTree.searchNode("GO");
    prefixTree.wordsFinderTraversal(tn,0);
    prefixTree.displayFoundWords(); 

  }

  if( prefixTree.startsWith("GOD")==true)
  {
    TrieNode tn = prefixTree.searchNode("GOD");
    prefixTree.wordsFinderTraversal(tn,0);
    prefixTree.displayFoundWords(); 

  }


After building Trie, you could do DFS starting from node, where you found prefix:

Here Node is Trie node, word=till now found word, res = list of words

def dfs(self, node, word, res):
    # Base condition: when at leaf node, add current word into our list
    if EndofWord at node: 
        res.append(word)
        return
    # For each level, go deep down, but DFS fashion 
    # add current char into our current word.
    for w in node:
        self.dfs(node[w], word + w, res)


The simplest solution is to use a depth-first search.

You go down the trie, matching letter by letter from the input. Then, once you have no more letter to match, everything under that node are strings that you want. Recursively explore that whole subtrie, building the string as you go down to its nodes.


This is easier to solve recursively in my opinion. It would go something like this:

  1. Write a recursive function Print that prints all the nodes in the trie rooted in the node you give as parameter. Wiki tells you how to do this (look at sorting).
  2. Find the last character of your prefix, and the node that is labeled with the character, going down from the root in your trie. Call the Print function with this node as the parameter. Then just make sure you also output the prefix before each word, since this will give you all the words without their prefix.

If you don't really care about efficiency, you can just run Print with the main root node and only print those words that start with the prefix you're interested in. This is easier to implement but slower.


You need to traverse the sub-tree starting at the node you found for the prefix.

Start in the same way, i.e. finding the correct node. Then, instead of checking its marker, traverse that tree (i.e. go over all its descendants; a DFS is a good way to do it) , saving the substring used to reach the "current" node from the first node.

If the current node is marked as a word, output* the prefix + substring reached.

* or add it to a list or something.


I built a trie once for one of ITA puzzles

public class WordTree {


class Node {

    private final char ch;

    /**
     * Flag indicates that this node is the end of the string.
     */
    private boolean end;

    private LinkedList<Node> children;

    public Node(char ch) {
        this.ch = ch;
    }

    public void addChild(Node node) {
        if (children == null) {
            children = new LinkedList<Node>();
        }
        children.add(node);
    }

    public Node getNode(char ch) {
        if (children == null) {
            return null;
        }
        for (Node child : children) {
            if (child.getChar() == ch) {
                return child;
            }
        }
        return null;
    }

    public char getChar() {
        return ch;
    }

    public List<Node> getChildren() {
        if (this.children == null) {
            return Collections.emptyList();
        }
        return children;
    }

    public boolean isEnd() {
        return end;
    }

    public void setEnd(boolean end) {
        this.end = end;
    }
}


Node root = new Node(' ');

public WordTree() {
}

/**
 * Searches for a strings that match the prefix.
 *
 * @param prefix - prefix
 * @return - list of strings that match the prefix, or empty list of no matches are found.
 */
public List<String> getWordsForPrefix(String prefix) {
    if (prefix.length() == 0) {
        return Collections.emptyList();
    }
    Node node = getNodeForPrefix(root, prefix);
    if (node == null) {
        return Collections.emptyList();
    }
    List<LinkedList<Character>> chars = collectChars(node);
    List<String> words = new ArrayList<String>(chars.size());
    for (LinkedList<Character> charList : chars) {
        words.add(combine(prefix.substring(0, prefix.length() - 1), charList));
    }
    return words;
}


private String combine(String prefix, List<Character> charList) {
    StringBuilder sb = new StringBuilder(prefix);
    for (Character character : charList) {
        sb.append(character);
    }
    return sb.toString();
}


private Node getNodeForPrefix(Node node, String prefix) {
    if (prefix.length() == 0) {
        return node;
    }
    Node next = node.getNode(prefix.charAt(0));
    if (next == null) {
        return null;
    }
    return getNodeForPrefix(next, prefix.substring(1, prefix.length()));
}


private List<LinkedList<Character>> collectChars(Node node) {
    List<LinkedList<Character>> chars = new ArrayList<LinkedList<Character>>();

    if (node.getChildren().size() == 0) {
        chars.add(new LinkedList<Character>(Collections.singletonList(node.getChar())));
    } else {
        if (node.isEnd()) {
            chars.add(new LinkedList<Character> 
            Collections.singletonList(node.getChar())));
        }
        List<Node> children = node.getChildren();
        for (Node child : children) {
            List<LinkedList<Character>> childList = collectChars(child);
            for (LinkedList<Character> characters : childList) {
                characters.push(node.getChar());
                chars.add(characters);
            }
        }
    }
    return chars;
}


public void addWord(String word) {
    addWord(root, word);
}

private void addWord(Node parent, String word) {
    if (word.trim().length() == 0) {
        return;
    }
    Node child = parent.getNode(word.charAt(0));
    if (child == null) {
        child = new Node(word.charAt(0));
        parent.addChild(child);
    } if (word.length() == 1) {
        child.setEnd(true);
    } else {
        addWord(child, word.substring(1, word.length()));
    }
}


public static void main(String[] args) {
    WordTree tree = new WordTree();
    tree.addWord("world");
    tree.addWord("work");
    tree.addWord("wolf");
    tree.addWord("life");
    tree.addWord("love");
    System.out.println(tree.getWordsForPrefix("wo"));
}

}


You would need to use a List
List<String> myList = new ArrayList<String>();
if(matchingStringFound)
myList.add(stringToAdd);


After your for loop, add a call to printAllStringsInTrie(current, s);

void printAllStringsInTrie(Node t, String prefix) {
  if (t.current_marker) System.out.println(prefix);
  for (int i = 0; i < t.child.length; i++) {
    if (t.child[i] != null) {
      printAllStringsInTrie(t.child[i], prefix + ('a' + i));  // does + work on (String, char)?
    }
  }
}


The below recursive code can be used where your TrieNode is like this: This code works fine.

TrieNode(char c)
{

        this.con=c;
        this.isEnd=false;
        list=new ArrayList<TrieNode>();
        count=0;

}

//--------------------------------------------------

public void Print(TrieNode root1, ArrayList<Character> path)
{

      if(root1==null)
          return;

      if(root1.isEnd==true)
      {
          //print the entire path
          ListIterator<Character> itr1=path.listIterator();
          while(itr1.hasNext())
          {
              System.out.print(itr1.next());
          }
          System.out.println();
          return;
      }
      else{
          ListIterator<TrieNode> itr=root1.list.listIterator();
          while(itr.hasNext())
          {
              TrieNode child=itr.next();
              path.add(child.con);
              Print(child,path);
              path.remove(path.size()-1);

            }
      }


Simple recursive DFS algorithm can be used to find all words for a given prefix.

Sample Trie Node:

static class TrieNode {
    Map<Character, TrieNode> children = new HashMap<>();
    boolean isWord = false;
}

Method to find all words for a given prefix:

static List<String> findAllWordsForPrefix(String prefix, TrieNode root) {
    List<String> words = new ArrayList<>();
    TrieNode current = root;
    for(Character c: prefix.toCharArray()) {
        TrieNode nextNode = current.children.get(c);
        if(nextNode == null) return words;
        current = nextNode;
    }
    if(!current.children.isEmpty()) {
        findAllWordsForPrefixRecursively(prefix, current, words);
    } else {
        if(current.isWord) words.add(prefix);
    }
    return words;
}

static void findAllWordsForPrefixRecursively(String prefix, TrieNode node, List<String> words) {
    if(node.isWord) words.add(prefix);
    if(node.children.isEmpty()) {
        return;
    }
    for(Character c: node.children.keySet()) {
        findAllWordsForPrefixRecursively(prefix + c, node.children.get(c), words);
    }
}

Complete code can be found at below: TrieDataStructure Example

0

精彩评论

暂无评论...
验证码 换一张
取 消