Как использовать структуру данных Trie, чтобы найти сумму LCP для всех возможных подстрок?

Описание проблемы: Ссылки: Развлечение со строками

Основываясь на описании проблемы, наивный подход к нахождению суммы длин LCP для всех возможных подстрок (для данной строки) выглядит следующим образом:

#include <cstring>
#include <iostream>

using std::cout;
using std::cin;
using std::endl;
using std::string;

int lcp(string str1, string str2) 
{ 
    string result; 
    int n1 = str1.length(), n2 = str2.length(); 

    // Compare str1 and str2 
    for (int i=0, j=0; i<=n1-1 && j<=n2-1; i++,j++) 
    { 
        if (str1[i] != str2[j]) 
            break; 
        result.push_back(str1[i]); 
    } 

    return (result.length()); 
} 

int main()
{
    string s;
    cin>>s;
    int sum = 0;

    for(int i = 0; i < s.length(); i++)
        for(int j = i; j < s.length(); j++)
            for(int k = 0; k < s.length(); k++)
                for(int l = k; l < s.length(); l++)
                    sum += lcp(s.substr(i,j - i + 1),s.substr(k,l - k + 1));
    cout<<sum<<endl;     
    return 0;
}

Основываясь на дальнейшем чтении и исследованиях LCP, я нашел это документ, определяющий способ эффективного поиска LCP с использованием расширенной структуры данных, называемой Tries. Я реализовал Trie и Compressed Trie (Suffix Tree) следующим образом:

#include <iostream>
#include <cstring>

using std::cout;
using std::cin;
using std::endl;
using std::string;
const int ALPHA_SIZE = 26;

struct TrieNode
{
    struct TrieNode *children[ALPHA_SIZE];
    string label;
    bool isEndOfWord;
};
typedef struct TrieNode Trie;

Trie *getNode(void)
{
    Trie *parent = new Trie;
    parent->isEndOfWord = false;
    parent->label = "";
    for(int i = 0; i <ALPHA_SIZE; i++)
        parent->children[i] = NULL;

    return parent;
}

void insert(Trie *root, string key)
{
    Trie *temp = root;

    for(int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if(!temp->children[index])
        {
            temp->children[index] = getNode();
            temp->children[index]->label = key[i];
        }
        temp = temp->children[index];
        temp->isEndOfWord = false;
    }
    temp->isEndOfWord = true;
}

int countChildren(Trie *node, int *index)
{
    int count = 0;

    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        if(node->children[i] != NULL)
        {
            count++;
            *index = i;
        }
    }
    return count;
}

void display(Trie *root)
{
    Trie *temp = root;
    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        if(temp->children[i] != NULL)
        {
            cout<<temp->label<<"->"<<temp->children[i]->label<<endl;
            if(!temp->isEndOfWord)
                display(temp->children[i]);
        }
    }
}

void compress(Trie *root)
{
    Trie *temp = root;
    int index = 0;

    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        if(temp->children[i])
        {
            Trie *child = temp->children[i];

            if(!child->isEndOfWord)
            {
                if(countChildren(child,&index) >= 2)
                {
                    compress(child);
                }
                else if(countChildren(child,&index) == 1)
                {
                    while(countChildren(child,&index) < 2 and countChildren(child,&index) > 0)
                    {
                        Trie *sub_child = child->children[index];

                        child->label = child->label + sub_child->label;
                        child->isEndOfWord = sub_child->isEndOfWord;
                        memcpy(child->children,sub_child->children,sizeof(sub_child->children));

                        delete(sub_child);
                    }
                    compress(child);
                }
            }
        }
    }
}

bool search(Trie *root, string key)
{
    Trie *temp = root;

    for(int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if(!temp->children[index])
            return false;
        temp = temp->children[index];
    }
    return (temp != NULL && temp->isEndOfWord);
}

int main()
{
    string input;
    cin>>input;

    Trie *root = getNode();

    for(int i = 0; i < input.length(); i++)
        for(int j = i; j < input.length(); j++)
        {
            cout<<"Substring : "<<input.substr(i,j - i + 1)<<endl;
            insert(root, input.substr(i,j - i + 1));
        }

    cout<<"DISPLAY"<<endl;
    display(root);

    compress(root);
    cout<<"AFTER COMPRESSION"<<endl;
    display(root);

    return 0;
}

Мой вопрос в том, как мне найти длину LCP. Я могу получить LCP, получив поле метки в узле ветвления, но как мне подсчитать длину LCP для всех возможных подстрок?

Один из способов, о котором я подумал, заключался в том, как использовать узел ветвления, его поле метки, которое содержит LCP, и дочерние узлы узла ветвления, чтобы найти сумму всех длин LCP (самый низкий общий предок?). Но я все еще в замешательстве. Как мне поступить дальше?

Примечание. Также возможно, что мой подход к этой проблеме неверен, поэтому, пожалуйста, предложите другие методы для этой проблемы (учитывая временную и пространственную сложность).

Ссылка на похожие вопросы без ответа:

Ссылки по коду и теории:

Обновление 1:

Основываясь на ответе @Adarsh Anurag, я придумал следующую реализацию с помощью структуры данных trie,

#include <iostream>
#include <cstring>
#include <stack>

using std::cout;
using std::cin;
using std::endl;
using std::string;
using std::stack;

const int ALPHA_SIZE = 26;
int sum = 0;
stack <int> lcp;

struct TrieNode
{
    struct TrieNode *children[ALPHA_SIZE];
    string label;
    int count;
};
typedef struct TrieNode Trie;

Trie *getNode(void)
{
    Trie *parent = new Trie;
    parent->count = 0;
    parent->label = "";
    for(int i = 0; i <ALPHA_SIZE; i++)
        parent->children[i] = NULL;

    return parent;
}

void insert(Trie *root, string key)
{
    Trie *temp = root;

    for(int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if(!temp->children[index])
        {
            temp->children[index] = getNode();
            temp->children[index]->label = key[i];
        }
        temp = temp->children[index];
    }
    temp->count++;
}

int countChildren(Trie *node, int *index)
{
    int count = 0;

    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        if(node->children[i] != NULL)
        {
            count++;
            *index = i;
        }
    }
    return count;
}

void display(Trie *root)
{
    Trie *temp = root;
    int index = 0;
    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        if(temp->children[i] != NULL)
        {
            cout<<temp->label<<"->"<<temp->children[i]->label<<endl;
            cout<<"CountOfChildren:"<<countChildren(temp,&index)<<endl;
            cout<<"Counter:"<<temp->children[i]->count<<endl;

            display(temp->children[i]);
        }
    }
}

void lcp_sum(Trie *root,int counter,string lcp_label)
{
    Trie *temp = root;
    int index = 0;

    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        if(temp->children[i])
        {
            Trie *child = temp->children[i];

            if(lcp.empty())
            {
                lcp_label = child->label;
                counter = 0;

                lcp.push(child->count*lcp_label.length());
                sum += lcp.top();
                counter += 1;
            }
            else
            {
                lcp_label = lcp_label + child->label;
                stack <int> temp = lcp;

                while(!temp.empty())
                {
                    sum = sum + 2 * temp.top() * child->count;
                    temp.pop();
                }

                lcp.push(child->count*lcp_label.length());
                sum += lcp.top();
                counter += 1;
            }

            if(countChildren(child,&index) > 1)
            {
                lcp_sum(child,0,lcp_label);
            }
            else if (countChildren(child,&index) == 1)
                lcp_sum(child,counter,lcp_label);
            else
            {
                while(counter-- && !lcp.empty())
                    lcp.pop();
            }
        }
    }
}

int main()
{
    string input;
    cin>>input;

    Trie *root = getNode();

    for(int i = 0; i < input.length(); i++)
        for(int j = i; j < input.length(); j++)
        {
            cout<<"Substring : "<<input.substr(i,j - i + 1)<<endl;
            insert(root, input.substr(i,j - i + 1));
            display(root);
        }

    cout<<"DISPLAY"<<endl;
    display(root);

    cout<<"COUNT"<<endl;
    lcp_sum(root,0,"");
    cout<<sum<<endl;

    return 0;
}

Из структуры Trie я удалил переменную isEndOfWord и заменил ее на counter. Эта переменная отслеживает повторяющиеся подстроки, что должно помочь в подсчете LCP для строк с повторяющимися символами. Однако приведенная выше реализация работает только для строк с разными символами. Я попытался реализовать метод, предложенный @Adarsh для повторяющихся символов, но не удовлетворяет ни одному тестовому случаю.

Обновление 2:

На основе дальнейшего обновленного ответа от @Adarsh и «проб и ошибок» с различными тестовыми примерами я, кажется, немного продвинулся в отношении повторяющихся символов, однако он все еще не работает должным образом. Вот реализация с комментариями,

// LCP : Longest Common Prefix
// DFS : Depth First Search

#include <iostream>
#include <cstring>
#include <stack>
#include <queue>

using std::cout;
using std::cin;
using std::endl;
using std::string;
using std::stack;
using std::queue;

const int ALPHA_SIZE = 26;
int sum = 0;     // Global variable for LCP sum
stack <int> lcp; //Keeps track of current LCP

// Trie Data Structure Implementation (See References Section)
struct TrieNode
{
    struct TrieNode *children[ALPHA_SIZE]; // Search space can be further reduced by keeping track of required indicies
    string label;
    int count; // Keeps track of repeat substrings
};
typedef struct TrieNode Trie;

Trie *getNode(void)
{
    Trie *parent = new Trie;
    parent->count = 0;
    parent->label = ""; // Root Label at level 0 is an empty string
    for(int i = 0; i <ALPHA_SIZE; i++)
        parent->children[i] = NULL;

    return parent;
}

void insert(Trie *root, string key)
{
    Trie *temp = root;

    for(int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';   // Lowercase alphabets only
        if(!temp->children[index])
        {
            temp->children[index] = getNode();
            temp->children[index]->label = key[i]; // Label represents the character being inserted into the node
        }
        temp = temp->children[index];
    }
    temp->count++;
}

// Returns the count of child nodes for a given node
int countChildren(Trie *node, int *index)
{
    int count = 0;

    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        if(node->children[i] != NULL)
        {
            count++;
            *index = i; //Not required for this problem, used in compressed trie implementation
        }
    }
    return count;
}

// Displays the Trie in DFS manner
void display(Trie *root)
{
    Trie *temp = root;
    int index = 0;
    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        if(temp->children[i] != NULL)
        {
            cout<<temp->label<<"->"<<temp->children[i]->label<<endl; // Display in this format : Root->Child
            cout<<"CountOfChildren:"<<countChildren(temp,&index)<<endl; // Count of Child nodes for Root
            cout<<"Counter:"<<temp->children[i]->count<<endl; // Count of repeat substrings for a given node
            display(temp->children[i]);
        }
    }
}

/* COMPRESSED TRIE IMPLEMENTATION
void compress(Trie *root)
{
    Trie *temp = root;
    int index = 0;

    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        if(temp->children[i])
        {
            Trie *child = temp->children[i];

            //if(!child->isEndOfWord)
            {
                if(countChildren(child,&index) >= 2)
                {
                    compress(child);
                }
                else if(countChildren(child,&index) == 1)
                {
                    while(countChildren(child,&index) < 2 and countChildren(child,&index) > 0)
                    {
                        Trie *sub_child = child->children[index];

                        child->label = child->label + sub_child->label;
                        //child->isEndOfWord = sub_child->isEndOfWord;
                        memcpy(child->children,sub_child->children,sizeof(sub_child->children));

                        delete(sub_child);
                    }
                    compress(child);
                }
            }
        }
    }
}
*/

// Calculate LCP Sum recursively
void lcp_sum(Trie *root,int *counter,string lcp_label,queue <int> *s_count)
{
    Trie *temp = root;
    int index = 0;

    // Traverse through this root's children array, to find child nodes
    for(int i = 0; i < ALPHA_SIZE; i++)
    {
        // If child nodes found, then ...
        if(temp->children[i] != NULL)
        {
            Trie *child = temp->children[i];

            // Check if LCP stack is empty
            if(lcp.empty())
            {
                lcp_label = child->label;   // Set LCP label as Child's label
                *counter = 0;               // To make sure counter is not -1 during recursion

                /*
                    * To include LCP of repeat substrings, multiply the count variable with current LCP Label's length
                    * Push this to a stack called lcp
                */
                lcp.push(child->count*lcp_label.length());

                // Add LCP for (a,a)
                sum += lcp.top() * child->count; // Formula to calculate sum for repeat substrings : (child->count) ^ 2 * LCP Label's Length
                *counter += 1; // Increment counter, this is used further to pop elements from the stack lcp, when a branching node is encountered
            }
            else
            {
                lcp_label = lcp_label + child->label; // If not empty, then add Child's label to LCP label
                stack <int> temp = lcp; // Temporary Stack

                /*
                    To calculate LCP for different combinations of substrings,
                    2 -> accounts for (a,b) and (b,a)
                    temp->top() -> For previous substrings and their combinations with the current substring
                    child->count() -> For any repeat substrings for current node/substring
                */
                while(!temp.empty())
                {
                    sum = sum + 2 * temp.top() * child->count;
                    temp.pop();
                }

                // Similar to above explanation for if block
                lcp.push(child->count*lcp_label.length());
                sum += lcp.top() * child->count;
                *counter += 1;
            }

            // If a branching node is encountered
            if(countChildren(child,&index) > 1)
            {
                int lc = 0; // dummy variable
                queue <int> ss_count; // queue to keep track of substrings (counter) from the child node of the branching node
                lcp_sum(child,&lc,lcp_label,&ss_count); // Recursively calculate LCP for child node

                // This part is experimental, does not work for all testcases
                // Used to calculate the LCP count for substrings between child nodes of the branching node
                if(countChildren(child,&index) == 2)
                {
                    int counter_queue = ss_count.front();
                    ss_count.pop();

                    while(counter_queue--)
                    {
                        sum = sum +  2 * ss_count.front() * lcp_label.length();
                        ss_count.pop();
                    }
                }
                else
                {
                    // Unclear, what happens if children is > 3
                    // Should one take combination of each child node with one another ?
                    while(!ss_count.empty())
                    {
                        sum = sum +  2 * ss_count.front() * lcp_label.length();
                        ss_count.pop();
                    }
                }

                lcp_label = temp->label; // Set LCP label back to Root's Label

                // Empty the stack till counter is 0, so as to restore it's state when it first entered the child node from the branching node
                while(*counter)
                {
                    lcp.pop();
                    *counter -=1;
                }
                continue; // Continue to next child of the branching node
            }
            else if (countChildren(child,&index) == 1)
            {
                // If count of children is 1, then recursively calculate LCP for further child node
                lcp_sum(child,counter,lcp_label,s_count);
            }
            else
            {
                // If count of child nodes is 0, then push the counter to the queue for that node
                s_count->push(*counter);
                // Empty the stack till counter is 0, so as to restore it's state when it first entered the child node from the branching node
                while(*counter)
                {
                    lcp.pop();
                    *counter -=1;
                }
                lcp_label = temp->label; // Set LCP label back to Root's Label

            }
        }
    }
}

/* SEARCHING A TRIE
bool search(Trie *root, string key)
{
    Trie *temp = root;

    for(int i = 0; i < key.length(); i++)
    {
        int index = key[i] - 'a';
        if(!temp->children[index])
            return false;
        temp = temp->children[index];
    }
    return (temp != NULL );//&& temp->isEndOfWord);
}
*/

int main()
{
    int t;
    cin>>t; // Number of testcases

    while(t--)
    {
        string input;
        int len;
        cin>>len>>input; // Get input length and input string

        Trie *root = getNode();

        for(int i = 0; i < len; i++)
            for(int j = i; j < len; j++)
                insert(root, input.substr(i,j - i + 1)); // Insert all possible substrings into Trie for the given input

        /*
          cout<<"DISPLAY"<<endl;
          display(root);
        */

        //LCP COUNT
        int counter = 0;    //dummy variable
        queue <int> q;      //dummy variable
        lcp_sum(root,&counter,"",&q);
        cout<<sum<<endl;

        sum = 0;

        /*
          compress(root);
          cout<<"AFTER COMPRESSION"<<endl;
          display(root);
        */
    }
    return 0;
}

Кроме того, вот несколько примеров тестовых случаев (ожидаемые результаты),

1. Input : 2 2 ab 3 zzz

   Output : 6 46

2. Input : 3 1 a 5 afhce 8 ahsfeaa

   Output : 1 105 592

3. Input : 2 15 aabbcceeddeeffa 3 bab

   Output : 7100 26

Вышеупомянутая реализация не работает для тестовых случаев 2 и 3 (частичный вывод). Пожалуйста, предложите способ решить эту проблему. Любой другой подход к этой проблеме также хорош.

Saurabh P Bhandari 19.07.2019 источник

comment

оффтоп: зачем использовать typedef struct? - Fureeish 21.07.2019

comment

@Fureeish, без особой причины, следил за кодом в ссылках - Saurabh P Bhandari 23.07.2019

comment

Я настоятельно рекомендую не делать этого в C++. Это весьма полезно в C, но абсолютно бесполезно в C++. - Fureeish 23.07.2019

comment

@Fureeish, хорошо, есть идеи о подходе, который я должен использовать для этого вопроса? - Saurabh P Bhandari 24.07.2019

Ответы (2)

arrow_upward
7
arrow_downward

Ваша интуиция движется в правильном направлении.

По сути, всякий раз, когда вы видите проблему с LCP подстрок, вам следует подумать о структурах данных суффиксов, таких как деревья суффиксов, массивы суффиксов и суффиксные автоматы. Деревья суффиксов, пожалуй, самые мощные и с ними проще всего работать, и они прекрасно решают эту проблему.

Дерево суффиксов – это дерево, содержащее все суффиксы строки, в котором каждая неветвящаяся цепочка ребер сжата в одно длинное ребро. Проблема с обычным деревом со всеми достаточными значениями заключается в том, что оно имеет O(N^2) узлов, поэтому требует O(N^2) памяти. Учитывая, что вы можете предварительно вычислить LCP всех пар суффиксов за время и пространство O(N^2) с помощью тривиального динамического программирования, деревья суффиксов бесполезны без сжатия. Сжатое дерево занимает O(N) памяти, но оно все равно бесполезно, если вы строите его с помощью алгоритма O(N^2) (как вы делаете в своем коде). Вы должны использовать алгоритм Укконена для непосредственного построения дерева суффиксов в сжатом виде за время O(N). Изучение и реализация этого алгоритма — непростая задача, возможно, вам будет полезна веб-визуализация. В качестве последнего небольшого примечания я предположу для простоты, что в конец строки добавляется сигнальный символ (например, доллар $), чтобы гарантировать, что все листья являются явными узлами в дереве суффиксов.

Обратите внимание, что:

Каждый суффикс строки представляется как путь от корня к листу дерева (вспомним о часовом). Это переписка 1-1.
Каждая подстрока строки представляется как путь от корня к узлу в дереве (включая неявные узлы «внутри» длинных ребер) и наоборот. Более того, все подстроки с одинаковым значением отображаются в один и тот же путь. Чтобы узнать, сколько одинаковых подстрок отображается в конкретный путь к корневому узлу, подсчитайте, сколько листьев находится в поддереве ниже узла.
Чтобы найти LCP двух подстрок, найдите соответствующие им пути корневых узлов и возьмите LCA узлов. LCP — это глубина вершины LCA. Конечно, это будет физическая вершина, от которой вниз уходит несколько ребер.

Вот основная идея. Рассмотрим все пары подстрок и классифицируем их по группам с одной и той же вершиной LCA. Другими словами, давайте вычислим A[v] := количество пар подстрок с вершиной LCA, равной точно v. Если вычислить это число для каждой вершины v, то все, что останется для решения задачи, это: умножить каждое число на глубину узла и получить сумму. Кроме того, массив A[*] занимает всего O(N) места, а это значит, что мы еще не упустили шанс решить всю задачу за линейное время.

Напомним, что каждая подстрока — это путь к корневому узлу. Рассмотрим два узла (представляющих две произвольные подстроки) и вершину v. Назовем поддерево с корнем в вершине v "v-поддеревом". Затем:

Если оба узла находятся внутри v-поддерева, то их LCA также находится внутри v-поддерева.
В противном случае их LCA находится за пределами v-поддерева, поэтому он работает в обоих направлениях.

Введем другую величину B[v] := количество пар подстрок с вершиной LCA, находящихся внутри v-поддерева. В приведенном выше утверждении показан эффективный способ вычисления B[v]: это просто квадрат числа узлов в v-поддереве, поскольку каждая пара узлов в нем соответствует критерию. Однако здесь следует учитывать кратность, поэтому каждый узел нужно пересчитывать столько раз, сколько существует соответствующих ему подстрок.

Вот формулы:

    B[v] = Q[v]^2
    Q[v] = sum_s( Q[s] + M[s] * len(vs) )    for s in sons(v)
    M[v] = sum_s( M[s] )                     for s in sons(v)

Где M[v] — кратность вершины (т. е. сколько листьев присутствует в v-поддереве), а Q[v] — количество узлов в v-поддереве с учетом кратности. Конечно, вы можете сами вывести базовый случай для листьев. Используя эти формулы, вы можете вычислить M[*], Q[*], B[*] за один обход дерева за время O(N).

Остается только вычислить массив A[*] с помощью массива B[*]. Это можно сделать за O(N) с помощью простой формулы исключения:

A[v] = B[v] - sum_s( B[s] )           for s in sons(v)

Если вы реализуете все это, вы сможете решить всю проблему за идеальное время и пространство O(N). Или лучше сказать: O(N C) времени и пространства, где C — размер алфавита.

stgatilov 24.07.2019

comment

Эта визуализация потрясающая. - Jerry Jeremiah; 25.07.2019

comment

Спасибо за ответ, можно ли показать реализацию? Или можно расширить мою реализацию в обновленном вопросе? - Saurabh P Bhandari; 28.07.2019

arrow_upward
4
arrow_downward

Для решения проблемы действуйте, как показано ниже.

Если вы посмотрите на картинку, Я сделал попытку для всех подстрок abc.

Поскольку добавляются все подстроки, каждый узел в дереве имеет endOfWord true.

Теперь начните обход дерева вместе со мной в стиле DFS:

сумма = 0, стек = {пустой}
Сначала мы сталкиваемся с a. Теперь для L(A,B) a может образовать с собой 1 пару. Поэтому сделайте sum=sum+length и sum теперь станет 1. Теперь длина push, т.е. 1 в стеке. стек = {1}
Перейдите к b сейчас. Подстрока теперь ab. ab как и a может образовать с собой 1 пару. Поэтому сделайте sum=sum+length и sum теперь станет 3. Скопируйте содержимое стека в stack2. Мы получаем 1 как вершину стека. Это означает, что ab и a имеют LCP 1. Но они могут формировать L(a,ab) и L(ab,a). Итак, добавьте sum = sum + 2 * stack.top() . Сумма становится 3 + 2 = 5. Теперь скопируйте обратно стек2 в стек и поместите длину, т.е. 2. стек становится {2,1}.
Перейдите к c. Подстрока abc. Он образует 1 пару с самим собой, поэтому добавьте 3. Сумма станет 5 + 3 = 8. Скопируйте стек в стек2. Наверху у нас 2. abc и ab дадут LCP 2 и образуют 2 пары. Итак, сумма = сумма + 2*2. Сумма становится 12. Выскочит 2. В стеке теперь 1. abc и a имеют LCP 1 и могут образовывать 2 пары. Таким образом, сумма становится 12 + 2 = 14. Скопируйте обратно stack2 в стек и поместите длину, т.е. 3, в стек.
Мы достигли конца попытки. Очистите стек и начните с b длины 1 и продолжайте, как указано выше. Сумма становится 14+1 = 15 здесь
Мы достигаем c. Подстрока здесь bc. Сумма станет 15 + 2 + 2*1(верх) = 19.
Мы достигли конца попытки. Начните с c длины 1. Сумма = 19+1 = 20 сейчас.

Временная сложность: O(N^3). Поскольку требуется O (N ^ 2) для генерации подстрок и O (N) времени для их вставки в trie. Создание узла — это постоянное время. Поскольку все подстроки не имеют длины N, потребуется меньше N^3, но T.C. будет O(N^3).

Я протестировал этот метод, и он дает правильный вывод только для слов с различными символами.

Для слов, допускающих повторение символов, это не удается. Чтобы найти слова, которые допускают повторение символов, вам нужно будет сохранить информацию о том, сколько раз слова встречаются в позициях A и B для L (A, B). В стек нам нужно будет запихнуть пару length и B_count. Затем вы можете найти сумму LCP, используя длину (в стеке) * B_count (в стеке) * A_count текущей подстроки. Я не знаю никакого метода, чтобы найти количество A, B без использования 4 циклов.

См. изображения ниже для слова abb

Это все. Спасибо.

Adarsh Anurag 19.07.2019

comment

Вышеупомянутый метод отлично работает для слов с разными символами. Если кто-нибудь найдет лучший способ сделать это даже для повторяющихся символов, сообщите мне. Не голосуйте за мой ответ, потому что я потратил почти 3 часа, чтобы понять приведенный выше результат. - Adarsh Anurag; 20.07.2019

comment

Спасибо за ответ. При этом добавляются все подстроки, каждый узел в дереве имеет endOfWord true, я думаю, вы имеете в виду каждый листовой узел, независимо от подстрок, если узел имеет endOfWord как истину, то это листовой узел - Saurabh P Bhandari; 20.07.2019

comment

Не могли бы вы объяснить метод дублирования символов на примере? Это добавило бы большей ясности - Saurabh P Bhandari; 21.07.2019

comment

@SaurabhPBhandari подождите, пожалуйста - Adarsh Anurag; 21.07.2019

comment

Конечно, я также обновил вопрос на основе вашего объяснения. - Saurabh P Bhandari; 21.07.2019

comment

@SaurabhPBhandari Я добавляю изображения - Adarsh Anurag; 21.07.2019

Как использовать структуру данных Trie, чтобы найти сумму LCP для всех возможных подстрок?

Ответы (2)

Вопросы по теме