09 April 2014

KMP算法其实很简单,参考阮一峰jBoxer,但是有人搞的很复杂july,而且例子不典型(过于简单),不过好在最后直接给出了本质的理解。

通过简单的链接你一定可以通过一句话来说明白KMP算法(如果不行,多看几遍这两篇文章),复杂的文章,代码可以参考,文章仅可作为参考。

###BF(Brute-Force)

#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <string.h>

using namespace std;
//字符串匹配
/*
时间复杂度:
    最好 O(n)
    最差 O((n-m)*m)
空间复杂度:O(1)
*/
int BF(char* strS, char* strT, int pos = 0)
{
      //返回strT在strS中第pos个字符后出现的位置。
       int i = pos;
       int j = 0;
       int k = 0; 
       int lens = strlen(strS);
       int lent = strlen(strT);
       cout << "lens=" << lens << endl;
       cout << "lent=" << lent << endl;
       if (NULL == strS | NULL == strT) return 0;
       if (lens < lent) return 0;
       while(i + lent <= lens && j < lent)
       {
              if(strS[i+k] == strT[j])
              {
                  ++j;    //模式串跳步
                  ++k;    //主串(内)跳步
              }
              else
              {
                  ++i;
                  j=0;  //指针回溯,下一个首位字符
                  k=0;
              }
       }//end i
       if(j >= lent)
       {
          return i;
       }
       else
       {
          return 0;
       }
}//end



int BF2( char *Target, char *Pattern, int pos = 0)
{
    int TarLen = 0;        // Length of Target
    int PatLen = 0;        // Length of Pattern

    // Compute the length of Pattern
    while( '\0' != Pattern[PatLen] ) 
    { ++PatLen; } 
    while( '\0' != Target[TarLen] )
    {
        int TmpTarLen = TarLen;
        for(int i=0; i<PatLen; i++)
        {
            if( Target[TmpTarLen++] != Pattern[i] ) break;
            if( i == PatLen-1 )
            {
                return TarLen;
            }
        }
        TarLen++;
    }
}

###KMP(Knuth-Morris-Pratt)

typedef struct
{
       int length;
       char str[0];
}patternStr;

typedef struct
{
       size_t length; //include "\0"
       char str[0];
}targetStr;

//根据模式pStr的组成求其对应的next数组。
void getNext(patternStr *pStr, int next[])
{
    size_t len = pStr->length;
    next[0] = -1;
    size_t i = 0;
    int j = -1;
    while( i < len-1 )
    {
        if( j == -1 || pStr->str[i] == pStr->str[j] )
        {
            ++i;
            ++j;
            //next[i] = j;
            if(pStr->str[i] != pStr->str[j]) next[i] = j;
            else next[i] = next[j];
        }
        else
        {
            j = next[j];
        }
    }//end while
    cout << "next[ "<< len << " ]" << endl;
    for( i = 0; i < len; i++ )
    {
        cout << next[i] << "\t";
    }
    cout << endl;
}//end


int kmp(targetStr *t, patternStr *p, int next[])
{
    int i = 0;
    int j = 0;

    while(i < t->length && j < t->length)
    {
        if(j == -1 || t->str[i] == p->str[j])
        {
            i++;
            j++;
        }
        else
        {
            j = next[j];
        }
    }
    if(j == p->length)
    {
        return( i-p->length );
    }
    else
    {
        return -1;
    }
}

int main()
{
    //int rtnPos = 0;
    //char srStr[] = "abcabcabcabcdef";
    //char dstStr[] = "def";
    //char dstStr1[] = "defg";
    //int pos = BF2(srStr,dstStr,0);
    //int pos1 = BF2(srStr,dstStr1,0);
    //cout << "post: " << pos << endl;
    //cout << "post1: " << pos1 << endl;
    char srStr[] = "abcdaaabcabcd";
    char dstStr[] = "googgoogle";
    size_t ntmp_pStr_size = strlen(srStr);
    size_t ntmp_tStr_size = strlen(dstStr);

    size_t pLen = sizeof(patternStr) + (1 + ntmp_pStr_size)*sizeof(char);
    patternStr *strP = reinterpret_cast<patternStr*>(new char[pLen]);
    strP->length = ntmp_pStr_size;
    strcpy(strP->str,srStr);    //源串
    //strP->length = strlen(strP->str);

    size_t tLen = sizeof(targetStr) + (1 + ntmp_tStr_size)*sizeof(char);
    targetStr *strT = reinterpret_cast<targetStr*>(new char[tLen]);
    strT->length = ntmp_tStr_size;
    strcpy(strT->str,dstStr);     //模式串

    int *pNext = new int[strP->length];
    getNext(strP,pNext);
    int rtnPos = kmp(strT,strP,pNext);
    cout << rtnPos << endl;        //输出匹配位置

    delete []strP;
    delete []strT;
    delete []pNext;
    return 0;
}