c++11 boost - 字符串(3) - 正则表达式

2018-07-11 20:46:45

xpressive 是一个先进的、灵活的、功能强大的正则表达式库,语法与 std::regex 的正则表达式解析器类似。

正则表达式分为动态的和静态的,动态正则表达式是在运行时编译的,而静态的在编译期就编译好,所以性能更好。

编译一个正则表达式需要很多运行时开销,应少做 sregex/cregex 对象的创建工作,尽量重用。

同样,smatch/cmatch 也应当尽量重用,它们缓存了动态分配的内存,不至于每次重新分配内存。

比较常用的静态语法元素有:

>> 连接两个正则表达式
_ 相当于 . 
*+! 相当于 *+?
bos eos 相当于 ^$
_d _w _s _n 相当于 \d \w \s \n
s1 s2 相当于 $1 $2
as_xpr() 从字符串生成一个静态正则表达式
[] 语义动作,在匹配发生时执行特定的功能

// Copyright (c) 2015
// Author: Chrono Law
#include <cassert>
#include <cstring>
#include <iostream>
#include <string>
using namespace std;

#include <boost/xpressive/xpressive.hpp>
//#include <boost/xpressive/xpressive_dynamic.hpp>
using namespace boost;

//简单的匹配判断
void case1()
{
    using namespace boost::xpressive;

    cregex reg = cregex::compile("a.c");

    assert( regex_match("abc", reg));
    assert( regex_match("a+c", reg));

    assert(!regex_match("ac", reg));
    assert(!regex_match("abd", reg));
}

void case2()
{
    using namespace boost::xpressive;

	//"\\d{6}(1|2)\\d{3}(0|1)\\d[0-3]\\d\\d{3}(X|\\d)" c++98字符串
    cregex reg = cregex::compile(
            R"---(\d{6}(1|2)\d{3}(0|1)\d[0-3]\d\d{3}(X|\d))---",icase);  //c++11字符串表示

    assert( regex_match("999555197001019999", reg));
    assert( regex_match("99955519700101999X", reg));
    assert( regex_match("99955520100101999x", reg));

    assert(!regex_match("99955520100101999Z", reg));
    assert(!regex_match("99955530100101999X", reg));
    assert(!regex_match("999555201099019998", reg));
    assert(!regex_match("999555201012419991", reg));
}


// Retrieving match results: after a successful match every parenthesised
// group of the pattern can be read from the match_results object.
void case3()
{
    using namespace boost::xpressive;

    // Same pattern as a C++98 literal:
    //   "\\d{6}((1|2)\\d{3})((0|1)\\d)([0-3]\\d)(\\d{3}(X|\\d))"
    // Groups 1, 3 and 5 are the three date fields printed below; groups 2, 4
    // and 7 are the nested alternations and group 6 is the trailing part.
    cregex reg = cregex::compile(
            R"--(\d{6}((1|2)\d{3})((0|1)\d)([0-3]\d)(\d{3}(X|\d)))--",icase);

    cmatch what;

    // Run the match outside assert(): when NDEBUG is defined the assert
    // expression is not evaluated, which would leave `what` unpopulated.
    const bool matched = regex_match("999555197001019999", what, reg);
    assert(matched);
    static_cast<void>(matched);     // avoids an unused-variable warning under NDEBUG

    // Element 0 is the whole match, followed by each capture group in order.
    for (const auto& x : what)
    {   cout << "[" << x << "]";    }
    cout << endl;

    cout << "date:"<< what[1] << what[3] << what[5] << endl;
}

//sregex用来操作std::string
void case4()
{
    using namespace boost::xpressive;

    string str("readme.txt");

    sregex start_reg = sregex::compile("^re.*");
    sregex end_reg = sregex::compile(".*txt$");

    assert(regex_match(str, start_reg));
    assert(regex_match(str, end_reg));

}


//查找,不要求完全匹配,查到就返回true
void case5()
{
    using namespace boost::xpressive;

    char str[] = "there is a POWER-suit item";
    cregex reg =cregex::compile("(power)-(.{4})", icase);

    assert(regex_search(str, reg));

    cmatch what;
    regex_search(str, what, reg);
    assert(what.size() == 3);

    cout << what[1] << what[2] << endl;
    assert(!regex_search("error message", reg));
}

void case6()
{
    using namespace boost::xpressive;

    string str("readme.TXT");

    sregex start_reg = sregex::compile("^re");
    sregex end_reg = sregex::compile("txt$",icase);

    assert(regex_search(str, start_reg));               //starts_with
    assert(regex_search(str, end_reg));                 //ends_with
    assert(regex_search(str, sregex::compile("me")));   //contains
}

//匹配替换,$1代表第一个子匹配,$&代表全匹配
void case7()
{
    using namespace boost::xpressive;

    string str("readme.txt");

    sregex reg1 = sregex::compile("(.*)(me)");
    sregex reg2 = sregex::compile("(t)(.)(t)");

    cout << regex_replace(str, reg1, "manual") << endl;
    cout << regex_replace(str, reg1, "$1you") << endl;
    cout << regex_replace(str, reg1, "$&$&") << endl;
    cout << regex_replace(str, reg2, "$1N$3") << endl;

    str = regex_replace(str, reg2, "$1$3");
    cout << str << endl;

    {
        string str("2010 Happy new Year!!!");

        sregex reg1 = sregex::compile("^(\\d| )*");
        sregex reg2 = sregex::compile("!*$");

        cout << regex_replace(str, reg1, "") << endl;
        cout << regex_replace(str, reg2, "") << endl;

        str = regex_replace(str, reg1, "Y2000 ");
        cout << str << endl;

    }
}


//正则迭代
void case8()
{
    using namespace boost::xpressive;

    string str("Power-bomb, power-suit, pOWER-beam all items\n");

    sregex reg = sregex::compile("power-(\\w{4})", icase);

    sregex_iterator pos(str.begin(), str.end(), reg);
    sregex_iterator end;
    for(;pos != end;)
    {
        cout << "[" << (*pos)[0] << "]";
        ++pos;
    }
    cout << endl;
}

//正则分词
void case9()
{
    using namespace boost::xpressive;

    char str[] = "*Link*||+Mario+||Zelda!!!||Metroid";

    cregex reg = cregex::compile("\\w+", icase);

    cregex_token_iterator pos(str, str + strlen(str), reg);
    for(;pos != cregex_token_iterator();++pos)
    {
        cout << "[" << *pos << "]";
    }
    cout << endl;
	
    cregex split_reg = cregex::compile("\\|\\|");
    pos = cregex_token_iterator(str, str + strlen(str),
            split_reg, -1);  //-1代表分词
    for(;pos != cregex_token_iterator();++pos)
    {
        cout << "[" << *pos << "]";
    }
    cout << endl;
}

//工厂类,regex_compiler
//2个用typedef定义的为 cregex_compiler,sregex_compiler
void case10()
{
    using namespace boost::xpressive;

    cregex_compiler rc;

    rc["reg1"] = rc.compile("a|b|c");
    rc["reg2"] = rc.compile("\\d*");

    assert(!regex_match("abc", rc["reg1"]));
    assert(regex_match("123", rc["reg2"]));

}

//格式化器
#include <boost/algorithm/string.hpp>

void case11()
{
    using namespace boost::xpressive;

	//全部改为大写
    struct formater
    {
        string operator()(cmatch const &m)const
        {   return boost::to_upper_copy(m[0].str());    }
    };
    char str[] = "*Link*||+Mario+||Zelda!!!||Metroid";

    cregex reg = cregex::compile("\\w+", icase);
    cout << regex_replace(str, reg, formater()) << endl;
}

//静态正则
void case12()
{
    using namespace boost::xpressive;

    //const char *str2 = "123abc";
    auto str = "123abc";
    cregex reg = bos >> *_d >> +_w;
    assert(regex_match(str, reg));

}

// Entry point: runs every demo, case1 through case12, in that order.
int main()
{
    using demo_fn = void (*)();

    const demo_fn demos[] = {
        case1, case2, case3, case4,
        case5, case6, case7, case8,
        case9, case10, case11, case12,
    };

    for (demo_fn run_demo : demos)
    {
        run_demo();
    }
}
[root@192 c++]# g++ -std=c++11 main.cpp 
[root@192 c++]# ./a.out 
[999555197001019999][1970][1][01][0][01][9999][9]
date:19700101
POWERsuit
manual.txt
readyou.txt
readmereadme.txt
readme.tNt
readme.tt
Happy new Year!!!
2010 Happy new Year
Y2000 Happy new Year!!!
[Power-bomb][power-suit][pOWER-beam]
[Link][Mario][Zelda][Metroid]
[*Link*][+Mario+][Zelda!!!][Metroid]
*LINK*||+MARIO+||ZELDA!!!||METROID


 备注

1.编译器版本gcc4.8.5,运行环境centos7 64位
2.本文只做简单记录用,详细用法请参考 Boost Library,或者是罗剑锋的 boost程序库完全开发指南 书本
3.原文地址http://www.freecls.com/a/2712/a3


©著作权归作者所有
收藏
推荐阅读
简介
天降大任于斯人也,必先苦其心志。