lexpp
Small Extremely Powerful Header Only C++ Lexical Analyzer/String Parser Library
Lexpp is made with simplicity and size in mind. The entire library is about 500 lines!
Lexpp is very powerful and can be used for almost all parsing needs!
You can check the examples/ for more elaborate usage.
How to Use
Just place the lexpp.h
file in your project include directory.
In one cpp file define LEXPP_IMPLEMENTATION
before importing lexpp like this:
#define LEXPP_IMPLEMENTATION
#include "lexpp.h"
That's it — you are ready to use lexpp!
Basic Examples
String Parsing
std::string data = "some text to parse! ";
std::vector<std::string> tokens = lexpp::lex(data, " ;\n");
for(std::string& token : tokens){
std::cout << token << std::endl;
}
Some more string parsing
std::string data = "some text to parse! ";
std::vector<std::string> tokens = lexpp::lex(data, {"<=", "<<", "\n", "::", ",", "}", "{", ";", " "}, false);
for(std::string& token : tokens){
std::cout << token << std::endl;
}
Using Custom Token Classifier
// Token categories produced by the custom classifier below.
enum MyTokens{
    Keyword = 0,  // one of the declared language keywords
    Number,       // numeric literal (per the user-supplied is_number helper)
    String,       // any other non-separator token
    Other         // separator / punctuation
};

// Map a MyTokens value to a printable name.
static std::string TokenToString(int tok){
    switch(tok){
        case Keyword: return "Keyword";
        case Number:  return "Number";
        case String:  return "String";
        case Other:   return "Other";
        // BUG FIX: the original had no default and flowed off the end of a
        // non-void function for unknown values — undefined behavior in C++.
        default:      return "Unknown";
    }
}
Now the Lexing
// Keywords we want the classifier to recognize specially.
std::vector<std::string> keywords = {"for", "void", "return", "if", "int"};
// BUG FIX: the original separator list contained `")" ";"` (missing comma);
// adjacent string literals are concatenated by the compiler, so the two
// separators silently became the single separator ");".
std::vector<lexpp::Token> tokens = lexpp::lex(data, {"<=", "<<", "\n", "::", ",", "}", "{", "(", ")", ";", " "}, [keywords](std::string& token, bool* discard, bool is_separator) -> int {
    // Keywords take priority, then numbers; everything else is a plain string.
    if(std::find(keywords.begin(), keywords.end(), token) != keywords.end()){
        return MyTokens::Keyword;
    }
    if(is_number(token))
        return MyTokens::Number;
    else
        return MyTokens::String;
}, false);
for(lexpp::Token& token : tokens){
    std::cout << TokenToString(token.type) << " -> " << token.value << std::endl;
}
Using the TokenParser class
We need to extend the TokenParser class to make our custom token parser
// Token parser that classifies tokens as Keyword / Number / Other / String.
class MyTokenParser : public lexpp::TokenParser
{
public:
    // Third base-class argument (false): separators are not kept as tokens.
    MyTokenParser(std::string data, std::string separators)
        : TokenParser(data, separators, false) {}

    // Classify a single token; `discard` is left untouched (nothing is dropped).
    virtual int process_token(std::string& token, bool* discard, bool isSeparator) override
    {
        const bool matched_keyword =
            std::find(keywords.begin(), keywords.end(), token) != keywords.end();
        if (matched_keyword)
            return MyTokens::Keyword;
        if (is_number(token))
            return MyTokens::Number;
        return isSeparator ? MyTokens::Other : MyTokens::String;
    }

    std::vector<std::string> keywords = {"for", "void", "return", "if", "int"};
};
Now using the class with the lexer
std::vector<lexpp::Token> tokens = lexpp::lex(std::make_shared<MyTokenParser>(data, "\n :,[]{}().\t"));
for(lexpp::Token& token : tokens){
std::cout << TokenToString(token.type) << " -> " << token.value << std::endl;
}
Making an email parser with lexpp
First, a struct to store our data
// One parsed email address, split into its components by EmailTokenParser.
struct Email{
std::string name;        // part before '@'
std::string domainFront; // domain label between '@' and '.'
std::string domainEnd;   // domain label after '.'
std::string domain;      // domainFront + "." + domainEnd, filled when the record completes
};
Now we need to make our custom token parser for email parsing
// Parses text of the form "name@front.end" (one address per line) into
// Email records collected in `emailIds`.
class EmailTokenParser : public lexpp::TokenParser
{
public:
    // Third base-class argument (true): separators ARE delivered as tokens,
    // so "@", "." and "\n" can drive the state machine below.
    EmailTokenParser(std::string data, std::string separators = "\n@.")
        :TokenParser(data, separators, true){}

    // State machine: ci == 0 -> reading name, 1 -> domain front, 2 -> domain end.
    // The returned token type is unused by this example, so 0 is returned everywhere.
    virtual int process_token(std::string& token, bool* discard, bool isSeparator) override
    {
        if(isSeparator){
            if(ci == 2){
                // Any separator after the domain end completes one address.
                currMail.domain = currMail.domainFront + "." + currMail.domainEnd;
                emailIds.push_back(currMail);
                // BUG FIX: reset the working record so fields from a previous
                // address cannot leak into the next one.
                currMail = Email{};
                ci = 0;
                *discard = true;
                return 0;
            }
            if(token.size() <= 0){
                *discard = true;
                return 0;
            }
            if(token == "\n"){
                ci = 0;
                *discard = true;
                return 0;
            }
            else if(token == "@"){
                ci = 1;
                *discard = true;
                return 0;
            }
            else if(token == "."){
                ci = 2;
                *discard = true;
                return 0;
            }
        }
        // Non-separator token: store it into the field selected by the state.
        if(ci == 0)
            currMail.name = token;
        else if(ci == 1)
            currMail.domainFront = token;
        else if(ci == 2)
            currMail.domainEnd = token;
        // BUG FIX: the original flowed off the end of this non-void function
        // here — undefined behavior in C++.
        return 0;
    }

    int ci = 0;                   // current parse state (see above)
    Email currMail;               // address currently being assembled
    std::vector<Email> emailIds;  // all completed addresses
};
Now, finally, calling lex
std::shared_ptr<EmailTokenParser> tok_parser = std::make_shared<EmailTokenParser>(data+"\n", "\n@.");
lexpp::lex(tok_parser);
for(Email& email : tok_parser->emailIds){
std::cout << "Email : \nNAME: " << email.name << "\nDOMAIN : " << email.domain << std::endl;
}