#11 HonzaC
Mozes to riesit cez stavovy automat. Vstup budes prechadzat znak po znaku. V stavoch si budes uchovavat informaciu, ktoru cast URL prave nacitavas, a v pomocnom stringu budes mat doteraz nacitane znaky (pre danu cast URL).
Nevyhoda tohto riesenia je, ze tam bude dost stavov.
Ja som to spravil tak, ze som si vstup rozsekal na tokeny {key, value}; na to mi stacil stavovy automat s 2 stavmi. Key je enum{OPERATOR, WORD} a value je std::string. Takto spracovany vstup porovnavam sekvencne zlava doprava pomocou vzorov: ak sa pociatocne tokeny zhoduju so vzorom, tak ich spracujem a zmazem a potom pokracujem na dalsi vzor. Tu je kod:
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <map>
#include <regex>
#include <string>
#include <utility>
#include <vector>
/// Kind of a lexed token.
enum key_e
{
    WORD = 0,     // a run of letters and/or digits
    OPERATOR = 1  // anything else
};
/// One lexical unit: its kind (`key`) and the text it carries (`value`).
struct Token{
    key_e key;          // kind of the token
    std::string value;  // token text; left empty by the single-argument constructor

    // Intentionally implicit: lets a bare key such as WORD stand for a Token
    // inside a braced pattern list.
    Token(key_e key): key(key) {}

    // `value` is a by-value sink parameter, so move it into the member
    // instead of copying it a second time.
    Token(key_e key, std::string value): key(key), value(std::move(value)) {}
};
/*
* Upravi vstupny stringu na vektor tokenov
* (Mozno by bolo lepsie spravyt este 1 stav a umoznit spjit viacej operatorov do jedneho ak su vedla seba, zjednodusil by sa potom kod v maine)
*/
template <class Iterator>
std::vector<Token> lex(Iterator begin, Iterator end){
std::vector<Token> tokens;
enum state_e{
Begin,
Word
};
std::string value;
state_e state = Begin;
for(;begin != end; begin++){
char c = *begin;
if(std::isalpha(c) || std::isdigit(c)){
switch(state){
case Begin:
value += c;
state = Word;
break;
case Word:
value += c;
break;
}
}
else if(std::isspace(c)){
break;
}
else switch(state){
case Begin:
tokens.push_back({OPERATOR, {c}});
break;
case Word:
tokens.push_back({WORD, value});
tokens.push_back({OPERATOR, {c}});
value.clear();
state = Begin;
break;
}
}
switch(state){
case Begin:
break;
case Word:
tokens.push_back({WORD, value});
break;
}
return std::move(tokens);
}
/*
* porovna N tokenov z &tokens s tokenmi s &pattern, kde N je pocet tokenov v &pattern
*/
size_t parse(const std::vector<Token>& tokens, const std::vector<Token>& pattern){
if(tokens.size() < pattern.size())return 0;
if(std::equal(pattern.begin(), pattern.end(), tokens.begin(), [](const Token& p, const Token& t){
return
(p.key == t.key) //kluce sa musia rovnat
&& p.value.empty()?true:(p.value==t.value); //hodnoty sa musia rovnak len ak je hodnota definovana v pattern.value
}))return pattern.size();
return 0;
}
int main()
{
std::string str = "https://domena.sk:80/cesta/cesta/subor.hpp ";
auto tokens = lex(str.begin(), str.end());
std::map<std::string, std::string> data;
data["error"] = "false";
// HTTP:
if(size_t s = parse(tokens, {WORD, {OPERATOR, ":"}, {OPERATOR, "/"}, {OPERATOR, "/"}})){
if(tokens[0].value == "http" || tokens[0].value == "https")data["http"] = tokens[0].value;
tokens.erase(tokens.begin(), tokens.begin() + s);
}
else if(size_t s = parse(tokens, {{WORD, "www"}, {OPERATOR, "."}})){
data["http"] = tokens[0].value;
tokens.erase(tokens.begin(), tokens.begin() + s);
}
//DOMENA:
while(size_t s = parse(tokens, {WORD, {OPERATOR, "."}})){
data["domena"] += tokens[0].value;
data["domena"] += tokens[1].value;
tokens.erase(tokens.begin(), tokens.begin() + s);
}
if(size_t s = parse(tokens, {WORD})){
if(data.find("domena") == data.end()){
data["error"] = "true";
data["domena"] = "<error>";
}
data["domena"] += tokens[0].value;
tokens.erase(tokens.begin(), tokens.begin() + s);
}
else{
data["error"] = "true";
data["domena"] += "<error>";
}
//PORT:
if(size_t s = parse(tokens, {{OPERATOR, ":"}, WORD})){
data["port"] = tokens[1].value;
tokens.erase(tokens.begin(), tokens.begin() + s);
}
//CESTA & SUBOR:
while(size_t s = parse(tokens, {{OPERATOR, "/"}, WORD})){
if(data["subor"].size())data["cesta"] += tokens[0].value;
data["cesta"] += data["subor"];
data["subor"] = tokens[1].value;
tokens.erase(tokens.begin(), tokens.begin() + s);
}
//PRIPONA SUBORU:
if(size_t s = parse(tokens, {{OPERATOR, "."}, WORD})){
data["subor"] += tokens[0].value;
data["subor"] += tokens[1].value;
tokens.erase(tokens.begin(), tokens.begin() + s);
}
else{
data["error"] = "true";
data["subor"] += "<error>";
}
if(tokens.size())data["error"] = "true";
//VYPIS:
for(auto&d : data)std::cout << d.first << ": " << d.second << std::endl;
}
edit: oprava chyby v lex pri teste isspace().