269 lines
6.0 KiB
C++
269 lines
6.0 KiB
C++
/*
 * Lightweight URL & URI parser (RFC 1738, RFC 3986)
 * https://github.com/corporateshark/LUrlParser
 *
 * The MIT License (MIT)
 *
 * Copyright (C) 2015-2020 Sergey Kosarevsky (sk@linderdaum.com)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
|
|
|
|
#include "LUrlParser.h"
|
|
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
#include <stdlib.h>
|
|
|
|
namespace
{
	// Checks that every character of a scheme name is allowed.
	//
	// Accepted characters are ASCII letters, ASCII digits, '+', '-' and '.'
	// (RFC 3986, section 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )).
	// The comparison is done on explicit ASCII ranges instead of isalpha(), so the
	// result does not depend on the current locale and bytes >= 0x80 (negative when
	// 'char' is signed) are rejected without ever being passed to a <ctype.h> function.
	//
	// Note: only the character set is checked. An empty name, or a name that does
	// not start with a letter, still passes because the loop finds nothing to reject.
	//
	// Returns true if no disallowed character is found, false otherwise.
	bool isSchemeValid(const std::string& schemeName)
	{
		for (const char c : schemeName)
		{
			const bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
			const bool isDigit = (c >= '0' && c <= '9');
			const bool isPunctuation = (c == '+' || c == '-' || c == '.');

			if (!isLetter && !isDigit && !isPunctuation) return false;
		}

		return true;
	}
}
|
|
|
|
bool LUrlParser::ParseURL::getPort(int* outPort) const
|
|
{
|
|
if (!isValid()) { return false; }
|
|
|
|
const int port = atoi(port_.c_str());
|
|
|
|
if (port <= 0 || port > 65535) { return false; }
|
|
|
|
if (outPort) { *outPort = port; }
|
|
|
|
return true;
|
|
}
|
|
|
|
// based on RFC 1738 and RFC 3986
|
|
LUrlParser::ParseURL LUrlParser::ParseURL::parseURL(const std::string& URL)
|
|
{
|
|
LUrlParser::ParseURL result;
|
|
|
|
const char* currentString = URL.c_str();
|
|
|
|
/*
|
|
* <scheme>:<scheme-specific-part>
|
|
* <scheme> := [a-z\+\-\.]+
|
|
* For resiliency, programs interpreting URLs should treat upper case letters as equivalent to lower case in scheme names
|
|
*/
|
|
|
|
// try to read scheme
|
|
{
|
|
const char* localString = strchr(currentString, ':');
|
|
|
|
if (!localString)
|
|
{
|
|
return ParseURL(LUrlParserError_NoUrlCharacter);
|
|
}
|
|
|
|
// save the scheme name
|
|
result.scheme_ = std::string(currentString, localString - currentString);
|
|
|
|
if (!isSchemeValid(result.scheme_))
|
|
{
|
|
return ParseURL(LUrlParserError_InvalidSchemeName);
|
|
}
|
|
|
|
// scheme should be lowercase
|
|
std::transform(result.scheme_.begin(), result.scheme_.end(), result.scheme_.begin(), ::tolower);
|
|
|
|
// skip ':'
|
|
currentString = localString + 1;
|
|
}
|
|
|
|
/*
|
|
* //<user>:<password>@<host>:<port>/<url-path>
|
|
* any ":", "@" and "/" must be normalized
|
|
*/
|
|
|
|
// skip "//"
|
|
if (*currentString++ != '/') return ParseURL(LUrlParserError_NoDoubleSlash);
|
|
if (*currentString++ != '/') return ParseURL(LUrlParserError_NoDoubleSlash);
|
|
|
|
// check if the user name and password are specified
|
|
bool bHasUserName = false;
|
|
|
|
const char* localString = currentString;
|
|
|
|
while (*localString)
|
|
{
|
|
if (*localString == '@')
|
|
{
|
|
// user name and password are specified
|
|
bHasUserName = true;
|
|
break;
|
|
}
|
|
else if (*localString == '/')
|
|
{
|
|
// end of <host>:<port> specification
|
|
bHasUserName = false;
|
|
break;
|
|
}
|
|
|
|
localString++;
|
|
}
|
|
|
|
// user name and password
|
|
localString = currentString;
|
|
|
|
if (bHasUserName)
|
|
{
|
|
// read user name
|
|
while (*localString && *localString != ':' && *localString != '@') localString++;
|
|
|
|
result.userName_ = std::string(currentString, localString - currentString);
|
|
|
|
// proceed with the current pointer
|
|
currentString = localString;
|
|
|
|
if (*currentString == ':')
|
|
{
|
|
// skip ':'
|
|
currentString++;
|
|
|
|
// read password
|
|
localString = currentString;
|
|
|
|
while (*localString && *localString != '@') localString++;
|
|
|
|
result.password_ = std::string(currentString, localString - currentString);
|
|
|
|
currentString = localString;
|
|
}
|
|
|
|
// skip '@'
|
|
if (*currentString != '@')
|
|
{
|
|
return ParseURL(LUrlParserError_NoAtSign);
|
|
}
|
|
|
|
currentString++;
|
|
}
|
|
|
|
const bool bHasBracket = (*currentString == '[');
|
|
|
|
// go ahead, read the host name
|
|
localString = currentString;
|
|
|
|
while (*localString)
|
|
{
|
|
if (bHasBracket && *localString == ']')
|
|
{
|
|
// end of IPv6 address
|
|
localString++;
|
|
break;
|
|
}
|
|
else if (!bHasBracket && (*localString == ':' || *localString == '/'))
|
|
{
|
|
// port number is specified
|
|
break;
|
|
}
|
|
|
|
localString++;
|
|
}
|
|
|
|
result.host_ = std::string(currentString, localString - currentString);
|
|
|
|
currentString = localString;
|
|
|
|
// is port number specified?
|
|
if (*currentString == ':')
|
|
{
|
|
currentString++;
|
|
|
|
// read port number
|
|
localString = currentString;
|
|
|
|
while (*localString && *localString != '/') localString++;
|
|
|
|
result.port_ = std::string(currentString, localString - currentString);
|
|
|
|
currentString = localString;
|
|
}
|
|
|
|
// end of string
|
|
if (!*currentString)
|
|
{
|
|
result.errorCode_ = LUrlParserError_Ok;
|
|
|
|
return result;
|
|
}
|
|
|
|
// skip '/'
|
|
if (*currentString != '/')
|
|
{
|
|
return ParseURL(LUrlParserError_NoSlash);
|
|
}
|
|
|
|
currentString++;
|
|
|
|
// parse the path
|
|
localString = currentString;
|
|
|
|
while (*localString && *localString != '#' && *localString != '?') localString++;
|
|
|
|
result.path_ = std::string(currentString, localString - currentString);
|
|
|
|
currentString = localString;
|
|
|
|
// check for query
|
|
if (*currentString == '?')
|
|
{
|
|
// skip '?'
|
|
currentString++;
|
|
|
|
// read query
|
|
localString = currentString;
|
|
|
|
while (*localString&&* localString != '#') localString++;
|
|
|
|
result.query_ = std::string(currentString, localString - currentString);
|
|
|
|
currentString = localString;
|
|
}
|
|
|
|
// check for fragment
|
|
if (*currentString == '#')
|
|
{
|
|
// skip '#'
|
|
currentString++;
|
|
|
|
// read fragment
|
|
localString = currentString;
|
|
|
|
while (*localString) localString++;
|
|
|
|
result.fragment_ = std::string(currentString, localString - currentString);
|
|
|
|
currentString = localString;
|
|
}
|
|
|
|
result.errorCode_ = LUrlParserError_Ok;
|
|
|
|
return result;
|
|
}
|