diff options
Diffstat (limited to 'src/utilfuns/url.cpp')
-rw-r--r-- | src/utilfuns/url.cpp | 269 |
1 files changed, 269 insertions, 0 deletions
diff --git a/src/utilfuns/url.cpp b/src/utilfuns/url.cpp new file mode 100644 index 0000000..3a50e2c --- /dev/null +++ b/src/utilfuns/url.cpp @@ -0,0 +1,269 @@ +/****************************************************************************** +* url.cpp - code for an URL parser utility class +* +* $Id: url.cpp 2013 2006-11-25 22:01:04Z dglassey $ +* +* Copyright 2003 CrossWire Bible Society (http://www.crosswire.org) +* CrossWire Bible Society +* P. O. Box 2528 +* Tempe, AZ 85280-2528 +* +* This program is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License as published by the +* Free Software Foundation version 2. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +*/ + +//Sword includes +#include <url.h> +#include <swlog.h> + +//system includes +#include <ctype.h> +#include <map> +#include <stdio.h> +#include <iostream> + +SWORD_NAMESPACE_START + +/** + * Constructors/Destructors + */ +URL::URL(const char* url) + : url(""), + protocol(""), + hostname(""), + path("") +{ + if (url && strlen(url)) { + this->url = url; + parse(); + } +} + +const char* URL::getProtocol() const { + return protocol.c_str(); +} + +const char* URL::getHostName () const { + return hostname.c_str(); +} + +const char* URL::getPath() const { + return path.c_str(); +} + +const URL::ParameterMap &URL::getParameters() const { + return parameterMap; +} + +/** + * Returns the value of an URL parameter. For the URL "http://www.crosswire.org/index.jsp?page=test&user=nobody" the value of the parameter "page" would be "test". + * If the parameter is not set an empty string is returned. + */ +const char* URL::getParameterValue(const char* name) const { + static SWBuf emptyStr(""); + + ParameterMap::const_iterator it = parameterMap.find(name); + static SWBuf retVal; + + if (it != parameterMap.end()) + retVal = it->second.c_str(); + else + retVal = emptyStr.c_str(); + + return retVal.c_str(); +} + + +/** Parse the URL. + * Parse the URL into the protocol, the hostname, the path and the paramters with their values + * + */ +void URL::parse () { + /* format example protocol://hostname/path/path/path.pl?param1=value1&param2=value2 + * we include the script name in the path, so the path would be /path/path/path.pl in this example + * & could also be & + */ + + //1. Init + const char* urlPtr = url.c_str(); + + protocol = ""; + hostname = ""; + path = ""; + parameterMap.clear(); + + // 2. Get the protocol, which is from the begining to the first :// + const char* end = strchr( urlPtr, ':' ); + if (end) { //protocol was found + protocol.append(urlPtr, end-urlPtr); + urlPtr = end + 1; + + //find the end of the protocol separator (e.g. "://") + for (; (*urlPtr == ':') || (*urlPtr == '/'); urlPtr++); + } + + //3.Get the hostname part. This is the part from pos up to the first slash + bool checkPath = true; + bool checkParams = true; + bool checkAnchor = true; + + end = strchr(urlPtr, '/'); + if (!end) { + checkPath = false; + end = strchr(urlPtr, '?'); + } + if (!end) { + checkParams = false; + end = strchr(urlPtr, '#'); + } + if (!end) { + checkAnchor = false; + end = urlPtr+strlen(urlPtr); + } + + hostname.append(urlPtr, end-urlPtr); + + urlPtr = end + ((*end)? 1 : 0); + + if (checkPath) { + end = strchr(urlPtr, '?'); + if (!end) { + checkParams = false; + end = strchr(urlPtr, '#'); + } + if (!end) { + checkAnchor = false; + end = urlPtr+strlen(urlPtr); + } + + path.append(urlPtr, end-urlPtr); + + urlPtr = end + ((*end)? 1 : 0); + } + + if (checkParams) { + //5. Fill the map with the parameters and their values + SWBuf paramName; + SWBuf paramValue; + +/* + end = strchr(urlPtr, '#'); + if (!end) { + checkAnchor = false; + end = urlPtr+strlen(urlPtr); + } +*/ + //end = (start && strchr(start, '?')) ? strchr(start, '?')+1 :0; + end = urlPtr; + while (end) { + paramName = ""; + paramValue = ""; + + //search for the equal sign to find the value part + const char* valueStart = strchr(end, '='); + if (valueStart) { + const char* valueEnd = strstr(valueStart, "&") ? strstr(valueStart, "&") : strstr(valueStart, "&"); //try to find a new paramter part + + if (valueEnd) { + paramName.append(end, valueStart-end); + paramValue.append(valueStart+1, valueEnd-(valueStart+1)); + } + else { //this is the last paramter of the URL + paramName.append(end, valueStart-end); + paramValue.append(valueStart+1); + } + + if (paramName.length() && paramValue.length()) {//insert the param into the map if it's valid + paramName = decode(paramName.c_str()); + paramValue = decode(paramValue.c_str()); + + parameterMap[ paramName ] = paramValue; + } + } + else { + break; //no valid parameter in the url + } + + const char *start = end+1; + end = strstr(start, "&") ? strstr(start, "&")+5 : (strstr(start, "&") ? strstr(start, "&")+1 : 0); //try to find a new paramter part + } + } +} + +const SWBuf URL::encode(const char *urlText) { + /*static*/ SWBuf url; + url = urlText; + + typedef std::map< unsigned char, SWBuf > DataMap; + DataMap m; + for (unsigned short int c = 32; c <= 255; ++c) { //first set all encoding chars + if ( (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || strchr("-_.!~*'()", c)) { + continue; //we don't need an encoding for this char + } + + SWBuf buf; + buf.setFormatted("%%%-.2X", c); + m[c] = buf; + } + //the special encodings for certain chars + m[' '] = '+'; + + SWBuf buf; + const int length = url.length(); + for (int i = 0; i < length; i++) { //fill "buf" + const char& c = url[i]; + buf.append( ((m[c].length()) ? m[c] : SWBuf(c)) ); + } + + url = buf; + return url; +} + +const SWBuf URL::decode(const char *encoded) { + /*static*/ SWBuf text; + text = encoded; + + SWBuf decoded; + const int length = text.length(); + int i = 0; + + while (i < length) { + char a = text[i]; + + if ( a == '+' ) { //handle special cases + decoded.append(' '); + } + else if ( (a == '%') && (i+2 < length)) { //decode the %ab hex encoded char + const char b = toupper( text[i+1] ); + const char c = toupper( text[i+2] ); + + if (isxdigit(b) && isxdigit(c)) { //valid %ab part + unsigned int dec = 16 * ((b >= 'A' && b <= 'F') ? (b - 'A' + 10) : (b - '0')); //dec value of the most left digit (b) + dec += (c >= 'A' && c <= 'F') ? (c - 'A' + 10) : (c - '0'); //dec value of the right digit (c) + + decoded.append((char)dec); //append the decoded char + + i += 2; //we jump over the %ab part; we have to leave out three, but the while loop adds one, too + } + } + else { //just append the char + decoded.append(a); + } + + i++; + } + + if (decoded.length()) { + text = decoded; + } + return text; +} + +SWORD_NAMESPACE_END |