Extracting the domain from a URL
-
I wrote a Perl regular expression:
^https?:\\/\\/([a-z0-9\\.\\-]+\\.[a-z0-9\\-]{2,12})/.*
but it needs to be applied in a C program. I can't figure out how to use this regular expression to extract the domain from the link. The code I used:
re=pcre_compile(regexp,0,&error,&erroffset,NULL); if (!re) continue; if (pcre_exec(re,NULL,link,strlen(link),0,0,ovector,sizeof(ovector))>0) { printf("%s\n",ovector); } pcre_free(re);
I doubt the regular expression is correct, or perhaps the problem lies elsewhere.
-
I used the regular expression from one of the RFCs (RFC 2068, I believe); see http://ru.wikipedia.org/wiki/URI
/* POSIX extended regular expression that splits a URI reference into its
 * parts.  Capture groups: 2 = scheme, 4 = network location, 5 = path,
 * 7 = query, 9 = fragment (groups 1, 3, 6 and 8 include the delimiters). */
#define SPLIT_URI_REGEX "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
Note that this is a POSIX extended regular expression, not a PCRE one.
Example:
#include <pthread.h>
#include <regex.h>
#include <stdlib.h>
#include <string.h>

/*
 * Components of a URI as produced by split_uri().  Each field is either
 * NULL (component absent) or a heap-allocated, NUL-terminated string that
 * the caller must release with free().
 */
struct uri {
    char *scheme, *net_loc, *path, *query, *fragment;
};

/* Compiled form of SPLIT_URI_REGEX, built on first use by split_uri(). */
static regex_t reguri;
/* Guards the one-time compilation of reguri.  The portable static
 * initializer is used; the lock is never re-acquired by its holder. */
static pthread_mutex_t reglock = PTHREAD_MUTEX_INITIALIZER;
/* Non-zero once reguri has been compiled; only accessed under reglock. */
static int inireg = 0;

/* POSIX extended regular expression that splits a URI reference into its
 * parts; the capture groups of interest are named below. */
#define SPLIT_URI_REGEX \
    "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

/* Capture-group indices into the regmatch_t array (0 is the whole match). */
#define ISCHEME   2
#define INETLOC   4
#define IPATH     5
#define IQUERY    7
#define IFRAGMENT 9 /* group 8 would still carry the leading '#' */

/* Whole match plus the nine parenthesised groups of SPLIT_URI_REGEX. */
#define URI_NMATCH 10

/*
 * Copy the substring of `uri` described by match entry `p` into a newly
 * allocated string.
 *
 * Returns NULL when the group did not participate in the match
 * (rm_so < 0) or when allocation fails; otherwise a malloc()ed string
 * the caller must free().  The copy is sized from the match itself, so
 * there is no upper bound on the component length.
 */
static char *
mkuri_comp (char *uri, regmatch_t *p)
{
    if (p->rm_so < 0)
        return NULL;

    size_t len = (size_t)(p->rm_eo - p->rm_so);
    char *out = malloc(len + 1);
    if (out == NULL)
        return NULL;

    memcpy(out, uri + p->rm_so, len);
    out[len] = '\0';
    return out;
}

/*
 * Split `uri` into scheme, network location, path, query and fragment
 * and store newly allocated copies in `suri`.
 *
 * Every field of `suri` is first set to NULL, so on any failure (NULL
 * `uri`, regex compilation error, no match) the structure is left in a
 * well-defined state.  Components absent from `uri` stay NULL.  Previous
 * contents of `suri` are overwritten without being freed.
 */
void
split_uri (char *uri, struct uri *suri)
{
    regmatch_t match[URI_NMATCH];
    int ready;

    if (suri == NULL)
        return;
    suri->scheme = NULL;
    suri->net_loc = NULL;
    suri->path = NULL;
    suri->query = NULL;
    suri->fragment = NULL;
    if (uri == NULL)
        return;

    /* Compile the pattern once.  The flag is only read and written while
     * holding the lock, so there is no unsynchronised access to it. */
    pthread_mutex_lock(&reglock);
    if (!inireg && regcomp(&reguri, SPLIT_URI_REGEX, REG_EXTENDED) == 0)
        inireg = 1;
    ready = inireg;
    pthread_mutex_unlock(&reglock);

    if (!ready)
        return;
    if (regexec(&reguri, uri, URI_NMATCH, match, 0) != 0)
        return;

    suri->scheme   = mkuri_comp(uri, &match[ISCHEME]);
    suri->net_loc  = mkuri_comp(uri, &match[INETLOC]);
    suri->path     = mkuri_comp(uri, &match[IPATH]);
    suri->query    = mkuri_comp(uri, &match[IQUERY]);
    suri->fragment = mkuri_comp(uri, &match[IFRAGMENT]);
}
Set the fields of struct uri to NULL before calling split_uri().