Extracting the domain from a URL



  • I wrote a Perl-style regular expression, ^https?:\\/\\/([a-z0-9\\.\\-]+\\.[a-z0-9\\-]{2,12})/.* , but it needs to be applied in a C program.

    1. I can't figure out how to run this regular expression to get the domain out of the link. The code I used:

          /* Compile the pattern; if compilation fails, skip to the next iteration
             of the enclosing loop (the loop itself is outside this excerpt). */
          re=pcre_compile(regexp,0,&error,&erroffset,NULL);
          if (!re) continue;
          /* NOTE(review): per the PCRE API the last argument of pcre_exec() is the
             number of int elements in ovector, whereas sizeof(ovector) is a byte
             count -- confirm against the declaration of ovector (not shown here). */
          if (pcre_exec(re,NULL,link,strlen(link),0,0,ovector,sizeof(ovector))>0)
          {
           /* NOTE(review): pcre_exec() stores integer offsets into link in ovector,
              not text, so "%s" cannot print the match. Capture group 1 should span
              link+ovector[2] .. link+ovector[3], e.g.
              printf("%.*s\n", ovector[3]-ovector[2], link+ovector[2]) -- verify. */
           printf("%s\n",ovector);
          }
          pcre_free(re);
      
    2. I'm not sure the regular expression itself is correct; it may be wrong.



  • I used an expression taken from some RFC (rfc2068, I believe). See http://ru.wikipedia.org/wiki/URI

    /*
     * Extended regular expression that splits a URI reference into parts.
     * Capture groups, counted by opening parenthesis:
     *   1 = "scheme:"      2 = scheme
     *   3 = "//authority"  4 = authority
     *   5 = path
     *   6 = "?query"       7 = query
     *   8 = "#fragment"    9 = fragment
     * Every group except the path is optional, so any input string matches.
     * NOTE(review): this looks like the URI-splitting expression from
     * RFC 3986 Appendix B -- confirm the source.
     */
    #define SPLIT_URI_REGEX \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
    

    This is a POSIX regular expression (not a PCRE one).

    Example:

    ...
    /* The five string components a URI is split into; a component that is
       absent is expected to be left as a null pointer. */
    struct uri {
      char *scheme;    /* text before the first ':' */
      char *net_loc;   /* network location, the part after "//" */
      char *path;      /* path component */
      char *query;     /* text after '?' */
      char *fragment;  /* trailing fragment part */
    };
    ...
    
    #include <pthread.h>
    #include <regex.h>
    #include <stdlib.h>
    #include <string.h>

    /* Compiled form of SPLIT_URI_REGEX; built on first use by split_uri(). */
    static regex_t reguri;

    /* Guards the one-time compilation of reguri. The error-checking initializer
       is a glibc extension, so fall back to the portable one where it is absent. */
    #ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP
    static pthread_mutex_t reglock = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
    #else
    static pthread_mutex_t reglock = PTHREAD_MUTEX_INITIALIZER;
    #endif

    /* Non-zero once reguri holds a compiled expression. */
    static int inireg = 0;

    /*
     * URI-splitting expression (POSIX extended syntax). The '*' quantifiers and
     * the doubled backslash before '?' are required: without them each component
     * matches at most one character and "\?" is not a valid C escape sequence.
     * Capture groups: 1 "scheme:", 2 scheme, 3 "//net_loc", 4 net_loc, 5 path,
     * 6 "?query", 7 query, 8 "#fragment", 9 fragment.
     */
    #define SPLIT_URI_REGEX \
      "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

    /* Indices into the regmatch_t array for each component of interest. */
    #define ISCHEME 2
    #define INETLOC 4
    #define IPATH 5
    #define IQUERY 7
    /* NOTE(review): group 8 still contains the leading '#'; the bare fragment is
       group 9. Switching to 9 requires a match array of at least 10 entries. */
    #define IFRAGMENT 8

    /*
     * Return a newly allocated, NUL-terminated copy of the part of uri covered
     * by the match *p.
     *
     * Returns NULL when the group did not take part in the match (rm_so < 0),
     * when the offsets are inconsistent, or when memory cannot be allocated.
     * The caller owns the returned string and releases it with free().
     */
    static char *
    mkuri_comp (char *uri, regmatch_t *p)
    {
        if (p->rm_so < 0 || p->rm_eo < p->rm_so)
            return NULL;

        /* Allocate exactly what is needed instead of staging the text in a
           fixed 4096-byte stack buffer, which overflowed on long components. */
        size_t len = (size_t)(p->rm_eo - p->rm_so);
        char *copy = malloc(len + 1);
        if (copy == NULL)
            return NULL;

        memcpy(copy, uri + p->rm_so, len);
        copy[len] = '\0';
        return copy;
    }

    /*
     * Split uri into scheme, network location, path, query and fragment and
     * store a separately allocated copy of each in *suri.
     *
     * All five fields are first set to NULL; a component that is absent from
     * uri stays NULL. The fields also stay NULL if the expression cannot be
     * compiled or does not match. Each non-NULL field is produced by
     * mkuri_comp() and belongs to the caller.
     */
    void
    split_uri (char *uri, struct uri *suri)
    {
        int ready;

        /* Start from a known state so a failed split never leaves stale or
           uninitialised pointers behind. */
        suri->scheme = NULL;
        suri->net_loc = NULL;
        suri->path = NULL;
        suri->query = NULL;
        suri->fragment = NULL;

        /* Compile the expression once. The flag is read only while holding the
           lock, and it is set only when regcomp() actually succeeded, so a
           failed compilation is retried on the next call instead of being used. */
        pthread_mutex_lock(&reglock);
        if (!inireg && regcomp(&reguri, SPLIT_URI_REGEX, REG_EXTENDED) == 0)
            inireg = 1;
        ready = inireg;
        pthread_mutex_unlock(&reglock);
        if (!ready)
            return;

        /* IFRAGMENT is the highest group index read below, so the array needs
           IFRAGMENT + 1 entries (entry 0 is the whole match). */
        regmatch_t match[IFRAGMENT + 1];
        if (regexec(&reguri, uri, IFRAGMENT + 1, match, 0) != 0)
            return;

        suri->scheme = mkuri_comp(uri, &match[ISCHEME]);
        suri->net_loc = mkuri_comp(uri, &match[INETLOC]);
        suri->path = mkuri_comp(uri, &match[IPATH]);
        suri->query = mkuri_comp(uri, &match[IQUERY]);
        suri->fragment = mkuri_comp(uri, &match[IFRAGMENT]);
    }

    Zero out the fields of struct uri before calling split_uri().




Suggested Topics

  • 2
  • 2
  • 2
  • 2
  • 2
  • 2
  • 2
  • 2
  • 2
  • 2
  • 2
  • 2
  • 2
  • 2
  • 2