URL 분석

컴퓨터이야기/C 2005/12/29 15:14
원래는 ftp, news, mms 등 프로토콜별로 분석하려 했으나
왠지 http만 하면 될 것 같아서..
뭐, 결국은 완벽하게 제대로 구현을 안 했다는 얘기.
그래도 http는 잘된다는 핑계를.


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#define T_COLON 0x01 /* ':' */
#define T_SLASH 0x02 /* '/' */
#define T_QUESTION 0x04 /* '?' */
#define T_HASH 0x08 /* '#' */
#define T_NUL 0x80 /* '\0' */


#define NOTEND_SCHEME (0xff)
#define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL)
#define NOTEND_PATH (T_QUESTION | T_HASH | T_NUL)

#define DEFAULT_HTTP_PORT 80

/*
 * Components of a parsed URL as produced by get_uri_components().
 * The string members are heap-allocated and released by free_uri();
 * a member is left unset when the URL does not contain that part.
 */
typedef struct uri
{
    unsigned char *scheme ;     /* text before "://", e.g. "http" */
    unsigned char *hostname ;   /* host part, without userinfo and port */
    unsigned char *path ;       /* path up to (not including) '?' or '#' */
    unsigned char *query ;      /* text between '?' and '#', if any */
    unsigned long ipaddress ;   /* not filled in by the parser; reset by free_uri() */
    unsigned char *linktext ;   /* not filled in by the parser; freed by free_uri() */
    unsigned short port ;       /* unsigned so the whole 0..65535 range fits */
    int cycle ;                 /* not used by the code in this file */
} uri_t;

/* One entry of the scheme table: a scheme name and its default port. */
typedef struct {
const char *name;
unsigned short default_port;
} schemes_t;

/*
 * Delimiter lookup table indexed by character code (0..255).
 * Only five entries are non-zero: '\0' (0), '#' (35), '/' (47), ':' (58)
 * and '?' (63); each holds the matching T_* flag. Every other character
 * maps to 0, i.e. "not a delimiter".
 */
static const unsigned char uri_delims[256] = {
T_NUL,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,T_HASH,0,0,0,0,
0,0,0,0,0,0,0,T_SLASH,0,0,0,0,0,0,0,0,0,0,T_COLON,0,
0,0,0,T_QUESTION,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/*
 * Known schemes and their default ports. The entry whose name is NULL
 * terminates the list; only "http" is registered.
 */
static schemes_t schemes[] = {
{"http", DEFAULT_HTTP_PORT},
{NULL, 0xFFFF}
};


/*
 * Parse a URL into a heap-allocated uri_t. Returns NULL when the URL is
 * rejected (for example when the port is not a number).
 */
uri_t *get_uri_components( const char *) ;
/*
 * Free the strings held by a uri_t and reset its port and ipaddress.
 * The uri_t structure itself is not freed.
 */
void free_uri(uri_t *);

////////////////////////////////////////////////
// function definition
////////////////////////////////////////////////

/*
 * Look up the default port of a scheme name (case-insensitive).
 *
 * str: NUL-terminated scheme name such as "http"; may be NULL.
 * Returns the port registered in schemes[], or 0 when str is NULL or the
 * scheme is not in the table.
 */
static unsigned short default_port_for_scheme(const unsigned char *str)
{
    const schemes_t *scheme ;

    if( str == NULL ) return 0;

    /* The table ends at the entry whose name is NULL. */
    for( scheme = schemes; scheme->name != NULL; ++scheme )
    {
        /* strcasecmp() takes plain char pointers, hence the cast. */
        if( strcasecmp((const char *)str, scheme->name) == 0 )
            return scheme->default_port ;
    }

    return 0;
}

/*
 * Copy `len` bytes starting at `start` into a newly allocated,
 * NUL-terminated buffer. Returns NULL when the allocation fails.
 */
static unsigned char *uri_dup_span(const char *start, size_t len)
{
    unsigned char *copy = malloc(len + 1);

    if( copy != NULL )
    {
        memcpy(copy, start, len);
        copy[len] = '\0';
    }
    return copy ;
}

/* Release a partially built result; always returns NULL. */
static uri_t *uri_discard(uri_t *uricomp)
{
    free_uri(uricomp);
    free(uricomp);
    return NULL ;
}

/*
 * Split the tail of a URL (everything from the path onwards) into path
 * and query. Text after '#' is ignored.
 * Returns 0 on success, -1 when an allocation fails.
 */
static int uri_split_path(uri_t *uricomp, const char *tail)
{
    const char *s = tail ;
    const char *frag ;

    while( (uri_delims[(unsigned char)*s] & NOTEND_PATH) == 0 ) ++s ;

    if( s != tail )
    {
        uricomp->path = uri_dup_span(tail, (size_t)(s - tail));
        if( uricomp->path == NULL ) return -1 ;
    }

    if( *s == '?' )
    {
        ++s ;
        frag = strchr(s, '#');
        uricomp->query = uri_dup_span(s, frag ? (size_t)(frag - s) : strlen(s));
        if( uricomp->query == NULL ) return -1 ;
    }
    return 0 ;
}

/*
 * Parse `uri` into its components.
 *
 * Input of the form scheme://[userinfo@]host[:port][/path][?query][#frag]
 * yields scheme, hostname, port, path and query. Anything else (a string
 * starting with '/', or one without "scheme://") is treated as a path
 * with an optional query; scheme and hostname then stay NULL and port 0.
 * When no port is given, the scheme's default port is used (0 if the
 * scheme is unknown).
 *
 * Returns a zero-initialised, heap-allocated uri_t that the caller
 * releases with free_uri() followed by free(). Returns NULL when `uri`
 * is NULL, when memory runs out, or when the port is not a decimal
 * number in 0..65535 (bad URL). Nothing is leaked on failure.
 *
 * Limitation: the first ':' after the userinfo is taken as the port
 * separator, so bracketed IPv6 literals are not understood.
 */
uri_t *get_uri_components(const char *uri)
{
    const char *s ;
    const char *hostinfo ;
    const char *host_end ;
    const char *colon ;
    long port ;
    uri_t *uricomp ;

    if( uri == NULL ) return NULL ;

    /* calloc so that every pointer member starts out NULL for free_uri(). */
    uricomp = calloc(1, sizeof *uricomp);
    if( uricomp == NULL ) return NULL ;

    if( uri[0] != '/' )
    {
        /* Locate the scheme: scan up to the first delimiter of any kind. */
        s = uri ;
        while( (uri_delims[(unsigned char)*s] & NOTEND_SCHEME) == 0 ) ++s ;

        /* Only "<scheme>://" starts an absolute URL; otherwise it is a path. */
        if( s != uri && s[0] == ':' && s[1] == '/' && s[2] == '/' )
        {
            uricomp->scheme = uri_dup_span(uri, (size_t)(s - uri));
            if( uricomp->scheme == NULL ) return uri_discard(uricomp);

            /* Host information runs until '/', '?', '#' or end of string. */
            hostinfo = s + 3 ;
            host_end = hostinfo ;
            while( (uri_delims[(unsigned char)*host_end] & NOTEND_HOSTINFO) == 0 ) ++host_end ;

            /* Skip "userinfo@": the host starts after the last '@'. */
            for( s = host_end; s > hostinfo; --s )
            {
                if( s[-1] == '@' )
                {
                    hostinfo = s ;
                    break ;
                }
            }

            colon = memchr(hostinfo, ':', (size_t)(host_end - hostinfo));

            uricomp->hostname = uri_dup_span(hostinfo,
                (size_t)((colon ? colon : host_end) - hostinfo));
            if( uricomp->hostname == NULL ) return uri_discard(uricomp);

            if( colon != NULL && colon + 1 != host_end )
            {
                /* Explicit port: digits only, bounded so it cannot overflow. */
                port = 0 ;
                for( s = colon + 1; s != host_end; ++s )
                {
                    if( *s < '0' || *s > '9' ) return uri_discard(uricomp); /* BAD URL */
                    port = port * 10 + (*s - '0');
                    if( port > 65535 ) return uri_discard(uricomp);         /* BAD URL */
                }
            }
            else
            {
                /* No port, or an empty one ("host:"): use the scheme default. */
                port = default_port_for_scheme(uricomp->scheme);
            }
            uricomp->port = port ;

            /* The path, if any, starts where the host information ended. */
            uri = host_end ;
        }
    }

    if( uri_split_path(uricomp, uri) != 0 ) return uri_discard(uricomp);

    return uricomp ;
}

/*
 * Release the strings owned by `uri` and reset its numeric fields.
 *
 * The uri_t structure itself is NOT freed; the caller still has to
 * free() it. Passing NULL is allowed and does nothing, so the result of
 * a failed get_uri_components() call can be handed over unchecked.
 * Each pointer is set to NULL after being freed, which makes a second
 * call on the same structure harmless.
 */
void free_uri( uri_t *uri )
{
    if( uri == NULL ) return ;

    /* free(NULL) is a no-op, so the members need no individual checks. */
    free(uri->scheme);
    uri->scheme = NULL ;
    free(uri->hostname);
    uri->hostname = NULL ;
    free(uri->path);
    uri->path = NULL ;
    free(uri->query);
    uri->query = NULL ;
    free(uri->linktext);
    uri->linktext = NULL ;

    uri->port = 0;
    uri->ipaddress = 0;
}



사용 방법은 uri.o를 만드신 후 링크하시고, 아래처럼 호출하시면 됩니다.

/* Usage example: parse one URL and print its host name. */
int main()
{
     /* Sample input; replace with the URL to analyse. */
     const char *url = "http://www.example.com:8080/index.html?q=1";
     uri_t *u = get_uri_components(url);

     /* NULL means the URL was rejected. */
     if( u == NULL ) return 1;

     /* hostname is only set for URLs of the form scheme://host... */
     if( u->hostname ) printf( "%s\n", u->hostname );

     free_uri(u);   /* releases the component strings only */
     free(u);       /* the structure itself is heap-allocated by get_uri_components() */
     return 0;
}
Posted by 백구씨쥔장