Jeff_Relf
2005-01-12 16:08:11 UTC
Re: X.CPP's HTML to TXT converter in Attachmnts(),
I just fixed a buffer overflow problem
caused by posts that begin like this
( Leave it to unix deviants to intentionally post malformed HTML ):
<html><input type crash></html> news:slrncu9h9r.gve.The-Central-***@linux.client.comcast.net
The problem and the fix are described in the code below
( from http://www.Cotse.NET/users/jeffrelf/X.CPP ).
// Attachmnts: walks the lines of one post, passing readable text to Fm
// ( through the _Fm / __Fm macros ) and writing Base64 MIME parts that
// carry a quoted name to disk.
// BB points to the first line ( not char ), EE to one beyond the last line.
// Mode == 1 if BB points to the raw header rather than just the body.
// Every scan below pre-increments ( ++ PP ) before it tests a line,
// so the line at BB itself is never examined.
// Paths, tried in order: header ( Mode == 1 ), HTML to text,
// MIME multipart, then plain text with PGP blocks left out.
// __Fm emits a line unless it starts with '>' and has another '>' as its
// second or third character.
// NOTE(review): no return type is written and every return is bare.
Attachmnts ( int Mode, LnA BB, LnA EE ) { LnA TT = 0, PP = BB ; LnP P ;
// if ( Mode == 2 ) { while ( ++ PP < EE ) __Fm return ; }
// Header: emit lines up to the first empty one, then one empty string.
// BB is moved to where that scan stopped.
if ( Mode == 1 ) {
while ( ++ PP < EE && * * PP ) __Fm BB = PP ; _Fm(("")) }
// No line follows PP: nothing to do.
if ( PP + 1 >= EE ) return ;
// ---- HTML to text: taken when the next line starts with "<HTML>"
// ( EqiN is ! strnicmp, so case is ignored ).
if ( EqiN( PP [ 1 ], "<HTML>", 6 ) ) {
// Each line is rewritten in place: B is the write position, P the read
// position, T the start of the current run of text.
while ( ++ PP < EE ) { LnP T, B = * PP, P = B - 1 ; char C = 1 ;
while ( C && * ( T = P + 1 ) ) {
// Run P forward to the next '<' or to the end of the line.
// Tag is set when a '<' was found and something follows it.
while ( ( C = * ++ P ) && C != '<' ); int Tag = C == '<' && P [ 1 ];
// Text came before the '<': cut it off there and scan it for '&'.
if ( P > T ) { * P = 0 ; P = T - 1 ; C = 1 ;
while ( C && * ( T = P + 1 ) ) {
while ( ( C = * ++ P ) && C != '&' );
// No '&', or '&' is the last character: copy the run unchanged.
// NOTE(review): Str is not shown here; it is used as if it formats into B
// and returns the number of characters written - confirm.
if ( ! C || ! P [ 1 ] ) B += Str( B, "%s", T );
// Otherwise cut the run at the '&' and work out one replacement char, CC.
else { * P = 0 ; C = * ++ P ; char CC ;
// "&#": CC is the number that follows; P then moves past the digits.
if ( C == '#' ) { CC = atoi( ++ P );
while ( ( C = * ++ P ) && isdigit( C ) ); }
else {
// "lt" and "gt" are recognised; any other name gets one placeholder
// character. P then moves to the ';' ( or to the end of the line ).
if ( EqiN( P, "lt", 2 ) ) CC = '<' ;
else if ( EqiN( P, "gt", 2 ) ) CC = '>' ; else CC = '¿' ;
while ( ( C = * ++ P ) && C != ';' ); }
// By only adding a character when the &name; / &#nnn; type syntax
// is found, as I do here, a buffer overflow is avoided.
// Before, it was adding a char to all lines, regardless,
// which only worked for HTML where tags and such
// are always removed from each line, leaving room
// ( e.g. HTML-only e-mails from Hotmail.COM and vendors ).
B += Str( B, "%s%c", T, CC ); } } }
// Skip the rest of the tag, up to its '>' ( or the end of the line ).
if ( Tag ) while ( ( C = * ++ P ) && C != '>' ); }
// Lines that produced no text are dropped; the rest go through __Fm.
if ( B == * PP ) continue ; __Fm } return ; }
// Next line starts with "-----BEGIN PGP": advance PP ( and BB ) by 3 lines.
if ( EqN( PP [ 1 ], "-----BEGIN PGP", 14 ) ) BB = PP += 3 ;
// Look at up to 7 more lines for one that starts with "--".
Loop( 7 ) if ( ++ PP >= EE || EqN( * PP ,"--", 2 ) ) break; int Body = 0 ;
// ---- MIME multipart: a "--" line directly followed by a "Content-" line.
// NOTE(review): PP [ 1 ] is read here without checking that PP + 1 < EE.
if ( PP < EE && EqN( * PP ,"--", 2 ) && EqiN( PP [ 1 ],"Content-", 8 ) ) {
// TT remembers the boundary line.
TT = PP ;
// Read this part's header ( up to an empty line ), noting text/plain.
while ( ++ PP < EE && * * PP )
if ( EqiN( * PP,"Content-Type: text/plain", 24 ) ) Body = 1 ;
// Not text/plain: go back to the boundary. Otherwise emit the part's lines
// up to the next "--" + "Content-" pair, and stop if the post ends first.
if ( ! Body ) PP = TT ; else {
while ( ++ PP < EE && ! ( EqN( * PP,"--", 2 )
&& EqiN( PP [ 1 ],"Content-", 8 ) ) ) __Fm if ( PP >= EE ) return ; }
// One pass of this loop per MIME part; PP is on the part's boundary line.
// TTL is the boundary's length, used below for prefix comparisons.
LOOP { TT = PP ; LnP Nam = 0 ; const int NamMax = 300 ;
int TTL = strlen ( * TT ), B64 = 0, Text = 0, HTML = 0 ; LnP B = 0 ;
// Part header, up to an empty line.
while ( ++ PP < EE && * * PP ) { P = * PP ;
// NOTE(review): _Text is assigned only when Type is non-zero, yet it is
// read on the following line for every header line.
int _Text, Type = EqiN( P,"Content-Type:", 13 );
if ( Type ) _Text = EqiN( P += 13," Text", 5 ), P += 5 ;
// text/html, text/x-vcard and any Content-ID line set HTML, which
// suppresses the part's output further down.
if ( _Text && ( EqiN( P,"/HTML", 5 ) || EqiN( P,"/x-vcard", 8 ) )
|| EqiN( P,"Content-ID:", 11 ) ) { HTML = 1 ; continue ; }
if ( EqiN( P,"Content-Transfer-Encoding: Base64", 33 ) ) {
B64 = 1 ; continue ; }
// On a Content-Type line, look for a quoted string within 70 characters
// on this line or the next. B is left on its first character and the
// closing quote ( searched within NamMax ) is overwritten with 0.
if ( Type && ( ( B = Find ( '"', P, 70 ) )
|| ( B = Find ( '"', PP [ 1 ], 70 ) ) )
&& ( P = Find ( '"', ++ B, NamMax ) ) ) * P = 0, Text = _Text ; }
// A part is saved to a file only if it is Base64 and a quoted name was found.
if ( PP + 1 >= EE ) return ; int Att = B64 && B ; FILE * fp ;
// Replace each '\', '/' and ':' in the name with '_'. P is left just past
// the last character replaced, and that tail is the name that is opened.
if ( Att ) { LnP P = B, T, SS = "\\/:", S = SS - 1 ;
while ( * ++ S )
while ( T = strchr ( P, * S ) ) * T = '_', P = ++ T ;
fp = fopen ( Nam = P, "wb" ); Att = fp != 0 ;
if ( ! Att )
_Fm ( ( "_ %s _ Can't be Created.", Nam ) ) }
// Not saved: emit the part's lines ( unless HTML is set ) up to the next
// line that begins with the boundary text.
if ( ! Att ) { PP -- ;
while ( ++ PP < EE && ! EqN( * PP, * TT, TTL ) )
if ( ! HTML ) _Fm ( ( "%s", * PP ) )
if ( PP + 1 >= EE ) return ;
// Another part follows directly: go round again.
if ( EqiN( PP [ 1 ],"Content-", 8 ) ) continue;
// Otherwise look up to 7 lines ahead for a "--" + "Content-" pair.
Loop( 7 ) if ( ++ PP >= EE || EqN( * PP ,"--", 2 ) ) break;
if ( PP + 1 < EE && EqN( * PP ,"--", 2 )
&& EqiN( PP [ 1 ],"Content-", 8 ) ) continue; return ; }
// ---- Base64 decode into Buff. P is the last byte written, X collects
// 6 bits per character, Cnt counts the characters of the current group of
// four and By counts those that are not '='.
P = Buff - 1 ; uint X, By, Cnt ; X = By = Cnt = 0 ; int WNCR = 1 ;
LOOP { LnP S = * ++ PP ;
// Write Buff out when fewer than 200 bytes of room remain.
// rv is not declared in this function.
if ( ( rv = P - Buff + 1 ) > Buff_Room - 200 )
fwrite( Buff, 1, rv, fp ), P = Buff - 1 ;
// NOTE(review): this return leaves fp open and drops what is still in Buff.
if ( PP + 1 >= EE ) return ;
// A line that begins with the boundary text ends the attachment.
if ( EqN( * PP, * TT, TTL ) ) {
if ( EqiN( PP [ 1 ],"Content-", 8 ) ) break ;
Loop( 7 ) if ( ++ PP >= EE || EqN( * PP ,"--", 2 ) ) break ;
break ; } S -- ;
LOOP { if ( ! * ++ S ) break ; X <<= 6 ; X |= Tab [ * S ];
if ( * S != '=' ) By ++ ; if ( ++ Cnt < 4 ) continue;
// Four characters read: take By * 6 / 8 bytes from X, starting at its
// third byte and moving down.
// NOTE(review): that order assumes X is stored low byte first - confirm.
LnP XP = ( ( LnP ) & X ) + 3 ;
Loop( By * 6 / 8 ) { char C = * -- XP ;
// For text parts, a CR ( 13 ) that directly follows another CR is dropped.
if ( ! Text || C != 13 || WNCR ) * ++ P = C ; WNCR = C != 13 ; }
X = By = Cnt = 0 ; } }
// Text parts get a closing CR LF; then write what is left and close.
if ( Text ) * ++ P = 13, * ++ P = 10 ;
fwrite( Buff, 1, P - Buff + 1, fp ); fclose( fp );
_Fm ( ( " Created: _ %s _", Nam ) ) } return ; }
// ---- Plain text: emit every line after BB, leaving out everything from a
// "-----BEGIN PGP" line up to and including the next "-----END PGP" line.
PP = BB ; int Pgp = 0 ; // Ln.P - Ln.B I_Cook
while ( ++ PP < EE ) {
if ( EqN( * PP, "-----BEGIN PGP", 14 ) ) { Pgp = 1 ; continue ; }
if ( Pgp ) {
if ( EqN( * PP, "-----END PGP", 12 ) ) Pgp = 0 ; continue ; }
__Fm } }
.+.+.+.+.+.+
// Endless loop; left with break, continue or return.
#define LOOP while ( 1 )
// Runs the statement that follows N times, with J counting 0 .. N - 1.
// It declares J and LLL in the enclosing scope, so it can be used only
// once per scope; N is evaluated once.
#define Loop( N ) int J = - 1, LLL = N ; while ( ++ J < LLL )
// Equality predicates, written as a prefix to the call,
// e.g. EqN( A, B, Len ). Non-zero when the strings match.
#define Eq ! strcmp
#define Eqi ! stricmp
#define EqN ! strncmp
#define EqiN ! strnicmp
// Frees Buff and allocates a new one of at least est_Buff bytes,
// recording its size in Buff_Room.
// NOTE(review): the malloc result is not checked.
#define Re_Buff( N ) free( Buff ), \
Buff = ( LnP ) malloc( Buff_Room = ER ( N, est_Buff ) )
// Calls Fm with the parenthesised argument list X. PP, EE and TT are
// saved as offsets from Ln.B before the call and rebuilt from Ln.B after it.
// Fix: TT is now restored from its own offset t. It used to be restored
// from e, which left TT equal to EE after every call.
#define _Fm( X ) { int p = PP - Ln.B, e = EE - Ln.B, t = TT - Ln.B ; \
Fm X ; PP = Ln.B + p ; EE = Ln.B + e ; TT = Ln.B + t ; }
// Emits the line at PP through _Fm unless it starts with '>' and has
// another '>' as its second or third character.
// Fix: P [ 2 ] is looked at only when P [ 1 ] is not the terminator, so a
// line holding just ">" is no longer read past its end.
#define __Fm if ( ! ( * ( P = * PP ) == '>' \
&& ( P [ 1 ] == '>' || ( P [ 1 ] && P [ 2 ] == '>' ) ) ) ) _Fm ( ( "%s", P ) )
typedef unsigned char uchar ;
// LnP is one line ( a C string ), LnA an array of lines.
// LnT groups three line-array pointers.
typedef char * LnP ; typedef LnP * LnA ; struct LnT { LnA B, P, E ; };
// Smallest size Re_Buff gives Buff; Buff_Room is the size it currently has.
const int est_Buff = 10240 ;
int Buff_Room ;
// Base64 tables: Tab maps a character to its 6-bit value, _Tab maps a
// 6-bit value back to its character.
// Fix: Tab had 'z' + 1 entries, but Attachmnts indexes it with whatever
// byte is in the message ( Tab [ * S ] ), so '{' .. '~' read past its end.
// It now has one entry per unsigned byte value.
// NOTE(review): where char is signed, bytes above 127 still give a negative
// index at that call site.
uchar Tab [ 256 ], _Tab [ 64 ];
// Returns the larger of X and Y ( Y when they are equal ).
__int64 ER ( __int64 X, __int64 Y ) {
if ( X > Y ) return X ;
return Y ; }
// Returns the smaller of X and Y ( Y when they are equal ).
__int64 er ( __int64 X, __int64 Y ) {
if ( X < Y ) return X ;
return Y ; }
// Looks for C among the first N characters of B, stopping at the
// terminating 0. Returns a pointer to the first match, or 0 when there
// is none ( or when N <= 0 ).
LnP Find ( char C, LnP B, int N ) {
for ( int Idx = 0 ; Idx < N ; Idx ++ ) {
if ( B [ Idx ] == C ) return B + Idx ;
if ( ! B [ Idx ] ) break ; }
return 0 ; }
.+.+.+.+.+.+
// Give Buff its minimum size ( est_Buff ).
Re_Buff( 0 );
// Build the Base64 tables: _Tab maps a 6-bit value to its character and
// Tab maps the character back to the value. CC holds the first character
// of each run of the alphabet: 26 from 'A', 26 from 'a', 10 from '0',
// then '+' and '/' on their own.
{ LnP CC = "Aa0+/"; int X = -1 ;
Loop( strlen ( CC ) ) {
int L = J > 2 ? 1 : J == 2 ? 10 : 26 ; char C = CC[ J ] - 1 ;
// Fix: the two stores are now separate statements. The single expression
// used before changed X and read it as an index with nothing ordering the
// two, so _Tab [ -1 ] could be written.
Loop( L ) { ++ C ; ++ X ; _Tab [ X ] = C ; Tab [ ( uchar ) C ] = X ; } } }
I just fixed a buffer overflow problem
caused by posts that begin like this
( Leave it to unix deviants to intentionally post malformed HTML ):
<html><input type crash></html> news:slrncu9h9r.gve.The-Central-***@linux.client.comcast.net
The problem and the fix are described in the code below
( from http://www.Cotse.NET/users/jeffrelf/X.CPP ).
// Attachmnts: walks the lines of one post, passing readable text to Fm
// ( through the _Fm / __Fm macros ) and writing Base64 MIME parts that
// carry a quoted name to disk.
// BB points to the first line ( not char ), EE to one beyond the last line.
// Mode == 1 if BB points to the raw header rather than just the body.
// Every scan below pre-increments ( ++ PP ) before it tests a line,
// so the line at BB itself is never examined.
// Paths, tried in order: header ( Mode == 1 ), HTML to text,
// MIME multipart, then plain text with PGP blocks left out.
// __Fm emits a line unless it starts with '>' and has another '>' as its
// second or third character.
// NOTE(review): no return type is written and every return is bare.
Attachmnts ( int Mode, LnA BB, LnA EE ) { LnA TT = 0, PP = BB ; LnP P ;
// if ( Mode == 2 ) { while ( ++ PP < EE ) __Fm return ; }
// Header: emit lines up to the first empty one, then one empty string.
// BB is moved to where that scan stopped.
if ( Mode == 1 ) {
while ( ++ PP < EE && * * PP ) __Fm BB = PP ; _Fm(("")) }
// No line follows PP: nothing to do.
if ( PP + 1 >= EE ) return ;
// ---- HTML to text: taken when the next line starts with "<HTML>"
// ( EqiN is ! strnicmp, so case is ignored ).
if ( EqiN( PP [ 1 ], "<HTML>", 6 ) ) {
// Each line is rewritten in place: B is the write position, P the read
// position, T the start of the current run of text.
while ( ++ PP < EE ) { LnP T, B = * PP, P = B - 1 ; char C = 1 ;
while ( C && * ( T = P + 1 ) ) {
// Run P forward to the next '<' or to the end of the line.
// Tag is set when a '<' was found and something follows it.
while ( ( C = * ++ P ) && C != '<' ); int Tag = C == '<' && P [ 1 ];
// Text came before the '<': cut it off there and scan it for '&'.
if ( P > T ) { * P = 0 ; P = T - 1 ; C = 1 ;
while ( C && * ( T = P + 1 ) ) {
while ( ( C = * ++ P ) && C != '&' );
// No '&', or '&' is the last character: copy the run unchanged.
// NOTE(review): Str is not shown here; it is used as if it formats into B
// and returns the number of characters written - confirm.
if ( ! C || ! P [ 1 ] ) B += Str( B, "%s", T );
// Otherwise cut the run at the '&' and work out one replacement char, CC.
else { * P = 0 ; C = * ++ P ; char CC ;
// "&#": CC is the number that follows; P then moves past the digits.
if ( C == '#' ) { CC = atoi( ++ P );
while ( ( C = * ++ P ) && isdigit( C ) ); }
else {
// "lt" and "gt" are recognised; any other name gets one placeholder
// character. P then moves to the ';' ( or to the end of the line ).
if ( EqiN( P, "lt", 2 ) ) CC = '<' ;
else if ( EqiN( P, "gt", 2 ) ) CC = '>' ; else CC = '¿' ;
while ( ( C = * ++ P ) && C != ';' ); }
// By only adding a character when the &name; / &#nnn; type syntax
// is found, as I do here, a buffer overflow is avoided.
// Before, it was adding a char to all lines, regardless,
// which only worked for HTML where tags and such
// are always removed from each line, leaving room
// ( e.g. HTML-only e-mails from Hotmail.COM and vendors ).
B += Str( B, "%s%c", T, CC ); } } }
// Skip the rest of the tag, up to its '>' ( or the end of the line ).
if ( Tag ) while ( ( C = * ++ P ) && C != '>' ); }
// Lines that produced no text are dropped; the rest go through __Fm.
if ( B == * PP ) continue ; __Fm } return ; }
// Next line starts with "-----BEGIN PGP": advance PP ( and BB ) by 3 lines.
if ( EqN( PP [ 1 ], "-----BEGIN PGP", 14 ) ) BB = PP += 3 ;
// Look at up to 7 more lines for one that starts with "--".
Loop( 7 ) if ( ++ PP >= EE || EqN( * PP ,"--", 2 ) ) break; int Body = 0 ;
// ---- MIME multipart: a "--" line directly followed by a "Content-" line.
// NOTE(review): PP [ 1 ] is read here without checking that PP + 1 < EE.
if ( PP < EE && EqN( * PP ,"--", 2 ) && EqiN( PP [ 1 ],"Content-", 8 ) ) {
// TT remembers the boundary line.
TT = PP ;
// Read this part's header ( up to an empty line ), noting text/plain.
while ( ++ PP < EE && * * PP )
if ( EqiN( * PP,"Content-Type: text/plain", 24 ) ) Body = 1 ;
// Not text/plain: go back to the boundary. Otherwise emit the part's lines
// up to the next "--" + "Content-" pair, and stop if the post ends first.
if ( ! Body ) PP = TT ; else {
while ( ++ PP < EE && ! ( EqN( * PP,"--", 2 )
&& EqiN( PP [ 1 ],"Content-", 8 ) ) ) __Fm if ( PP >= EE ) return ; }
// One pass of this loop per MIME part; PP is on the part's boundary line.
// TTL is the boundary's length, used below for prefix comparisons.
LOOP { TT = PP ; LnP Nam = 0 ; const int NamMax = 300 ;
int TTL = strlen ( * TT ), B64 = 0, Text = 0, HTML = 0 ; LnP B = 0 ;
// Part header, up to an empty line.
while ( ++ PP < EE && * * PP ) { P = * PP ;
// NOTE(review): _Text is assigned only when Type is non-zero, yet it is
// read on the following line for every header line.
int _Text, Type = EqiN( P,"Content-Type:", 13 );
if ( Type ) _Text = EqiN( P += 13," Text", 5 ), P += 5 ;
// text/html, text/x-vcard and any Content-ID line set HTML, which
// suppresses the part's output further down.
if ( _Text && ( EqiN( P,"/HTML", 5 ) || EqiN( P,"/x-vcard", 8 ) )
|| EqiN( P,"Content-ID:", 11 ) ) { HTML = 1 ; continue ; }
if ( EqiN( P,"Content-Transfer-Encoding: Base64", 33 ) ) {
B64 = 1 ; continue ; }
// On a Content-Type line, look for a quoted string within 70 characters
// on this line or the next. B is left on its first character and the
// closing quote ( searched within NamMax ) is overwritten with 0.
if ( Type && ( ( B = Find ( '"', P, 70 ) )
|| ( B = Find ( '"', PP [ 1 ], 70 ) ) )
&& ( P = Find ( '"', ++ B, NamMax ) ) ) * P = 0, Text = _Text ; }
// A part is saved to a file only if it is Base64 and a quoted name was found.
if ( PP + 1 >= EE ) return ; int Att = B64 && B ; FILE * fp ;
// Replace each '\', '/' and ':' in the name with '_'. P is left just past
// the last character replaced, and that tail is the name that is opened.
if ( Att ) { LnP P = B, T, SS = "\\/:", S = SS - 1 ;
while ( * ++ S )
while ( T = strchr ( P, * S ) ) * T = '_', P = ++ T ;
fp = fopen ( Nam = P, "wb" ); Att = fp != 0 ;
if ( ! Att )
_Fm ( ( "_ %s _ Can't be Created.", Nam ) ) }
// Not saved: emit the part's lines ( unless HTML is set ) up to the next
// line that begins with the boundary text.
if ( ! Att ) { PP -- ;
while ( ++ PP < EE && ! EqN( * PP, * TT, TTL ) )
if ( ! HTML ) _Fm ( ( "%s", * PP ) )
if ( PP + 1 >= EE ) return ;
// Another part follows directly: go round again.
if ( EqiN( PP [ 1 ],"Content-", 8 ) ) continue;
// Otherwise look up to 7 lines ahead for a "--" + "Content-" pair.
Loop( 7 ) if ( ++ PP >= EE || EqN( * PP ,"--", 2 ) ) break;
if ( PP + 1 < EE && EqN( * PP ,"--", 2 )
&& EqiN( PP [ 1 ],"Content-", 8 ) ) continue; return ; }
// ---- Base64 decode into Buff. P is the last byte written, X collects
// 6 bits per character, Cnt counts the characters of the current group of
// four and By counts those that are not '='.
P = Buff - 1 ; uint X, By, Cnt ; X = By = Cnt = 0 ; int WNCR = 1 ;
LOOP { LnP S = * ++ PP ;
// Write Buff out when fewer than 200 bytes of room remain.
// rv is not declared in this function.
if ( ( rv = P - Buff + 1 ) > Buff_Room - 200 )
fwrite( Buff, 1, rv, fp ), P = Buff - 1 ;
// NOTE(review): this return leaves fp open and drops what is still in Buff.
if ( PP + 1 >= EE ) return ;
// A line that begins with the boundary text ends the attachment.
if ( EqN( * PP, * TT, TTL ) ) {
if ( EqiN( PP [ 1 ],"Content-", 8 ) ) break ;
Loop( 7 ) if ( ++ PP >= EE || EqN( * PP ,"--", 2 ) ) break ;
break ; } S -- ;
LOOP { if ( ! * ++ S ) break ; X <<= 6 ; X |= Tab [ * S ];
if ( * S != '=' ) By ++ ; if ( ++ Cnt < 4 ) continue;
// Four characters read: take By * 6 / 8 bytes from X, starting at its
// third byte and moving down.
// NOTE(review): that order assumes X is stored low byte first - confirm.
LnP XP = ( ( LnP ) & X ) + 3 ;
Loop( By * 6 / 8 ) { char C = * -- XP ;
// For text parts, a CR ( 13 ) that directly follows another CR is dropped.
if ( ! Text || C != 13 || WNCR ) * ++ P = C ; WNCR = C != 13 ; }
X = By = Cnt = 0 ; } }
// Text parts get a closing CR LF; then write what is left and close.
if ( Text ) * ++ P = 13, * ++ P = 10 ;
fwrite( Buff, 1, P - Buff + 1, fp ); fclose( fp );
_Fm ( ( " Created: _ %s _", Nam ) ) } return ; }
// ---- Plain text: emit every line after BB, leaving out everything from a
// "-----BEGIN PGP" line up to and including the next "-----END PGP" line.
PP = BB ; int Pgp = 0 ; // Ln.P - Ln.B I_Cook
while ( ++ PP < EE ) {
if ( EqN( * PP, "-----BEGIN PGP", 14 ) ) { Pgp = 1 ; continue ; }
if ( Pgp ) {
if ( EqN( * PP, "-----END PGP", 12 ) ) Pgp = 0 ; continue ; }
__Fm } }
.+.+.+.+.+.+
// Endless loop; left with break, continue or return.
#define LOOP while ( 1 )
// Runs the statement that follows N times, with J counting 0 .. N - 1.
// It declares J and LLL in the enclosing scope, so it can be used only
// once per scope; N is evaluated once.
#define Loop( N ) int J = - 1, LLL = N ; while ( ++ J < LLL )
// Equality predicates, written as a prefix to the call,
// e.g. EqN( A, B, Len ). Non-zero when the strings match.
#define Eq ! strcmp
#define Eqi ! stricmp
#define EqN ! strncmp
#define EqiN ! strnicmp
// Frees Buff and allocates a new one of at least est_Buff bytes,
// recording its size in Buff_Room.
// NOTE(review): the malloc result is not checked.
#define Re_Buff( N ) free( Buff ), \
Buff = ( LnP ) malloc( Buff_Room = ER ( N, est_Buff ) )
// Calls Fm with the parenthesised argument list X. PP, EE and TT are
// saved as offsets from Ln.B before the call and rebuilt from Ln.B after it.
// Fix: TT is now restored from its own offset t. It used to be restored
// from e, which left TT equal to EE after every call.
#define _Fm( X ) { int p = PP - Ln.B, e = EE - Ln.B, t = TT - Ln.B ; \
Fm X ; PP = Ln.B + p ; EE = Ln.B + e ; TT = Ln.B + t ; }
// Emits the line at PP through _Fm unless it starts with '>' and has
// another '>' as its second or third character.
// Fix: P [ 2 ] is looked at only when P [ 1 ] is not the terminator, so a
// line holding just ">" is no longer read past its end.
#define __Fm if ( ! ( * ( P = * PP ) == '>' \
&& ( P [ 1 ] == '>' || ( P [ 1 ] && P [ 2 ] == '>' ) ) ) ) _Fm ( ( "%s", P ) )
typedef unsigned char uchar ;
// LnP is one line ( a C string ), LnA an array of lines.
// LnT groups three line-array pointers.
typedef char * LnP ; typedef LnP * LnA ; struct LnT { LnA B, P, E ; };
// Smallest size Re_Buff gives Buff; Buff_Room is the size it currently has.
const int est_Buff = 10240 ;
int Buff_Room ;
// Base64 tables: Tab maps a character to its 6-bit value, _Tab maps a
// 6-bit value back to its character.
// Fix: Tab had 'z' + 1 entries, but Attachmnts indexes it with whatever
// byte is in the message ( Tab [ * S ] ), so '{' .. '~' read past its end.
// It now has one entry per unsigned byte value.
// NOTE(review): where char is signed, bytes above 127 still give a negative
// index at that call site.
uchar Tab [ 256 ], _Tab [ 64 ];
// Returns the larger of X and Y ( Y when they are equal ).
__int64 ER ( __int64 X, __int64 Y ) {
if ( X > Y ) return X ;
return Y ; }
// Returns the smaller of X and Y ( Y when they are equal ).
__int64 er ( __int64 X, __int64 Y ) {
if ( X < Y ) return X ;
return Y ; }
// Looks for C among the first N characters of B, stopping at the
// terminating 0. Returns a pointer to the first match, or 0 when there
// is none ( or when N <= 0 ).
LnP Find ( char C, LnP B, int N ) {
for ( int Idx = 0 ; Idx < N ; Idx ++ ) {
if ( B [ Idx ] == C ) return B + Idx ;
if ( ! B [ Idx ] ) break ; }
return 0 ; }
.+.+.+.+.+.+
// Give Buff its minimum size ( est_Buff ).
Re_Buff( 0 );
// Build the Base64 tables: _Tab maps a 6-bit value to its character and
// Tab maps the character back to the value. CC holds the first character
// of each run of the alphabet: 26 from 'A', 26 from 'a', 10 from '0',
// then '+' and '/' on their own.
{ LnP CC = "Aa0+/"; int X = -1 ;
Loop( strlen ( CC ) ) {
int L = J > 2 ? 1 : J == 2 ? 10 : 26 ; char C = CC[ J ] - 1 ;
// Fix: the two stores are now separate statements. The single expression
// used before changed X and read it as an index with nothing ordering the
// two, so _Tab [ -1 ] could be written.
Loop( L ) { ++ C ; ++ X ; _Tab [ X ] = C ; Tab [ ( uchar ) C ] = X ; } } }