//========================================================================= #define PROGRAMMER "BAHN, William" #define PROG_CODE "bahw" #define COURSE "ECE-1021" #define YEAR (2003) #define TERM "Fall" #define SECTION (0) #define ASSIGNMENT "EXAMPLE #1" #define REVISION (0) #define TITLE "Dynamic Memory Allocation" #define SUBTITLE "Last Modified on 20 OCT 03" #define EMAIL "wbahn@eas.uccs.edu" #define FILENAME "parser.c" //========================================================================= // PROBLEM: // // Write a function that accepts three arguments: // 1) a pointer to the string to be parsed. // 2) a pointer to the delimiter string. // 3) a pointer to an array of pointers (to chars). // // The function should parse the string based on the delimiter string. // // PSEUDOCODE: // // The algorithm is basically identical to the algorithm worked up in // class: // // We'll refer to everything between a pair of delimiters to a "token" // // 1) TASK: Initialize variables // 1.1) SET: m = 0 (Start at the beginning of the string) // 1.2) SET: c = 0 (number of tokens found) // 2) WHILE: Unscanned characters remain in string. // 2.1) TASK: Skip across any delimiters to find start of token. // 2.2) TASK: Mark the start of the token. // 2.3) TASK: Skip across any non-delimiters to find end of token. // 2.4) TASK: Mark the end of the token. // 2.5) TASK: Allocate memory for the token and store pointer in array. // 2.6) TASK: Copy the token to the newly allocated memory. // POINTER TO A POINTER (to a pointer...to a pointer....): // // In main() we will use a variable defined as: // // char **pval; // // This would be the same as: // // char *pval[] // // except that this is a syntax error because, with this syntax, the // compiler expects to find out what the actual dimension of the // array is so that it can allocate memory for it at compile time but // we plan to dynamically add memory to the array as needed. // // However, it is suitable to use this syntax in function prototypes since // they don't need to know how big the array is - just that the declared // variable is a pointer to an array. // // The way to interpret this is that the variable pval contains an address. // That address is a pointer to something - what is that something? The // something is a pointer to a character (aka a string pointer). // // The value stored at *pval is a string pointer. Any place that we // need to use a pointer to a string (such as printf() when printing // out a string) we could use *pval if what we want is the string that is // referenced by the first element of the pval array. We can accomplish // exactly the same thing using the array syntax pval[0]. For subsequent // strings, we simply use the appropraite index. // // Since we are going to dynamically allocate the memory that pval points // to (as well as the memory that each element within pval points to) we // need to pass the address of pval to the parser() function so that it // can assign the proper value to pval so that the calling function has // access to it. // // This means that the value passed to parser() is a pointer that points // to a pointer that points to pointer that points to a string. // // Sound confusing? It is. So throw some names at these various levels: // // We are going to get a value passed to the function parser() and are // going to store it in a variable name Pete. The value stored in Pete // is the address of Aaron - so Pete points to Aaron. The value stored // in the variable Aaron is the address of Sam, so Aaron points to Sam. // The value stored in Sam is the address where a we can find the first // character in a character string so Sam is a string pointer. // // Now, since a pointer to a variable is the same as a pointer to an // array of variables (of that same data type), we can treat Aaron as // a pointer to an array. and, because Aaron points to an array, there // is (possibly at least) additional elements in the array each of which // points to a different string - say Steve, Sue, and Sharon. // // You're probably still pretty confused, so let's walk through the // development from the other direction. // // The "->" means "points to" and the best way to read the first sentence // below is: The value store in the variable Sam points to the first // character in the string "the first string". Understanding that what // is meant by "points to" is that the value stored in Sam is the address // of the first character in the string. // // Sam -> "the first string" // Steve -> "the second string" // Sue -> "the third string" // Sharon -> "the fourth string" // // Aaron[0] -> Sam // Aaron[1] -> Steve // Aaron[2] -> Sue // Aaron[3] -> Sharon // // Aaron -> Aaron[0] // // If we just passed Aaron to our parser() function, then the function // could access the strings and change the values stored in the array, // but it could not change where the array itself is stored. That's // because all we were told by main() is the value stored in Aaron and // what we need to know is where Aaron is stored so that we can change // that value. // // Pete -> Aaron // // And by passing Pete to the function, we can allocate memory for the // array and store the pointer to that array at the address stored in // Pete (which is the address of Aaron) // // So, using the pointer and array syntax in main(), we have: // NOTE: "<=>" means "equivalent to" // Aaron <=> token (same as &Aaron[0] as well) // Sam <=> token[0] // Sue <=> token[1] // Pete <=> &token // Sam and Sue are of data type (char *) (pointer to a string) // Aaron is of data type ((char *) *) which is the same as char ** // Pete is of data type (((char *) *) *) which is the same as char *** // Pete is the value that we are going to pass to parser, so the parameter // must be of type (char ***) in order to work. // Instead of thinking of the variable (which we will call token_ptr) as // being a pointer to a pointer to a pointer to a string (which it IS) // it's a lot more meaningful to say that token_ptr is a pointer to an // array of string pointers. // int parser(char *msg, char *delim, char ***token_ptr); // // Within parser, the value of Pete is stored in the variable token_ptr // So just as Pete is the address of the Aaron array, token_ptr is the // address of (aka, a pointer to) the token array and by changing // token_ptr we change where in memory the token array is stored. To be // a bit more precise, changing token_ptr does not actually change where // the array is stored - it just changes where we think it is stored. // What we are actually doing is making sure that the value that we store // in token_ptr IS the address where the token array is actually located. // // Building up our equavalence table for token_ptr: // // token_ptr <=> Pete <=> &Aaron // *token_ptr <=> Aaron <=> &Aaron[0] // **token_ptr <=> Aaron[0] // (*token_ptr)[n] <=> Aaron[n] // // This last line is probably confusing - but consider a case that you are // already familiar with: // // void junk(char *s) // { // *s = 'H'; // *s is the first character in string - it's the actual // // character pointed to by s. // s[0] = 'H'; // This is an equivalent syntax // return; // } // // So, if *identifier points to the first element of an array, then the // syntax "identifier[n] = value" will store "value" in the nth element // of that array. // // In our case, identifier happens to be *token_ptr. // // So why, you might ask, don't we just write: // // *token_ptr[n] // // The problem with this is order of operations. We have two operators, // the '*' (pointer dereference operator) and [] (array index operator). // The [] operator has the higher precedence and so this is the same as: // // *(token_ptr[n]) // // which is NOT what we want. So we use parentheses to force the desired // evaluation onto the compiler // // When we want to change the address that the array is stored at, we want // the following statement (conceptually): // // Aaron = new_address of array // // Using the equivalence table, the necessary syntax is: // // *token_ptr = new_address // //== INCLUDE FILES ======================================================== #include // printf(), scanf() #include // gotoxy() #include // malloc() //== FUNCTION PROTOTYPES ================================================== void PrintHeader(void); int StringLength(char *s); int parser(char *msg, char *delim, char ***token); int StripCR(char *s); void ReleaseMemory(char ***token_ptr, int count); //== MACRO DEFINITIONS ==================================================== #define FALSE (0) #define TRUE (!FALSE) //== MAIN FUNCTION ======================================================== int main(void) { char msg[81]; char delim[81]; char **token; // pval is a pointer that points to a pointer to a char. int count; int i; PrintHeader(); printf("\n"); // Print a blank line gotoxy(1, 12); printf("Enter the string to parse:\n"); fgets(msg, 81, stdin); StripCR(msg); gotoxy(1, 15); printf("Enter the string of delimiters:\n"); fgets(delim, 81, stdin); StripCR(delim); token = NULL; count = parser(msg, delim, &token); printf("\n"); printf("%s\n", msg); printf("Found %i tokens.\n", count); for(i = 0; i < count; i++) { printf("%s\n", token[i]); } ReleaseMemory(&token, count); return; } //== SUPPORT FUNCTIONS ==================================================== void PrintHeader(void) { printf("========================================" "=======================================\n"); printf("Course....... %s-%i (%s %i)\n", COURSE, SECTION, TERM, YEAR); printf("Programmer... %s (%s)\n", PROGRAMMER, PROG_CODE); printf("Assignment... %s (Rev %i) (Source Code in %s)\n", ASSIGNMENT, REVISION, FILENAME); printf("Description.. %s\n", TITLE); printf(" %s\n", SUBTITLE); printf("========================================" "=======================================\n"); return; } int parser(char *msg, char *delim, char ***token_ptr) { int mlen, dlen; int m, d, c; int start, stop; int length; int i; mlen = StringLength(msg); dlen = StringLength(delim); c = 0; // Count of how many substrings have been found. m = 0; // Start at beginning of msg while(m < mlen) { // TASK: Find the start of the next token. // Find first occurance of any character NOT in delim within msg for(start = -1; (m < mlen) && (0 > start); m++) { for(d = 0; (d < dlen) && (msg[m] != delim[d]); d++); // ; intented if(d >= dlen) // A character was not found start = m; } if(start >= 0) // Found a token (a character that's NOT a delimiter). { //--------------------------------------------------------------- // TASK: Find end of the present token // Find first occurance of any character in delim within msg for(stop = -1; (m < mlen) && (0>stop); m++) { for(d = 0; (d < dlen) && (msg[m] != delim[d]); d++); // ; intented if(d < dlen) // A delimiter found stop = m; } if((m == mlen)&&(0>stop)) // End of string with no end of token stop = mlen; //--------------------------------------------------------------- // TASK: Create memory for token and place pointer to it into array length = stop - start; // length of string (w/o NULL terminator) // Allocate enough memory for the token and its pointer // 1) Allocate new memory at the end of the pointer array *token_ptr = realloc(*token_ptr, (c+1)*sizeof(char *)); // 1) Allocate memory for the token itself and store // the address of that memory block in the array. (*token_ptr)[c] = malloc( (length+1) * sizeof(char)); // Copy substring to the allocated memory for(i=0; i