Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

3 Programming Question (45 points) 3.1 Instructions You need to write the code b

ID: 3601402 • Letter: 3

Question

3 Programming Question (45 points) 3.1 Instructions You need to write the code by yourself. Your implementation must use C/C++ and your code must run on the Linux machine general.asu.edu. Please refer to the programming guide (available under the Assignments folder on Blackboard) for using the general.asu.edu server, as well as compiling and running C/C++ code under Linux For this question, you need to provide a Makefile that compiles your program to an executable named a3 that runs on the Linux machine general.asu.edu. Our TA will write a script to compile and run all student submissions; therefore, executing the command make in the Code folder must produce the executable a3 also located in the Code folder 3.2 Requirements For this part, you need to first review Section 10.4, Representing rooted trees. The requirements are as follows. You will write a serial program that executes a sequence of commands that operate on individual files. Valid commands include: Start Name, where Name is a character string of maximum length 20 alphabetic characters representing the name of the data file (Name.txt). Name.txt contains a set of data entries (integer type, one per line). This command reads all the data entries into an array (dynamically allocated). The output of the Start command is: Processing data from: Name.txt Build-BST, which builds a BST for the data entries in the array by calling the TREE-INSERT algorithm (pseudo code can be found in the lecture notes) repeatedly to insert the numbers one by one and output Building BST . INORDER-TREE-WALK, which calls the INORDER-TREE-WALK algorithm (pseudo code can be found in the lecture notes). Note that if the BST has not been built yet, output Build BST first! Otherwise, the output of this step should be sorted. For example, if the user-provided file contains the following data entries (one per line): 9, 2, 10, 4, 5, the output of your INORDER-TREE-WALK algorithm should be 2, 4, 5, 9, 10 End Name, which indicates the end of the processing for the data in file Name.txt. The Start and End commands will always come in pairs with matching names. Any memory dynamically allocated for Name must be freed on an End command. The output of the End command is: End of processing data from: Name.txt . Exit, which indicates that there are no more commands to execute, i.e., the program terminates Here is a sample command sequence Start test 1 Build-BST INORDER-TREE-WALK End test1 Start test 2 INORDER-TREE-WALK

Explanation / Answer

#include "sorter.h"

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include "mergesort.c"

FILE *file;


void addTail(LL* dlist, data cdata){ //takes a linked list and a data, adds the data to a node and puts it at the end of the linked list

Node* datanode = (Node*) malloc(sizeof(Node)); //allocate space for a node

datanode->next = NULL;

datanode->ndata = cdata; //set the new node's ndata to cdata

if((dlist->head == NULL) && (dlist->tail == NULL))

{ //empty list case

dlist->head = datanode;

dlist->tail = datanode;

dlist->count++;

}

else if((dlist->head == dlist->tail)){ //single node case

dlist->tail = datanode;

dlist->head->next = dlist->tail;

dlist->count++;

}

else{// every other case

dlist->tail->next = datanode;

dlist->tail = datanode;

dlist->count++;

}

}

void Finish(LL* dlist){ //frees everything in a LL

Node* temp = dlist->head;

int n = 0;

while(temp!= NULL){

char** ftemp = temp->ndata.fielddata;

for(n = 0;n < dlist->numfields;n++){

free(ftemp[n]);

}

free(ftemp);

Node* oldtemp = temp;

temp = temp->next;

free(oldtemp);

}

for(n = 0;n < dlist->numfields;n++){

free(dlist->fields[n]);

}

free(dlist->fields);

free(dlist->types);

free(dlist);

}

void initializeList(LL* dlist, char* fields){ //initializes the numfields, types, and fields in the linked list

char* temp = strdup(fields);

char* currvar = (char*) strtok(temp, ",");

int totalfields = 0; //counts total fields, determined by the number of tokens found

while(currvar != 0){ //discovers total number of fields

totalfields++;

currvar = (char*) strtok(NULL,",");

}

dlist->numfields = totalfields;

dlist->types = (int*) malloc(sizeof(int)*dlist->numfields); //allocates fields for keeping track of what type each field is, to be used once all data is received

dlist->fields = (char**) malloc(sizeof(char*)*dlist->numfields); //keeps track of the name of the fields, to be used later when sorting

free(temp);

temp = strdup(fields);

currvar = (char*) strtok(temp, ",");

int n;

for(n = 0;n < dlist->numfields;n++){

if(n == dlist->numfields-1){ //strips the newline character for the last field

int specchar = strcspn(currvar, " "); //find the location of the first or the first

if(specchar != 0){ //if the location isn't the start

currvar[specchar] = ''; //remove the special characters

}

}

dlist->fields[n] = strdup(currvar);

currvar = (char*) strtok(NULL,",");

}

free(temp);

free(currvar);

}

void initializeListTypes(LL* dlist){ //determines what type of data belongs to what field

if(dlist->head == NULL){

return;

}

Node* temp = dlist->head;

int n = 0;

for(n;n < dlist->numfields;n++){

char* tempstr = temp->ndata.fielddata[n];

char* endptr;

long lg = 0;

float fl = 0.0;

if(n == dlist->numfields-1){ //if this field is the last field on the line, remove and

int specchar = strcspn(tempstr, " "); //find the location of the first or the first

if(specchar != 0){ //if the location isn't the start

tempstr[specchar] = ''; //remove the special characters

}

}

if(strchr(tempstr, '.')){ //if it has a '.', it's a string or a float/double

fl = strtof(tempstr, &endptr);

if((endptr == tempstr) || (endptr != (tempstr+strlen(tempstr)))){ //if endptr == tempstr, no floats at the start, the other conditional checks to make sure that the end of the read in float is the end of the string inputted

dlist->types[n] = 0;

}

else{

dlist->types[n] = 2;

}

}

else{ //if it has no '.', it's either an int or a string

lg = strtol(tempstr, &endptr, 10);

if((endptr == tempstr) || (endptr != (tempstr+strlen(tempstr)))){ //if endptr == tempstr, no floats at the start, the other conditional checks to make sure that the end of the read in float is the end of the string inputted

dlist->types[n] = 0;

}

else{

dlist->types[n] = 1;

}

}

}

}

void export(LL* dlist){

// output to a file FOR TESTING PURPOSES

//char const *fileName = "sortedmovies.csv";

//FILE *fp = fopen(fileName, "w");

//if(fp == NULL){

//  printf("error");

//}

int n = 0;

for(n = 0;n < dlist->numfields;n++){

if(n != dlist->numfields-1){

// printf("%s,",dlist->fields[n]);

    fprintf(stdout,"%s,", dlist->fields[n]);

}

else{

// printf("%s ",dlist->fields[n]);

    fprintf(stdout, "%s ",dlist->fields[n]);

}

}

Node* temp = dlist->head;

while(temp != NULL){

if(temp->ndata.comma == NULL){ //if there's no commas in the fields

for(n = 0;n < dlist->numfields;n++){

if(n != dlist->numfields-1){

// printf("%s,",temp->ndata.fielddata[n]);

    fprintf(stdout,"%s,",temp->ndata.fielddata[n]);

}

else{

// printf("%s ",temp->ndata.fielddata[n]);

    fprintf(stdout,"%s ",temp->ndata.fielddata[n]);

}

}

}

else{

for(n = 0;n < dlist->numfields;n++){

if(temp->ndata.comma[n] == 0){

if(n != dlist->numfields-1){

//printf("%s,",temp->ndata.fielddata[n]);

    fprintf(stdout,"%s,",temp->ndata.fielddata[n]);

}

else{

//printf("%s ",temp->ndata.fielddata[n]);

    fprintf(stdout,"%s ",temp->ndata.fielddata[n]);

}

}

else{

if(n != dlist->numfields-1){

//printf(""%s",",temp->ndata.fielddata[n]);

    fprintf(stdout,""%s",",temp->ndata.fielddata[n]);

}

else{

//printf(""%s" ",temp->ndata.fielddata[n]);

    fprintf(stdout,""%s" ",temp->ndata.fielddata[n]);

}

}

}

}

temp = temp->next;

}

//fclose(fp);

}

int main(int argc, char *argv[]){

//Constructs an array containing all of the data data into cdata

file = stdin;

LL* dlist = (LL*) malloc(sizeof(LL)); //malloc a data linked list to which data nodes will be added

char* test = (char*) malloc(sizeof(char)*1000); //for getting data from the file

memset(test, 0 , sizeof(char)*1000);

//i should make sure there's no problem when there's more than 1000 chars

fgets(test,1000,file); //skips the column line

initializeList(dlist, test);

char c;

int currentvar=0;

int count = 0;

int place = 0;

int quote = 0;

int x = 0;

data cdatanode = {};

cdatanode.fielddata = (char**) malloc(sizeof(char*)*dlist->numfields);

cdatanode.comma = NULL;

memset(test, 0 , sizeof(char)*1000);

while((c = fgetc(file)) != EOF){

/*

if(c < 0 || c > 128){

continue;

}

*/

    if(x < 200)

    {

        printf("C is %c ", c);

        printf("Test is %s ",test);

    }

if((c == ',' || c == ' ') && (quote == 0)){

    

cdatanode.fielddata[currentvar] = strdup(test); //copy test to cdatanode STRDUP IS MALLOC, REMEMBER TO FREE

    

if(c == ' '){

addTail(dlist, cdatanode);

count++;

currentvar = 0;

place = 0;

memset(test, 0 , sizeof(char)*1000);

cdatanode.fielddata = (char**) malloc(sizeof(char*)*dlist->numfields); //get new char array for cdatanode

cdatanode.comma = NULL;

}

else{

currentvar++;

place = 0;

memset(test, 0 , sizeof(char)*1000);

}

}

else if(c == '"'){

if(quote == 0){

//insert comma handling here, somehow

cdatanode.comma = (int*) malloc(sizeof(int)*dlist->numfields);

memset(cdatanode.comma,0,sizeof(sizeof(int)*dlist->numfields));

cdatanode.comma[currentvar] = 1;

quote = 1;

}

else{

quote = 0;

}

}

else{

test[place] = c;

place++;

}

x++;

}

free(cdatanode.fielddata);

free(test);

initializeListTypes(dlist);

if(argc >= 3){

if(strcmp("-c", argv[1]) == 0){

mergeSortBegin(dlist, argv[2]);

}

}

export(dlist); //exports the dlist into csv

}

SORTER.H

//Suggestion: define a struct that mirrors a record (row) of the data set

#ifndef _SORTER_H_

#define _SORTER_H_

typedef struct _data { //THIS DATA STRUCT IS CURRENTLY BAD, PUT DATA IN NODE

char** fielddata; //2d array to contain string types

int* comma; //does the node have a comma in the field n? 0 = no, 1 = yes

//might want to make the data types a linked list also?

//or make a LL to keep track when creating, then place into array??

} data;

typedef struct _Node {

data ndata; //data in a node

//char** data; use this instead of a data struct

struct _Node* next;

}Node;

typedef struct _LL { //linked list that contains the nodes with the movies

Node* head;

Node* tail;

int count; //count of total nodes in list

int numfields; //count of the total fields in the csv

int sortingfield; //keeps track of the field to be sorted, where sortingfield = # field sorted

int sortingtype; //keeps track of what type is being sorted, 0 = string, 1 = int, 2 = float

int* types; //keeps track of types, 0 = string, 1 = int, 2 = float

char** fields; //keeps track of the name fields

}LL;

//Suggestion: prototype a mergesort function

LL* mergesortBegin(LL* dlist, char* field);

Node* mergeSort(LL* dlist, Node* head);

Node* split(Node* head);

Node* merge(LL* dlist, Node* left, Node* right);

#endif

MERGESORT.C

#include "sorter.h"

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

FILE *file;

void mergeSortBegin(LL* dlist, char* field){ //takes in a data struct array and a field to sort by

if((dlist->head == NULL) || (dlist->head == dlist->tail)){ //if no nodes or one nodes, already solved

printf("There are no entries in the csv, cannot sort.");

return;

}

int n = 0;

int found = 0;

for(n = 0;n < dlist->numfields; n++){ //determines the field to be sorted

if(strcmp(field,dlist->fields[n]) == 0){ //note: 0 = string, 1 = int, 2 = float

dlist->sortingfield = n; //determines the field to sort by

    //printf("%d ", n);

//printf("%s ", field);

found = 1;

}

}

if(found == 0){

printf("Field not found. Please sort by one of the fields in the csv file. ");

}

dlist->sortingtype = dlist->types[dlist->sortingfield];

//printf("%d ", dlist->sortingtype);

dlist->head = mergeSort(dlist,dlist->head);

}

Node* mergeSort(LL* dlist, Node* head){//note: String's mergesort

if((head == NULL) || (head->next == NULL)){

return head;

}

Node* temp = head;

int n = 0;

while(temp != NULL){

if(strcmp(temp->ndata.fielddata[11], "xXx ") ==0){

// printf("%s, %d", temp->ndata.fielddata[11],n);

}

temp = temp->next;

n++;

}

Node* mid = split(head);

head = mergeSort(dlist, head);

mid = mergeSort(dlist, mid);

head = merge(dlist,head,mid);

return head;

}

Node* split(Node* head){ //given a head node, returns a pointer to the middle node

if((head == NULL) || (head->next == NULL)){

return NULL;

}

Node* temp = head->next;

Node* prev = head;

while(temp != NULL){

temp = temp->next;

if(temp != NULL){

prev = prev->next;

temp = temp->next;

}

}

Node* midpt = prev->next;

prev->next = NULL;

return midpt;

}

Node* merge(LL* dlist, Node* left, Node* right){

if( left == NULL){

return right;

}

else if(right == NULL){

return left;

}

if(dlist->sortingtype == 0){ //if sorting a string

//alphabetic sorting method

/*

char* leftstr = strdup(left->ndata.fielddata[dlist->sortingfield]);

int n = 0;

for(n = 0;n < strlen(leftstr);n++){

leftstr[n] = tolower(leftstr[n]);

}

char* rightstr = strdup(right->ndata.fielddata[dlist->sortingfield]);

for(n = 0;n < strlen(rightstr);n++){

rightstr[n] = tolower(rightstr[n]);

}

*/

char* leftstr = left->ndata.fielddata[dlist->sortingfield];

char* rightstr = right->ndata.fielddata[dlist->sortingfield];

int cmp = strcmp(leftstr,rightstr);

Node* final = NULL;

if(cmp <= 0){

final = left;

final->next = merge(dlist,left->next,right);

}

else{

final = right;

final->next = merge(dlist,left,right->next);

}

return final;

}

// if its an int

else if(dlist->sortingtype == 1) {

    // convert the char string to an int

    

    Node* final = NULL;

    int leftNumber = atoi(left->ndata.fielddata[dlist->sortingfield]);

    int rightNumber = atoi(right->ndata.fielddata[dlist->sortingfield]);

    

    

    if(leftNumber <= rightNumber) {

        final = left;

        final->next = merge(dlist,left->next,right);

    } else {

        final = right;

        final->next = merge(dlist,left,right->next);

    }

    return final;

}

}