Question
Input (From Standard Input):
The first line of input will consist of two integers: the number of rows of training data (T) and the expected number of clusters (K). The remaining T lines will each consist of one comma-separated data point whose values are all integers. We have to implement code that reads input in this format.
Example Input:
Output (To Standard Output):
You should place each input point in one of the K clusters, then output the points in each cluster, one per line. For k-means clustering, output the center of each cluster on the first line of that cluster. Do not output a center (or any such extra point) for hierarchical clustering.
Example Output:
We can use any kind of classification algorithm, such as Naive Bayes or a CART decision tree. It doesn't matter which language we use, but it is better to write the program in Python, MATLAB, or R.
Explanation / Answer
The code below appears to be C source from the CMU Bow (libbow) text-classification toolkit: first a function that assigns Laplace-smoothed Naive Bayes weights to every word/class pair, then a leave-one-out scoring function.

#include <bow/libbow.h>
#include <math.h>

/* Function to assign `Naive Bayes'-style weights to each element
   of each document vector. */
void
bow_naivebayes_set_weights (bow_barrel *barrel)
{
  int ci;
  bow_cdoc *cdoc;
  int wi;                       /* a "word index" into WI2DVF */
  int max_wi;                   /* the highest "word index" in WI2DVF. */
  bow_dv *dv;                   /* the "document vector" at index WI */
  int dvi;                      /* an index into the DV */
  int weight_setting_num_words = 0;

  /* We assume that we have already called BOW_BARREL_NEW_VPC() on
     BARREL, so BARREL already has one-document-per-class. */
  assert (!strcmp (barrel->method->name, "naivebayes")
          || !strcmp (barrel->method->name, "crossentropy"));

  max_wi = MIN (barrel->wi2dvf->size, bow_num_words ());

  /* The CDOC->PRIOR should have been set in bow_barrel_new_vpc();
     verify it. */
  for (ci = 0; ci < barrel->cdocs->length; ci++)
    {
      cdoc = bow_array_entry_at_index (barrel->cdocs, ci);
      assert (cdoc->prior >= 0);
    }

#if 0
  /* For Shumeet, make all counts either 0 or 1. */
  for (wi = 0; wi < max_wi; wi++)
    {
      dv = bow_wi2dvf_dv (barrel->wi2dvf, wi);
      if (dv == NULL)
        continue;
      for (dvi = 0; dvi < dv->length; dvi++)
        {
          assert (dv->entry[dvi].count);
          dv->entry[dvi].count = 1;
        }
    }
  /* And set uniform priors */
  for (ci = 0; ci < barrel->cdocs->length; ci++)
    {
      cdoc = bow_array_entry_at_index (barrel->cdocs, ci);
      cdoc->prior = 1.0;
    }
#endif

  /* Get the total number of terms in each class; store this in
     CDOC->WORD_COUNT. */
  for (ci = 0; ci < barrel->cdocs->length; ci++)
    {
      cdoc = bow_array_entry_at_index (barrel->cdocs, ci);
      cdoc->word_count = 0;
    }
  for (wi = 0; wi < max_wi; wi++)
    {
      dv = bow_wi2dvf_dv (barrel->wi2dvf, wi);
      if (dv == NULL)
        continue;
      for (dvi = 0; dvi < dv->length; dvi++)
        {
          cdoc = bow_array_entry_at_index (barrel->cdocs,
                                           dv->entry[dvi].di);
          cdoc->word_count += dv->entry[dvi].count;
        }
    }

  /* Set the weights in the BARREL's WI2DVF so that they are equal
     to P(w|C), the probability of a word given a class. */
  for (wi = 0; wi < max_wi; wi++)
    {
      dv = bow_wi2dvf_dv (barrel->wi2dvf, wi);
      /* If the model doesn't know about this word, skip it. */
      if (dv == NULL)
        continue;
      /* Now loop through all the elements, setting their weights. */
      for (dvi = 0; dvi < dv->length; dvi++)
        {
          cdoc = bow_array_entry_at_index (barrel->cdocs,
                                           dv->entry[dvi].di);
          /* Here CDOC->WORD_COUNT is the total number of words in
             the class.  We use Laplace Estimation. */
          dv->entry[dvi].weight = ((float)
                                   (1 + dv->entry[dvi].count)
                                   / (barrel->wi2dvf->num_words
                                      + cdoc->word_count));
          assert (dv->entry[dvi].weight > 0);
        }
      weight_setting_num_words++;
      /* Set the IDF.  NaiveBayes doesn't use it; make it have no effect. */
      dv->idf = 1.0;
    }

#if 0
  fprintf (stderr, "wi2dvf num_words %d, weight-setting num_words %d\n",
           barrel->wi2dvf->num_words, weight_setting_num_words);
#endif
}

/* For changing the weight of unseen words.
   I really should implement `deleted interpolation'. */
/* M_EST_P summed over all words in the vocabulary must sum to 1.0! */
#if 0
/* This is the special case of the M-estimate that is `Laplace smoothing'. */
#define M_EST_M  (barrel->wi2dvf->num_words)
#define M_EST_P  (1.0 / barrel->wi2dvf->num_words)
#define WORD_PRIOR_COUNT 1.0
#else
#define M_EST_M  (cdoc->word_count                        \
                  ? (((float)barrel->wi2dvf->num_words)   \
                     / cdoc->word_count)                  \
                  : 1.0)
#define M_EST_P  (1.0 / barrel->wi2dvf->num_words)
#endif
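Before the scoring function, it is worth seeing what the Laplace ("add-one") estimate in bow_naivebayes_set_weights actually computes: P(w|C) = (1 + count(w,C)) / (V + total words in C), where V is the vocabulary size. Here is a toy illustration in Python; every number below is made up for the example and is not taken from the library.

# Toy illustration (made-up numbers) of the Laplace-smoothed
# estimate used in bow_naivebayes_set_weights above:
#
#     P(w|C) = (1 + count(w, C)) / (V + total_words_in_C)

VOCAB = ["free", "win", "meeting", "lottery", "prize"]  # hypothetical
V = len(VOCAB)                                          # V = 5

# Hypothetical per-class word counts, count(w, C):
class_word_counts = {
    "spam": {"free": 3, "win": 2},
    "ham":  {"free": 1, "meeting": 4},
}

def p_word_given_class(word, cls):
    counts = class_word_counts[cls]
    total = sum(counts.values())        # total words seen in the class
    return (1 + counts.get(word, 0)) / (V + total)

print(p_word_given_class("free", "spam"))     # (1+3)/(5+5) = 0.4
print(p_word_given_class("lottery", "spam"))  # (1+0)/(5+5) = 0.1
print(sum(p_word_given_class(w, "spam") for w in VOCAB))  # 1.0

Because every word gets the +1 pseudo-count, each class's distribution still sums to 1 over the whole vocabulary, and unseen words get a small non-zero probability instead of zeroing out the product. The M_EST_M / M_EST_P macros generalize this to an m-estimate, (count + m*p) / (total + m), which reduces to Laplace smoothing when m = V and p = 1/V.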
int
bow_naivebayes_score_loo (bow_barrel *barrel, bow_wv *query_wv,
                          bow_score *bscores, int bscores_len,
                          int loo_class)
{
  double *scores;               /* will become prob(class), indexed over CI */
  int ci;                       /* a "class index" (document index) */
  int wvi;                      /* an index into the entries of QUERY_WV */
  int dvi;                      /* an index into a "document vector" */
  float pr_w_c;                 /* P(w|C), prob a word is in a class */
  double pr_tf;                 /* P(w|C)^TF, ditto, by occurrences in QUERY_WV */
  double log_pr_tf;             /* log(P(w|C)^TF), ditto, log() of it */
  double rescaler;              /* Rescale SCORES by this after each word */
  double new_score;             /* a temporary holder */
  int num_scores;               /* number of entries placed in BSCORES */

  /* Allocate space to store scores for *all* classes (documents). */
  scores = alloca (barrel->cdocs->length * sizeof (double));

  /* Instead of multiplying probabilities, we will sum up
     log-probabilities (so we don't lose floating point resolution),
     and then take the exponent of them to get probabilities back. */

  /* Initialize the SCORES to the class prior probabilities. */
  if (bow_print_word_scores)
    printf ("%s\n", "(CLASS PRIOR PROBABILITIES)");
  for (ci = 0; ci < barrel->cdocs->length; ci++)
    {
      bow_cdoc *cdoc;
      cdoc = bow_array_entry_at_index (barrel->cdocs, ci);
      if (bow_uniform_class_priors)
        scores[ci] = 1;
      else
        {
          /* LOO_CLASS is not implemented for cases in which we are
             not doing uniform class priors. */
          assert (loo_class == -1);
          assert (cdoc->prior > 0.0f && cdoc->prior <= 1.0f);
          scores[ci] = log (cdoc->prior);
          if (((bow_params_naivebayes*)(barrel->method->params))
              ->score_with_log_probabilities == bow_yes)
            scores[ci] = - scores[ci];
        }
      assert (scores[ci] > -FLT_MAX + 1.0e5);
      if (bow_print_word_scores)
        printf ("%16s %-40s  %10.9f\n", "",
                (strrchr (cdoc->filename, '/') ? : cdoc->filename),
                scores[ci]);
    }

  /* Loop over each word in the word vector QUERY_WV, putting its
     contribution into SCORES. */
  for (wvi = 0; wvi < query_wv->num_entries; wvi++)
    {
      int wi;                   /* the word index for the word at WVI */
      bow_dv *dv;               /* the "document vector" for the word WI */

      /* Get information about this word. */
      wi = query_wv->entry[wvi].wi;
      dv = bow_wi2dvf_dv (barrel->wi2dvf, wi);

      /* If the model doesn't know about this word, skip it. */
      if (!dv)
        continue;

      if (bow_print_word_scores)
        printf ("%-30s (queryweight=%.8f)\n",
                bow_int2word (wi),
                query_wv->entry[wvi].weight * query_wv->normalizer);

      rescaler = DBL_MAX;

      /* Loop over all classes, putting this word's (WI's)
         contribution into SCORES. */
      for (ci = 0, dvi = 0; ci < barrel->cdocs->length; ci++)
        {
          /* Both these values are pretty arbitrary small numbers. */
          static const double min_pr_tf = FLT_MIN * 1.0e5;
          bow_cdoc *cdoc;

          cdoc = bow_array_entry_at_index (barrel->cdocs, ci);
          assert (cdoc->type == model);

          /* Assign PR_W_C to P(w|C), either using a DV entry, or, if
             there is no DV entry for this class, using M-estimate
             smoothing. */
          if (dv)
            while (dvi < dv->length && dv->entry[dvi].di < ci)
              dvi++;
          if (dv && dvi < dv->length && dv->entry[dvi].di == ci)
            {
              if (loo_class == ci)
                {
                  /* xxx This is not exactly right, because
                     BARREL->WI2DVF->NUM_WORDS might have changed with
                     the removal of QUERY_WV's document. */
                  pr_w_c = ((float)
                            ((M_EST_M * M_EST_P) + dv->entry[dvi].count
                             - query_wv->entry[wvi].count)
                            / (M_EST_M + cdoc->word_count
                               - query_wv->entry[wvi].count));
                }
              else
                {
                  pr_w_c = ((float)
                            ((M_EST_M * M_EST_P) + dv->entry[dvi].count)
                            / (M_EST_M + cdoc->word_count));
                }
            }
          else
            {
              if (loo_class == ci)
                {
                  /* xxx This is not exactly right, because
                     BARREL->WI2DVF->NUM_WORDS might have changed with
                     the removal of QUERY_WV's document. */
                  pr_w_c = ((M_EST_M * M_EST_P)
                            / (M_EST_M + cdoc->word_count
                               - query_wv->entry[wvi].count));
                }
              else
                {
                  pr_w_c = ((M_EST_M * M_EST_P)
                            / (M_EST_M + cdoc->word_count));
                }
            }
          assert (pr_w_c > 0 && pr_w_c <= 1);

          /* Take into consideration the number of times it occurs
             in the query document. */
          pr_tf = pow (pr_w_c, query_wv->entry[wvi].count);

          /* PR_TF can be zero due to round-off error, when PR_W_C is
             very small and QUERY_WV->ENTRY[WVI].COUNT is very large.
             Here we fudgingly avoid this by insisting that PR_TF not
             go below some arbitrary small number. */
          if (pr_tf < min_pr_tf)
            pr_tf = min_pr_tf;

          log_pr_tf = log (pr_tf);
          assert (log_pr_tf > -FLT_MAX + 1.0e5);
          if (((bow_params_naivebayes*)(barrel->method->params))
              ->score_with_log_probabilities == bow_yes)
            log_pr_tf = -log_pr_tf;

          scores[ci] += log_pr_tf;

          if (bow_print_word_scores)
            printf (" %8.2e %7.2f %-40s  %10.9f\n",
                    pr_w_c, log_pr_tf,
                    (strrchr (cdoc->filename, '/') ? : cdoc->filename),
                    scores[ci]);

          /* Keep track of the minimum score updated for this word. */
          if (rescaler > scores[ci])
            rescaler = scores[ci];
        }

      if (((bow_params_naivebayes*)(barrel->method->params))
          ->score_with_log_probabilities == bow_no)
        {
          /* Loop over all classes, re-scaling SCORES so that they
             don't get so small we lose floating point resolution.
             This scaling always keeps all SCORES positive. */
          if (rescaler < 0)
            {
              for (ci = 0; ci < barrel->cdocs->length; ci++)
                {
                  /* Add to SCORES to bring them close to zero.
                     RESCALER is expected to often be less than zero
                     here. */
                  /* xxx If this doesn't work, we could keep track of
                     the min and the max, and sum by their average. */
                  scores[ci] += -rescaler;
                  assert (scores[ci] > -DBL_MAX + 1.0e5
                          && scores[ci] < DBL_MAX - 1.0e5);
                }
            }
        }
    }

  if (((bow_params_naivebayes*)(barrel->method->params))
      ->score_with_log_probabilities == bow_no)
    {
      /* Rescale the SCORES one last time, this time making them all
         0 or negative, so that exp() will work well, especially
         around the higher-probability classes. */
      rescaler = -DBL_MAX;
      for (ci = 0; ci < barrel->cdocs->length; ci++)
        if (scores[ci] > rescaler)
          rescaler = scores[ci];
      /* RESCALER is now the maximum of the SCORES. */
      for (ci = 0; ci < barrel->cdocs->length; ci++)
        scores[ci] -= rescaler;

      /* Use exp() on the SCORES to get probabilities from
         log-probabilities. */
      for (ci = 0; ci < barrel->cdocs->length; ci++)
        {
          new_score = exp (scores[ci]);
          /* assert (new_score > 0 && new_score < DBL_MAX - 1.0e5); */
          scores[ci] = new_score;
        }
    }
  else
    {
      for (ci = 0; ci < barrel->cdocs->length; ci++)
        scores[ci] = 1.0 / scores[ci];
    }

  /* Normalize the SCORES so they all sum to one. */
  {
    double scores_sum = 0;
    for (ci = 0; ci < barrel->cdocs->length; ci++)
      scores_sum += scores[ci];
    for (ci = 0; ci < barrel->cdocs->length; ci++)
      {
        scores[ci] /= scores_sum;
        /* assert (scores[ci] > 0); */
      }
  }

  /* Return the SCORES by putting them (and the `class indices')
     into BSCORES in sorted order. */
  num_scores = 0;
  for (ci = 0; ci < barrel->cdocs->length; ci++)
    {
      if (num_scores < bscores_len
          || bscores[num_scores - 1].weight < scores[ci])
        {
          /* We are going to put this score and CI into BSCORES
             because either: (1) there is empty space in BSCORES, or
             (2) SCORES[CI] is larger than the smallest score
             currently there. */
          int dsi;              /* an index into BSCORES */
          if (num_scores < bscores_len)
            num_scores++;
          dsi = num_scores - 1;
          /* Shift down all the entries that are smaller than SCORES[CI]. */
          for (; dsi > 0 && bscores[dsi - 1].weight < scores[ci]; dsi--)
            bscores[dsi] = bscores[dsi - 1];
          /* Insert the new score. */
          bscores[dsi].weight = scores[ci];
          bscores[dsi].di = ci;
        }
    }

  return num_scores;
}
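The library code above solves a text-classification problem, not the clustering task actually posed in the question. For the question's input/output format, here is a minimal k-means sketch in Python, the language the question recommends. The initialization (first K points as centers), the iteration cap, the assumption that K <= T, and the assumption that rows contain no spaces are all choices made for this sketch; the problem statement does not pin them down.

# Minimal k-means (Lloyd's algorithm) sketch for the input/output
# format described in the question.  Assumptions of this sketch:
# centers start at the first K points, K <= T, and each data row is
# a single whitespace-free comma-separated token.
import sys

def kmeans(points, k, max_iter=100):
    centers = [list(p) for p in points[:k]]   # naive initialization
    assign = None
    for _ in range(max_iter):
        # Assignment step: attach each point to its nearest center
        # by squared Euclidean distance.
        new_assign = [min(range(k),
                          key=lambda c: sum((a - b) ** 2
                                            for a, b in zip(p, centers[c])))
                      for p in points]
        if new_assign == assign:
            break                              # assignments stable: done
        assign = new_assign
        # Update step: move each center to the mean of its members.
        for c in range(k):
            members = [p for p, a in zip(points, assign) if a == c]
            if members:
                centers[c] = [sum(col) / len(members)
                              for col in zip(*members)]
    return centers, assign

def main():
    tokens = sys.stdin.read().split()
    t, k = int(tokens[0]), int(tokens[1])
    # Each remaining line is one comma-separated integer point.
    points = [tuple(int(v) for v in row.split(","))
              for row in tokens[2:2 + t]]
    centers, assign = kmeans(points, k)
    for c in range(k):
        # Per the spec: for k-means, the first line of each cluster
        # is its center; the member points follow, one per line.
        print(",".join(f"{x:g}" for x in centers[c]))
        for p, a in zip(points, assign):
            if a == c:
                print(",".join(str(v) for v in p))

if __name__ == "__main__":
    main()

For hierarchical clustering the question asks for the same output minus the center line, so the printing loop would simply drop the first print call.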