Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

A DNA sequence is a sequence of some combination of the characters A (adenine),

ID: 3853729 • Letter: A

Question

A DNA sequence is a sequence of some combination of the characters A (adenine), C (cytosine), G (guanine), and T (thymine), which correspond to the four nucleobases that make up DNA. Given a long DNA sequence, it is often necessary to compute the number of instances of a certain subsequence. For this exercise, you will develop a program that processes a DNA sequence from a file and, given a subsequence s, searches the DNA sequence and counts the number of times s appears. As an example, consider the following sequence: GGAAGTAGCAGGCCGCATGCTTGGAGGTAAAGTTCATGGTTCCCTGGCCC. If we were to search for the subsequence GTA, it appears twice. You will write a program (place your source in a file named dnaSearch.c) that takes, as command line inputs, an input file name and a valid DNA (sub)sequence. That is, it should be callable from the command line as `./dnaSearch dna01.txt GTA`, which prints: GTA appears 2 times

Explanation / Answer

dnasequenssearch.c
----------------------------------------------
#include "dnasearch.h"
int main(int argc, const char *argv[]) {
/* Make sure the user is using the program correctly */
    if (argc != 2) {
        printf("Usage: ./a.out <input_file>");
        return 1;
    }
/* Open the file by the name that the user entered */
    FILE *in = fopen(argv[1], "r");

/* Check each letter of the file to make sure it's valid, and input it into the file[] array */
    for (int i = 0; !feof(in); i++) {
        char temp;
        fscanf(in, " %c", &temp);
        temp = toupper(temp);
        fLength++;
        if (isValid(temp)) {
            file[i] = temp;
        } else {
            printf("Invalid file! ");
            return 1;
        }
    }

/* If the file is too large, quit */
    if (fLength > MAX) {
        printf("Invalid file! ");
        return 1;
    }

/* Take in the user input and make sure it is valid */
    fgets(input, MAX, stdin);
    for (; input[inLength] != ''; inLength++) {
        if (!isValid(input[inLength])) {
            printf("Invalid input! ");
            return 1;
        }
    }

/* This loop goes through the user input array and picks out individual patterns separated by spaces. Variables indicate the index of the start and end of each inputted pattern and then make a temporary array with these to search with */
    while (end < inLength) {
        for (int i = end; i < inLength; i++) {
            if (isspace(input[i])) {
                end = i;
                break;
            }
        }
        char temp[MAX] = {''};
        for (int i = 0; i < end - start; i++) {
            temp[i] = input[start + i];
        }
        search(temp, end - start);
/* The user can have as many spaces as they wish between entered patterns */
        while (input[end+1] == ' ') {
            end++;
        }
        start = ++end;

    }

    fclose(in);
    return 0;
}
--------------------------------------------------------------------------------------------
dnasearch.c
--------------------------------------
#include "dnasearch.h"
/*
 * Scans the global file[] buffer for every occurrence of the pattern `check`
 * (its first `length` characters) and prints the pattern followed by each
 * match offset, or "Not found" when there is none.
 *
 * check  - NUL-terminated pattern; it is also printed with %s.
 * length - number of pattern characters to compare.
 *
 * Returns a pointer to a static array holding the match offsets in ascending
 * order. The array is shared between calls and is not cleared: only the slots
 * for the current matches are overwritten, and no end marker is written.
 *
 * NOTE(review): start offsets run while i < fLength - length, so a match that
 * ends on the final stored character is only seen if fLength is one greater
 * than the number of characters in file[]. The read loops in this project
 * appear to leave it that way; confirm before changing either side.
 */
int *search(char check[], int length) {
    static int loc[MAX] = {-1};
    int hits = 0;

    printf("%s ", check);

    for (int pos = 0; pos < fLength - length; pos++) {
        /* Cheap rejection on the first character */
        if (check[0] != file[pos]) {
            continue;
        }

        /* Count how many leading characters agree at this offset */
        int matched = 0;
        while (matched < length && check[matched] == file[pos + matched]) {
            matched++;
        }
        if (matched < length) {
            continue;
        }

        /* Full match: report it and record the offset */
        printf("%d ", pos);
        loc[hits++] = pos;
    }

    /* Nothing matched anywhere in the buffer */
    if (hits == 0) {
        printf("Not found");
    }
    printf(" ");
    return loc;
}
/*
 * Reports whether `c` is acceptable to the program: one of the four uppercase
 * nucleobase letters (A, C, G, T), a space separating patterns, or the newline
 * that fgets leaves at the end of an input line.
 *
 * Returns 1 when the character is acceptable, 0 otherwise. Lowercase letters
 * are not accepted; callers upper-case first where they want to allow them.
 */
int isValid(char c) {
    switch (c) {
    case 'A':
    case 'C':
    case 'G':
    case 'T':
    case ' ':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
----------------------------------------------------------------------------------
dnasearch.h
------------------------------------------------
#ifndef DNASEARCH_H
#define DNASEARCH_H

#include <stdio.h>
#include <ctype.h>

/* Capacity, in characters, of both the sequence buffer and the input line */
#define MAX 15000

/* Variables for use in all files.
 *
 * file     - the DNA sequence read from the input file
 * fLength  - bookkeeping length for file[], consumed by search()
 * input    - the line of patterns read from stdin
 * inLength - number of characters of input[] that have been validated
 * start, end - bounds of the pattern currently being extracted from input[]
 *
 * NOTE(review): these are definitions, not extern declarations, so every
 * translation unit that includes this header defines them again. Toolchains
 * that default to -fno-common (GCC 10 and later) are expected to reject that
 * at link time. Moving the definitions into a single .c file and declaring
 * them extern here would fix it, but needs a matching change outside this
 * header.
 */
char file[MAX];
int start, end, inLength, fLength;
char input[MAX];

/* Helper methods */

/* Prints and returns the offsets in file[] where check[0..length) occurs */
int *search(char check[], int length);

/* Returns 1 if c is a character the program accepts, 0 otherwise */
int isValid(char c);

#endif /* DNASEARCH_H */
------------------------------------------------------------------------------------
test.c
-------------------------------------
#include <ctype.h>
#include <stdio.h>

/*
 * Scratch driver: echoes the first characters of test.txt, echoes one line
 * from stdin, then splits the file characters on whitespace and passes each
 * token to HELPER.
 *
 * NOTE(review): HELPER is not defined anywhere in this file; it has to be
 * supplied before this program can link.
 */
int main(void) {

    char characters[100];
    int n = 0;

    FILE *in = fopen("test.txt", "r");
    if (in == NULL) {
        return 1;
    }

    /* Stop at end of file or when the buffer is full, whichever comes first */
    while (n < (int)sizeof(characters) && fscanf(in, "%c", &characters[n]) == 1) {
        n++;
    }
    fclose(in);

    for (int j = 0; j < n; j++) {
        printf("%c ", characters[j]);
    }

    /* The buffer is as large as the size handed to fgets */
    char input[15000] = {'\0'};
    if (fgets(input, sizeof(input), stdin) == NULL) {
        input[0] = '\0';
    }
    printf("%s ", input);

    /* Split only the n characters actually read; start/end are the half-open
       bounds of the current token. */
    int start = 0, end = 0;
    char temp[15000] = {'\0'};
    while (start < n) {
        while (start < n && isspace((unsigned char)characters[start])) {
            start++;
        }
        if (start >= n) {
            break;
        }
        end = start;
        while (end < n && !isspace((unsigned char)characters[end])) {
            end++;
        }

        /* Copy the token to the front of temp and terminate it */
        for (int i = start; i < end; i++) {
            temp[i - start] = characters[i];
        }
        temp[end - start] = '\0';

        HELPER(temp);
        start = end;
    }

    return 0;
}
----------------------------------------------------------------------------
test_dnasearch.c
---------------------------------------------
#include "dnasearch.h"
#include <assert.h>

/* This method checks to make sure my "search" helper method works */
void checkSearch(void) {
/* Arrays below are either simulations of what a user could put into the input stream, or what would be expected as a result of these inputs */
    char cat[4] = {'C', 'A', 'T', ''};
    char a[2] = {'A', ''};
    char tttttttttt[11] = {'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', 'T', ''};
    int expectedCAT[4] = {0, 99, 171, 242};
    int expectedA[59] = {1, 3, 6, 9, 12, 14, 15, 21, 24, 30, 35, 41, 45, 49, 56, 59, 64, 69, 79, 83, 86, 88, 94, 96, 98, 100, 104, 108, 114, 118, 120, 122, 131, 133, 135, 139, 143, 149, 152, 155, 160, 164, 169, 172, 175, 176, 192, 196, 201, 205, 209, 213, 215, 218, 227, 230, 237, 243, ''};

/* Here I take in the file and find its length, which I need in my search method. */
    FILE *in = fopen("test.txt", "r");
    for (int i = 0; !feof(in); i++) {
        fscanf(in, " %c", &file[i]);
        fLength++;
    }

/* The assert statements are below. The search method returns an array of integer locations of the pattern in question, and here I am comparing what search returns with what is expected. Everything checks out! */
    int *output = search(cat, 3);

    for (int i = 0; i < 4; i++) {
        assert(*(output + i) == expectedCAT[i]);
    }

    int *output2 = search(a, 1);
    for (int i = 0; i < 57; i++) {
        assert(*(output2 + i) == expectedA[i]);
    }
    int *output3 = search(tttttttttt, 10);
    assert(*output3 == 1);
}

/*
 * Checks the isValid() helper against two files: test.txt, in which every
 * character must be accepted, and test2.txt, which must contain at least one
 * rejected character. Aborts through assert on failure or if either file
 * cannot be opened.
 */
void checkIsValid(void) {
/* The first file is a valid input file, while the second is not */
    FILE *correct = fopen("test.txt", "r");
    FILE *incorrect = fopen("test2.txt", "r");
    assert(correct != NULL);
    assert(incorrect != NULL);

    char c;

/* Both loops are driven by fscanf's return value, so only characters that
   were really read are checked. */
    while (fscanf(correct, " %c", &c) == 1) {
        c = (char)toupper((unsigned char)c);
        assert(isValid(c));
    }
    fclose(correct);

/* One bad character is enough to make a file invalid, so look for any
   rejected character instead of requiring every character to be rejected. */
    int sawInvalid = 0;
    while (fscanf(incorrect, " %c", &c) == 1) {
        c = (char)toupper((unsigned char)c);
        if (!isValid(c)) {
            sawInvalid = 1;
        }
    }
    fclose(incorrect);
    assert(sawInvalid);
}

/* Test driver: runs the search checks, then the validity checks. A failed
   assert in either one aborts the program before the normal return. */
int main(void)
{
    checkSearch();      /* search() offsets against test.txt */
    checkIsValid();     /* isValid() against test.txt and test2.txt */

    return 0;
}
-------------------------------------------------------------------------------------
test.txt
---------------------------
CATATTAC
GATTACAACGT
TGACGATCG
CGATCGTAGCTTGACTGATCGATCG
CTGACTAGCTGATGCTAGCTGGGGCTAGCTATTATACTTGT
ATATACATCGAGCTATCTCTATCGATATAGCTCGCCTATATACGTAGCTAGCTG
GACTACTATCGTAGCTAGCTGATCATTAAGCTCGCTCTCTGCTGACTGATCGTAGCTAGCTAGCTATATGACGTCTGCTAGTAGCTCCGATGCTCAT

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Drop an Email at
drjack9650@gmail.com
Chat Now And Get Quote