My Assignment: Your task is to write a C program, called dna.c, that reads three
ID: 3639946 • Letter: M
Question
My Assignment: Your task is to write a C program, called dna.c, that reads three DNA sequences from a file called dna_input.dat and prints the results of a comparison between each pair of sequences to the file dna_output.dat. The input file dna_input.dat consists of three lines. Each line is a single sequence of characters from the set {A, C, G, T}, that appear without spaces in some order, terminated by the end-of-line character '\n'. You can assume that the three lines contain the same number of characters, and that this number is at most 241 (including the '\n' character). Here is a sample input file:
?
ACGTTTTAAGGGCTGAGCTAGTCAGTTCATCGCGCGCGTATATCCTCGATCGATCATTCT
CTCTAGACGTTTTAAGGGCTGAGCTAGTCAGTTC
ACGTTTTAAGGGCTTAGAGCTTATGCTAATCGCGCGCGTATATCCTCGATCGATCATTCT
CTCTAGACGTTTTAAGGGCTAAGGCGCGTAATTA
TCGTTTGAAGGGCTTAGTTAGTTAGTTCATCGGCGGCGTATATCCTCGATCGATCATTCT
CTCTAGACGTTTTAAGGGCTGAGCCGGTCAGTTA
Each of the three lines (shown with wrap-around above) consists of 95 characters: the 94 letters from {A, C, G, T} and the '\n' character (not shown). The output file dna_output.dat must be structured as follows. For each pair of sequences #i and #j, with i, j ∈ {1, 2, 3} and i < j, you should print:
• A single line, saying “Comparison between sequence #i and sequence #j:”
• The entire sequence #i in the first row, and the entire sequence #j in the third row.
• The comparison between the two sequences in the second (middle) row. This should be printed as follows. For each position, if the two bases are the same in both sequences then the corresponding base letter (one of A, C, G, T) should be printed; otherwise a blank " " should be printed.
• A single line, saying “The overlap percentage is x%” where x is a floating-point number which measures the percentage of letters that match in the two sequences. This number should be printed with a single digit of precision after the decimal point.
Each line in the output file dna_output.dat should contain at most 61 characters, including the end-of-line character '\n'. If the DNA sequences are longer than that, then each of the three rows mentioned above should be split across several lines, with the first few lines containing exactly 60 letters, and the last containing the rest of the letters. Here is a sample file dna_output.dat which results upon processing the file dna_input.dat above:
Comparison between sequence #1 and sequence #2:
ACGTTTTAAGGGCTGAGCTAGTCAGTTCATCGCGCGCGTATATCCTCGATCGATCATTCT
ACGTTTTAAGGGCT AG    T  G T ATCGCGCGCGTATATCCTCGATCGATCATTCT
ACGTTTTAAGGGCTTAGAGCTTATGCTAATCGCGCGCGTATATCCTCGATCGATCATTCT
CTCTAGACGTTTTAAGGGCTGAGCTAGTCAGTTC
CTCTAGACGTTTTAAGGGCT AG      A TT 
CTCTAGACGTTTTAAGGGCTAAGGCGCGTAATTA
The overlap percentage is 80.9%
What I have so far:
/* Reads one sequence into `sequence` and returns an int count (see the definition below). */
int read_DNA (char sequence[]);
/* Compares seq1 with seq2 over n positions and returns a double;
   seq3 presumably receives the comparison row -- TODO confirm against the definition. */
double compare_DNA (char seq1[], char seq2[], char seq3[], int n);
/* Writes characters taken from the three arrays to the output file. */
void print_DNA (char seq1[], char seq2[], char seq3[], int n);
/*~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
The purpose of the main function
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~*/
/* Opens dna_input.dat / dna_output.dat, reads three sequences and, for every
   pair i < j, writes a header, the sequences and the overlap percentage.
   NOTE(review): this block does not compile as written; the notes below mark why. */
int main() {
/* NOTE(review): `input` and `output` are locals of main, but read_DNA and
   print_DNA refer to them by name and cannot see them from here. */
FILE *input = fopen ("dna_input.dat", "r");
FILE *output = fopen ("dna_output.dat", "w");
/* NOTE(review): MAX_IN_LENGTH is not defined anywhere before this point. */
char DNA_seq1[MAX_IN_LENGTH], DNA_seq2[MAX_IN_LENGTH], DNA_seq3[MAX_IN_LENGTH
];
/* NOTE(review): `DNA_seq1[]` is not a valid expression; an array is passed by
   its name alone. The returned length is also discarded, so no `n` exists later. */
read_DNA (DNA_seq1[]);
read_DNA (DNA_seq2[]);
read_DNA (DNA_seq3[]);
/* Visit every ordered pair and keep only those with i < j: (1,2), (1,3), (2,3). */
for (int i = 1; i <= 3; i++) {
for (int j = 1; j <= 3; j++) {
if (i < j) {
fprintf (output,"Comparison between sequence #%d and sequence #%d: ",
i, j);
/* NOTE(review): `seq` is not declared, and print_DNA is declared with four
   parameters. */
print_DNA (seq );
/* NOTE(review): a literal percent sign in a printf format is written `%%`, not
   '%'. `seq1`, `seq2`, `seq3` and `n` are not declared in main, the call is
   missing a closing parenthesis, and compare_DNA only runs here, after
   print_DNA has already been called. */
fprintf (output, "The overlap percentage is %.1f'%' ", compare_DNA (
seq1[], seq2[], seq3[],n);
}
}
}
/* NOTE(review): this brace closes main, leaving the two calls below outside
   any function. */
}
/* NOTE(review): the standard function that closes a FILE* is fclose. */
fpclose(input);
fpclose(output);
}
/*======================================
Function definitions
=======================================*/
/* Reads characters one at a time from `input` into `sequence` and returns i - 1.
   NOTE(review): this block does not compile as written; the notes below mark why. */
int read_DNA (char sequence[]) {
/* NOTE(review): `i` is never declared or initialised, and `input` is a local
   variable of main that is not visible in this function. */
do {
/* NOTE(review): fscanf's %c conversion needs a pointer (&sequence[i]); the
   char value itself is passed here. */
fscanf (input, "%c", sequence[i]);
i++;
/* NOTE(review): after i++ the condition inspects the slot that has not been
   written yet, not the character just read. '' is an empty character constant
   (invalid); presumably '\n' was intended. The statement also lacks its
   terminating semicolon. */
} while (sequence[i] != '')
return i - 1;
}
/*
 * Compares seq1 and seq2 position by position over their first n characters.
 *
 * seq3[i] receives the common base where the two sequences agree and a blank
 * (' ') where they differ; seq1 and seq2 are left untouched.
 *
 * Returns the percentage (0.0 - 100.0) of positions that match, or 0.0 when
 * n <= 0 so that no division by zero can occur.
 */
double compare_DNA (char seq1[], char seq2[], char seq3[], int n) {
    int matches = 0;    /* must start at zero; it is only ever incremented */

    if (n <= 0) {
        return 0.0;
    }

    for (int i = 0; i < n; i++) {
        if (seq1[i] == seq2[i]) {
            matches += 1;
            seq3[i] = seq1[i];
        } else {
            seq3[i] = ' ';
        }
    }

    /* Multiply in floating point first: matches / n in int arithmetic would
       truncate to 0 for anything short of a perfect match. */
    return 100.0 * matches / n;
}
/* Writes characters from seq1, seq3 and seq2 to `output`.
   NOTE(review): the parameter n is never used, and `output` is a local
   variable of main that is not visible in this function. */
void print_DNA (char seq1[], char seq2[], char seq3[], int n) {
/* NOTE(review): the bound 90 is hard-coded and the step is MAX_IN_LENGTH; if
   MAX_IN_LENGTH is larger than 90 this loop runs exactly once, with i == 0. */
for (int i = 0; i < 90; i += MAX_IN_LENGTH)
/* With i == 0 the condition j <= i admits only j == 0. */
for (int j = 0; j <= i; j++) {
/* Emits one character from each array in turn (seq1, seq3, seq2), i.e. the
   three sequences are interleaved character by character rather than
   written as three separate rows. */
fprintf (output,"%c", seq1[j]);
fprintf (output,"%c", seq3[j]);
fprintf (output,"%c", seq2[j]);
/* NOTE(review): printf writes to stdout, not to the output file. */
printf (" ");
}
}
----------------
I am having trouble getting the printout to match the sample output.
Explanation / Answer
please rate - thanks
try this
#include<stdio.h>
#include<conio.h>
/* Size of each sequence buffer; also the limit passed to fgets in read_DNA. */
#define MAX_IN_LENGTH 241
/* Number of characters print_DNA places in one output group. */
#define OUT_LENGTH 60
/* Input and output streams: opened and closed in main, read by read_DNA and
   written by print_DNA. */
FILE *in;
FILE *out;
/* Fills the third array with the comparison of the first two and returns the
   matching fraction. */
double compare_DNA(char [], char [], char s[], int );
/* Writes the two sequences and their comparison row to `out`. */
void print_DNA(char [], char [], char [], int );
/* Reads one line from `in` into the array and returns an int length. */
int read_DNA(char []);
int main()
{chars1[MAX_IN_LENGTH],s2[MAX_IN_LENGTH],s3[MAX_IN_LENGTH],ans[MAX_IN_LENGTH];
double percent;
int n,len;
in= fopen("dna_input.dat","r");
if(in==NULL) //is it ok?
{ printf("input file did not open please checkit ");
getch();
return 1;
}
out= fopen("dna_output.dat","w");
len=read_DNA(s1);
len=read_DNA(s2);
len=read_DNA(s3);
fprintf(out,"Comparison between sequence # 1 and sequence #2: ");
percent=compare_DNA(s1,s2,ans,len);
print_DNA(s1,s2,ans,len);
fprintf(out,"The overlap percentage is %.1f%% ",percent*100);
fprintf(out,"Comparison between sequence # 1 and sequence #3: ");
percent=compare_DNA(s1,s3,ans,len);
print_DNA(s1,s3,ans,len);
fprintf(out,"The overlap percentage is %.1f%% ",percent*100);
fprintf(out,"Comparison between sequence # 2 and sequence #3: ");
percent=compare_DNA(s2,s3,ans,len);
print_DNA(s2,s3,ans,len);
fprintf(out,"The overlap percentage is %.1f%% ",percent*100);
fclose(in);
fclose(out);
return 0;
}
/*
 * Compares two sequences base by base.
 *
 * n is the number of characters on a line including its end-of-line
 * character, so n - 1 bases are compared. seq3[i] receives the common base
 * where seq1 and seq2 agree and a blank (' ') where they differ.
 *
 * Returns the fraction (0.0 - 1.0) of bases that match, or 0.0 when there
 * are no bases to compare (n <= 1), which would otherwise divide by zero.
 */
double compare_DNA(char seq1[], char seq2[], char seq3[], int n)
{
    int i, count = 0;
    int bases = n - 1;

    if (bases <= 0)
        return 0.0;

    for (i = 0; i < bases; i++) {
        if (seq1[i] == seq2[i]) {
            seq3[i] = seq1[i];
            count++;
        } else {
            seq3[i] = ' ';
        }
    }
    return (double)count / bases;
}
/* Writes `width` characters of `row` to `out`, followed by a line break. */
static void put_row(const char *row, int width)
{
    fwrite(row, 1, (size_t)width, out);
    fputc('\n', out);
}

/*
 * Writes one comparison to `out`, split into groups of at most OUT_LENGTH
 * bases. Each group is three lines: the slice of seq1, the matching slice of
 * seq3 (the comparison row), and the slice of seq2.
 *
 * n is the number of characters on a line including its end-of-line
 * character, so n - 1 bases are printed. Nothing is written when n <= 1.
 */
void print_DNA(char seq1[], char seq2[], char seq3[], int n)
{
    int bases = n - 1;
    int start;

    /* Stepping by OUT_LENGTH over the bases (not over n) avoids an empty
       trailing group when the base count is an exact multiple of OUT_LENGTH. */
    for (start = 0; start < bases; start += OUT_LENGTH) {
        int width = bases - start;

        if (width > OUT_LENGTH)
            width = OUT_LENGTH;

        put_row(seq1 + start, width);
        put_row(seq3 + start, width);
        put_row(seq2 + start, width);
    }
}
int read_DNA(char s[])
{
fgets(s,MAX_IN_LENGTH,in);
int i;
for(i=0;i<MAX_IN_LENGTH;i++)
{if(s[i]=='')
returni;
}
return MAX_IN_LENGTH;
}
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.