Hello Program a random sentence generator using java that reads a context-free g
ID: 3694316 • Letter: H
Question
Hello
Program a random sentence generator using java that reads a context-free grammar from an input file and prints out one or more random sentences. It should take as its first argument, a path to the file containing the grammar. If a second numeric argument is present, it should generate as many sentences as the number in the second argument indicates. You should name your program as “randCFL”. Use the input file provided with this assignment labeled “grammar” to test your generator. You should get sentences (some that may not make any 1 sense) like, Jim ate cheese. big Jim ate green cheese. big cheese ate Jim. green Jim ate green big Jim.
The format of the example grammar file is as follows:
P N
P A P
START P V P.
A big
A green
N cheese
N Jim
V ate
This corresponds to the following context-free grammar, (V, , R, S) where
V = {S, A, P, N, V }
= {J im, big, green, cheese, ate}
R = { P N
P A P
S P V P.
A big
A green
N cheese
N J im
V ate}
Note that a line in the input file will consist of a non-terminal symbol followed by a space or a tab, and then a sequence of any number of non-terminal and terminal symbols. The grammar’s start symbol is START.
Give your program an option “-t” that makes it produce elaborate derivations instead of just strings. Please use the left-most derivation to produce the string. In other words, when your program is invoked as,
randCFL -t grammar 1
instead of simply printing out a sentence say, big cheese ate Jim, it should print the following derivation:
START => P V P. => A P V P. => big P V P.
=> big N V P. => big cheese V P.
=> big cheese ate P. => big cheese ate Jim.
Notice that this includes extra information on how the string was generated. If you think that the grammar is ambiguous and multiple leftmost derivations may exist, just pick one and show it.
This is the grammar file
P N
P A~P
START P V P.
A big
A green
N cheese
N Jim
V ate
Explanation / Answer
// FOLLWING CODE IS WRITTEN AS PER THE ASKED REUIREMENT IN JAVA
The format of the example grammar file is as follows:
P N
P A P
START P V P.
A big
A green
N cheese
N Jim
V ate
This corresponds to the following context-free grammar, (V, , R, S) where
V = {S, A, P, N, V }
= {J im, big, green, cheese, ate}
R = { P N
P A P
S P V P.
A big
A green
N cheese
N J im
V ate}
import java.util.*;
import java.io.*;
/**
* A class that keeps track of general Context Free Grammars
*/
public class CFG{
char start; // start variable
TreeSet ruleSet; // keep the rules sorted
HashMap ruleMap; // map from char to ArrayList of rule Strings
TreeSet variableSet; // set of Characters
/** Returns true if char represents a variable */
public boolean hasVariable(char c){
return variableSet.contains(new Character(c));
public CFG(){
ruleSet = new TreeSet();
}
public CFG(String fname){
this();
try{
parseFile(fname);
makeRuleMap();
}catch( Exception e ){
System.err.println("WARNING: Corrupt CFG has been defined!!!");
e.printStackTrace();
}
public CFG(CFG g){
start = g.start;
ruleSet = new TreeSet();
Object[] objarray = g.ruleSet.toArray();
for(int i = 0; i<objarray.length; i++){
Rule r = (Rule)objarray[i];
ruleSet.add(new Rule(r.LHS,r.RHS));
}
makeRuleMap();
}
/* The first method that needs to be called after a constructor */
public void parseFile(String fname)
throws FileNotFoundException, MalformedGrammarException{
FileInputStream fis;
try{
fis = new FileInputStream(fname);
BufferedReader br = new BufferedReader(new InputStreamReader(fis));
StringTokenizer tokens;
int line_num = 0;
char currVar;
variableSet = new TreeSet();
while (br.ready()){
//String tok;
String line = br.readLine();
if( line == null ) continue; //when line ends immediately in ' '
tokens = new StringTokenizer(line, " ");
if (!tokens.hasMoreTokens()){ // empty line
System.out.println("empty");
continue;
}
// FOR DEBUGGING
StringTokenizer testT = new StringTokenizer(line, " ");
for(String t = testT.nextToken(); testT.hasMoreTokens(); t = testT.nextToken())
System.out.print(t+" ");
// get the variable
String variable = tokens.nextToken();
if (variable.length() != 1)
throw new MalformedGrammarException();
currVar = variable.charAt(0);
variableSet.add(new Character(currVar));
// start variable is at the beginning of the file
if (line_num == 0)
start = currVar;
// get the productions
while(tokens.hasMoreTokens()){
String prod = tokens.nextToken();
if (prod.equals(""")) // epsilon production
ruleSet.add(new Rule(currVar,""));
else if( prod.indexOf('"')!=-1 ){
throw new MalformedGrammarException();
}
else
ruleSet.add(new Rule(currVar,prod));
}
line_num++;
}
fis.close();
}catch(IOException e){
if(e instanceof FileNotFoundException){
e.printStackTrace();
throw new FileNotFoundException();
}
throw new MalformedGrammarException();
}catch(NullPointerException e){
e.printStackTrace();
throw new MalformedGrammarException();
}
}
public void createFile(String filename)
throws FileNotFoundException{
FileOutputStream fos = null;
PrintWriter out = null;
try{
fos = new FileOutputStream(filename);
out = new PrintWriter(fos);
Object[] LHSarray = variableSet.toArray();
Character LHS;
Object[] RHSarray;
for(int i = 0; i<LHSarray.length; i++){
LHS = (Character)LHSarray[i];
out.print(LHS);
RHSarray = ((ArrayList)ruleMap.get(LHS)).toArray();
for(int j=0; j<RHSarray.length; j++)
out.print(" "+RHSarray[j]);
out.print(" ");
}
if(out != null ) out.close();
if(fos != null ) fos.close();
}
catch(Exception e){
e.printStackTrace();
}
} /*This method resets the ruleMap and variableSet in accordance with the ruleSet AND call AFTER parseFile(filename) */
public void makeRuleMap(){
Object[] vars = variableSet.toArray();
Object[] rules = ruleSet.toArray();
ArrayList al; // temp ArrayList variable
Character curr; // temp variable
// initialize the ruleMap
ruleMap = new HashMap();
for(int i=0; i<vars.length; i++){
curr = (Character)(vars[i]);
al = new ArrayList();
ruleMap.put(curr, al);
}
// fill the ruleMap
for(int i=0; i<rules.length; i++){
Rule r = (Rule)rules[i];
curr = new Character(r.LHS);
al = (ArrayList)ruleMap.get(curr);
al.add(r.RHS);
}
}
public String toString(){
Object[] vars = variableSet.toArray();
StringBuffer temp = new StringBuffer();
temp.append("Variable set is {");
for(int i=0; i<vars.length; i++){
temp.append(""+(i>0?",":"")+" "+vars[i]);
}
temp.append("} Start variable is "+start);
for(int i=0; i<vars.length; i++){
Character c = (Character)vars[i];
ArrayList al = (ArrayList)ruleMap.get(c);
temp.append(" "+c+" --> ");
for(int j=0; j<al.size(); j++)
temp.append( (j>0?" | " : " " )+showEpsilon(al.get(j)) );
}
return temp.toString();
}
/* Convert to IT "Epsilon" if x.toString().equals("") */
private static String showEpsilon(Object x){
String input = x.toString();
return ( input.equals("") ? "<epsilon>" : input );
}
/* returns true if epsilons transitions exist in the grammar */
public boolean hasEpsilons(){
//uses the fact that the empty string "" is the smallest string
//so that epsilon transition are firsr in the rule map
Object[] v = variableSet.toArray();
for(int i=0; i<v.length; i++)
if ( ((ArrayList)ruleMap.get((Character)v[i] )).size() > 0 &&
((ArrayList)ruleMap.get((Character)v[i] )).get(0).equals("") )
return true;
return false;
}
//returns true if only epsilon transition is from the start variable.
public boolean hasNoEpsilonsExceptForStart(){
// start --> epsilon:
Object[] v = variableSet.toArray();
for(int i=0; i<v.length; i++)
if ( ( (ArrayList)ruleMap.get((Character)v[i] )).size() > 0
&& ( (ArrayList)ruleMap.get( (Character)v[i] ) ).get(0).equals("")
&& !v[i].equals(new Character(start)) )
return false;
return true;
}
/* returns true if unit variable transitions exist in the grammar */
public boolean hasUnitTransitions(){
Object[] v = variableSet.toArray();
for(int i=0; i<v.length; i++){
ArrayList al = (ArrayList)ruleMap.get( (Character)v[i] );
for(int j=0; j<al.size(); j++){
String RHS = (String)al.get(j);
if( RHS.length() == 1 && variableSet.contains(new Character(RHS.charAt(0))) )
return true;
}
}
return false;
}
// returns true if the grammar is in Chomsky Normal Form
public boolean isChomskyNormalFormGrammar(){
if( !hasNoEpsilonsExceptForStart() )
return false;
// unit transitions?
if ( hasUnitTransitions() )
return false;
// now check that all the transitions are length <= 2,
// and if == 2, contain no terminals, and don't contain start symbol
Object[] v = variableSet.toArray();
//Checking for final stage of CNF
for(int i=0; i<v.length; i++){
ArrayList al = (ArrayList)ruleMap.get( (Character)v[i] );
for(int j=0; j<al.size(); j++){
String RHS = (String)al.get(j);
//System.out.println(v[i]+" -> "+RHS);
if( RHS.length() > 2)
return false;
else if ( RHS.length() == 2 ){
//System.out.println(RHS);
if(!( variableSet.contains(new Character(RHS.charAt(0)))
&& variableSet.contains(new Character(RHS.charAt(1)) ) )
|| RHS.charAt(0)==start || RHS.charAt(1)==start )
return false;
}
}
}
return true;
}
public static void main(String[] args)
throws FileNotFoundException, MalformedGrammarException{
if(args.length < 1 || args.length > 2) {
System.out.println("Usage: java Parser <grammar_file> [<conversion instruction>]");
System.exit(1);
}
String fname = args[0];
String conversionInstruction = (args.length == 2) ? args[1] : null ;
// Build the cfg
CFG cfg = new CFG();
cfg.parseFile(fname);
cfg.makeRuleMap();
fname = (new StringTokenizer(fname,".")).nextToken();
if( conversionInstruction != null){
String suffix = "";
if (conversionInstruction.equals("-removeEpsilons")){
cfg.removeEpsilons();
suffix = "_noeps";
}
if (conversionInstruction.equals("-removeUnits")){
cfg.removeUnits();
suffix = "_nounits";
}
if (conversionInstruction.equals("-makeCNF")){
cfg.makeCNF();
suffix = "_cnf";
}
cfg.createFile(fname+suffix+".cfg");
}
test(cfg);
System.out.println();
System.out.println(" AFTER CFG ");
cfg.removeEpsilons();
cfg.removeUnits();
cfg.makeCNF();
test(cfg) }
public static void test(CFG cfg) {
System.out.println(cfg);
System.out.println("Number of variables = "+ cfg.variableSet.size());
System.out.println("Number of rules = "+ cfg.ruleSet.size());
System.out.println("Contains epsilons? "+cfg.hasEpsilons());
System.out.println("Has no epsilons, except perhaps <START> --> epsilon? "
+cfg.hasNoEpsilonsExceptForStart() );
System.out.println("Contains variable unit transitions? "+cfg.hasUnitTransitions());
System.out.println("Is in Chomsky Normal Form? "+cfg.isChomskyNormalFormGrammar() );
}
public void removeEpsilons(){ /* Before anything, add a new start symbol */
Rule newStart = new Rule(getUnusedSymbol(),start+"");
start = newStart.getLHS();
ruleSet.add(newStart);
makeRuleMap();
Object[] vars = variableSet.toArray();
Iterator itr;
Rule rule;
char symbol;
TreeSet tempSet = new TreeSet();
int test = 0;
while(!hasNoEpsilonsExceptForStart()) {
for(int i=0; i<vars.length; i++) {
symbol = ((Character)vars[i]).charValue();
System.out.println("i: "+i+" curr symbol: "+symbol);
if(ruleSet.remove(new Rule(symbol, "" ))) {
itr = ruleSet.iterator();
while(itr.hasNext()) {
rule = ((Rule)itr.next()).copyAndReplaceAll(symbol,"");
if(rule != null) {
if(!rule.isEpsilonRule() || rule.getLHS() == start)
tempSet.add(rule);
}
}
}
}//add all the new rules made to the ruleSet
ruleSet.addAll(tempSet);
tempSet = new TreeSet();
//update the ruleMap
makeRuleMap();
}
}
public void removeUnits() throws MalformedGrammarException{
if (!hasNoEpsilonsExceptForStart())
throw new MalformedGrammarException("When removing unit transitions"+
" may not have non-start epsilon transitions");
Rule rule;
Iterator iter1;
Iterator iter2; TreeSet removeSet; TreeSet addSet; while(hasUnitTransitions()) { removeSet = new TreeSet();
addSet = new TreeSet();
iter1 = ruleSet.iterator();
while(iter1.hasNext()) {rule = (Rule)iter1.next();
if(rule.isUnitRule()) { char LHS = rule.getRHS().charAt(0);
if(variableSet.contains(new Character(LHS))) {
removeSet.add(rule);
if(rule.isReflexiveRule()) continue;
}
else continue; SortedSet subset = ruleSet.subSet(new Rule(LHS,""),new Rule(LHS,"zzzzzzzzzzzzzzz"));
iter2 = subset.iterator();
while(iter2.hasNext()) {
Rule currRule = (Rule)iter2.next();
Rule newRule = new Rule(rule.getLHS(),currRule.getRHS());
if(!newRule.isReflexiveRule())
addSet.add(newRule);
}
}
}
}
ruleSet.removeAll(removeSet);
ruleSet.addAll(addSet); // NEED TO UPDATE MAP
makeRuleMap();
}
}
public void makeCNF() throws MalformedGrammarException{
if (hasUnitTransitions() || !hasNoEpsilonsExceptForStart())
throw new MalformedGrammarException("When converting to Chomsky Normal Form"+
" must not have non-start epsilon transitions"+
" and must have gotten rid of unit transitions");
Iterator iter; TreeSet addSet;
int test = 0; while(!isChomskyNormalFormGrammar()) {
iter = ruleSet.iterator(); addSet = new TreeSet(); while(iter.hasNext()) {
Rule rule = (Rule)iter.next();
String RHS = rule.getRHS();
if(rule.isEpsilonRule()) continue; //do nothing, it must be the start symbol
if(rule.isUnitRule()) continue; //do nothing, it must be a terminal
//if one or both characters in the RHS are terminals we have to process the rule
if(RHS.length() == 2) {
char c1 = RHS.charAt(0);
char c2 = RHS.charAt(1);
String newRHS = "";
if(!variableSet.contains(new Character(c1))) {
newRHS += addNewRuleIfNeeded(c1+"",addSet);
}
else newRHS += c1;
if(!variableSet.contains(new Character(c2))) {
newRHS += addNewRuleIfNeeded(c2+"",addSet);
}
else newRHS += c2; rule.setRHS(newRHS); continue;
}
String substr = RHS.substring(1,RHS.length());
rule.setRHS(RHS.charAt(0)+""+addNewRuleIfNeeded(substr,addSet));
}
ruleSet.addAll(addSet);
makeRuleMap();
// System.out.println(toString());
}//whlie not in CNF
}
private char getUnusedSymbol() {
Character c;
for(int i=(int)'A';i<=(int)'Z';i++)
if(!variableSet.contains(c = new Character((char)i))) {
variableSet.add(c);
//System.out.println("Adding new symbol: "+(char)i);
return (char)i;
}
for(int i=(int)'a';i<=(int)'z';i++)
if(!variableSet.contains(c = new Character((char)i))) {
variableSet.add(c);
return (char)i;
}
return '';
} private char addNewRuleIfNeeded(String RHS,TreeSet addSet) {
Iterator iter = variableSet.iterator();
ArrayList al;
Character symbol;
while(iter.hasNext()) {
symbol = (Character)iter.next();
//System.out.println("Symbol = "+symbol);
al = (ArrayList)ruleMap.get(symbol);
if(al == null) {
Iterator iter2 = addSet.iterator();
while(iter2.hasNext()) {
Rule tmp = (Rule)iter2.next();
if(tmp.getRHS().equals(RHS))
return tmp.getLHS();
}
continue;
}
if(al.size() == 1 && al.get(0).equals(RHS))
return symbol.charValue();
}
Rule newRule = new Rule(getUnusedSymbol(),RHS);
addSet.add(newRule);
return newRule.getLHS();
}
public char getStartSymbol() {
return start;
}
}// IT IS A CLASS CFG
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.