Question
Create a tokenizer in Ruby.
Your tokenizer will take two command line arguments: The first will be a grammar specification and the second will be a file to tokenize. Tokenize the file and print the tokens (symbol, lexeme, line) to the screen. If the file cannot be tokenized, print an error message identifying the line with the error.
grammar specification file:
NUM -> \d+
ADDOP -> [-+]
MULOP -> [*/]
LP -> (
RP -> )
EQ -> =
ID -> [A-Z]\w*
comment -> {[^}]*}
S -> ID EQ expr
expr -> expr ADDOP term | term
term -> term MULOP factor | factor
factor -> ID | NUM | LP expr RP
example file 1 to tokenize:
4+2 { this is
a comment }
+ 6
example file 2 to tokenize:
1
+
2 *
3
Code so far:
Explanation / Answer
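Before the specs, here is a minimal sketch of the command-line tokenizer the question asks for. It is a sketch only: it assumes the right-hand sides of the terminal rules are ordinary Ruby regexes (\d+, [-+], {[^}]*}, ...), that a spec line is a token rule when its right-hand side is a single pattern with no spaces or '|' (so the S/expr/term/factor productions are skipped), that bare characters such as ( and ) are taken literally when they are not valid regexes on their own, and that lowercase-named tokens such as comment are consumed but not printed.

#!/usr/bin/env ruby
# Sketch of a grammar-driven tokenizer; see the assumptions noted above.
abort "usage: ruby tokenizer.rb <grammar file> <input file>" unless ARGV.length == 2

# Keep only the terminal rules: lines whose right-hand side is a single
# pattern (no spaces and no '|').
rules = []
File.readlines(ARGV[0]).each do |spec_line|
  name, rhs = spec_line.split('->', 2)
  next unless rhs
  name, rhs = name.strip, rhs.strip
  next if rhs.empty? || rhs.include?(' ') || rhs.include?('|')
  pattern = begin
    Regexp.new('\A(?:' + rhs + ')')       # right-hand side as a regex ...
  rescue RegexpError
    Regexp.new('\A' + Regexp.escape(rhs)) # ... or as a literal, e.g. ( and )
  end
  rules << [name, pattern]
end

input = File.read(ARGV[1])
line  = 1
until input.empty?
  if (ws = input[/\A\s+/])                # skip whitespace, tracking line numbers
    line += ws.count("\n")
    input = input[ws.length..-1]
    next
  end
  symbol = lexeme = nil
  rules.each do |name, pattern|
    if (m = pattern.match(input))
      symbol, lexeme = name, m[0]
      break
    end
  end
  if symbol.nil?
    warn "Error: cannot tokenize line #{line}"
    exit 1
  end
  # Print (symbol, lexeme, line); lowercase symbols such as comment are skipped.
  puts "#{symbol} #{lexeme} #{line}" unless symbol == symbol.downcase
  line  += lexeme.count("\n")
  input  = input[lexeme.length..-1]
end

Run as ruby tokenizer.rb grammar.txt input.txt. With example file 1 it should print something like NUM 4 1, ADDOP + 1, NUM 2 1, ADDOP + 3, NUM 6 3 (the comment is consumed but not printed), and for input it cannot match it reports the offending line and exits with an error.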
# RSpec specs for the Tokenizer class
require_relative 'spec_helper'
describe Tokenizer do
let(:tokenizer) { described_class.new }
let(:offset) { 1 } # default token offset
describe '#get' do
it 'does tokenization' do
expect(tokenizer.get('foo bar')).to eq [offset, offset + 1]
end
it 'ignores too short tokens' do
t = described_class.new(min_length: 2)
expect(t.get('x')).to eq []
end
it 'ignores stop words' do
t = described_class.new(stop_words: ['xyz'])
expect(t.get('xyz foo')).to eq [offset]
end
it 'does not return nil tokens' do
tokenizer.tokens.get('foo')
tokenizer.tokens.freeze!
expect(tokenizer.get('foo bar')).to eq [offset]
end
end
describe '#tokens' do
it 'returns a tokens object by default' do
expect(tokenizer.tokens).to be_a Tokens
end
it 'can be overridden' do
tokens = Tokens.new
t = described_class.new(tokens)
expect(t.tokens).to be tokens
end
end
end
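The spec above exercises a Tokenizer class that is not included in the post. A minimal sketch that would satisfy these examples (assuming a plain whitespace split, a min_length that defaults to 1, and a Tokens#get that returns nil for unknown words once the store is frozen) might look like:

class Tokenizer
  attr_reader :tokens

  def initialize(tokens = nil, min_length: 1, stop_words: [])
    @tokens     = tokens || Tokens.new
    @min_length = min_length
    @stop_words = stop_words
  end

  # Map each usable word to its numeric index in the Tokens store,
  # dropping short words, stop words and anything the store rejects.
  def get(text)
    text.split.map do |word|
      next if word.length < @min_length
      next if @stop_words.include?(word)
      @tokens.get(word)
    end.compact
  end
end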
tokens_spec.rb
require_relative 'spec_helper'
require 'tempfile'
describe Tokens do
let(:tokens) { described_class.new }
let(:offset) { 1 } # default offset
describe '#get' do
it 'can create new tokens' do
expect(tokens.get('bar')).to eq offset
expect(tokens.get('foo')).to eq(offset + 1)
end
it 'can get an existing token' do
tokens.get('bar')
expect(tokens.get('bar')).to eq offset
end
it 'can include a prefix' do
tokens.get('bar', prefix: 'XyZ$')
expect(tokens.get('XyZ$bar')).to eq offset
end
it 'can get an existing token when frozen' do
tokens.get('blup')
tokens.freeze!
expect(tokens.get('blup')).to eq offset
end
it 'cannot get a new token when frozen' do
tokens.get('blup')
tokens.freeze!
expect(tokens.get('blabla')).to be_nil
end
end
describe '#find' do
it 'can find an existing token' do
tokens.get('blup')
i = tokens.get('blah')
expect(tokens.find(i)).to eq 'blah'
end
it 'returns nil for a non-existing token' do
tokens.get('blup')
expect(tokens.find(offset + 1)).to eq nil
end
it 'removes the prefix' do
i = tokens.get('blup', prefix: 'FOO$')
expect(tokens.find(i, prefix: 'FOO$')).to eq 'blup'
end
end
describe '#indexes' do
it 'is empty without tokens' do
expect(tokens.indexes).to eq []
end
it 'returns the expected indexes' do
tokens.get('foo')
tokens.get('blup')
expect(tokens.indexes).to eq [offset, offset + 1]
end
end
describe '#offset' do
it 'has a default' do
expect(described_class.new.offset).to eq offset
end
it 'can override the default' do
expect(described_class.new(offset: 5).offset).to eq 5
end
it 'affects the first number' do
tokens = described_class.new(offset: 12)
expect(tokens.get('hi')).to eq 12
end
end
describe '#frozen?' do
it 'is not frozen by default' do
expect(tokens.frozen?).to be false
end
it 'can be frozen' do
tokens.freeze!
expect(tokens.frozen?).to be true
end
it 'can be thawed' do
tokens.freeze!
tokens.thaw!
expect(tokens.frozen?).to be false
end
end
describe '#limit!' do
it 'limits to most frequent tokens by max_size' do
tokens.get('foo')
tokens.get('blup')
tokens.get('blup')
tokens.limit!(max_size: 1)
expect(tokens.indexes).to eq [offset + 1]
end
it 'limits by min_occurence' do
tokens.get('foo')
tokens.get('blup')
tokens.get('foo')
tokens.limit!(min_occurence: 2)
expect(tokens.indexes).to eq [offset]
end
end
describe '#load' do
let(:file) { Tempfile.new('tokens') }
after { file.unlink }
it 'saves and loads tokens' do
tokens.get('foo')
tokens.get('bar')
tokens.save(file.path)
expect(File.exist?(file.path)).to be true
expect(File.zero?(file.path)).to be false
ntokens = described_class.new
ntokens.load(file.path)
expect(ntokens.get('bar')).to eq(offset + 1)
end
end
end
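These specs in turn assume a Tokens class, also not shown. A compact sketch consistent with the behaviour they describe (sequential indexes starting at offset, optional prefixes, freezing, frequency-based pruning, and save/load, where JSON is an assumed storage format) could be:

require 'json'

class Tokens
  attr_reader :offset

  def initialize(offset: 1)
    @offset = offset
    @map    = {}            # token string => index
    @counts = Hash.new(0)   # token string => number of times requested
    @frozen = false
  end

  # Return the index for a token, assigning the next free index for new
  # tokens; once frozen, unknown tokens come back as nil.
  def get(token, prefix: '')
    key = prefix + token
    @counts[key] += 1
    return @map[key] if @map.key?(key) || @frozen
    @map[key] = @offset + @map.size
  end

  # Reverse lookup: index back to the token string, without the prefix.
  def find(index, prefix: '')
    key = @map.key(index)
    key && key.sub(/\A#{Regexp.escape(prefix)}/, '')
  end

  def indexes
    @map.values
  end

  def frozen?
    @frozen
  end

  def freeze!
    @frozen = true
  end

  def thaw!
    @frozen = false
  end

  # Keep only the most frequent tokens (the spec's spelling of
  # min_occurence is kept here on purpose).
  def limit!(max_size: nil, min_occurence: 1)
    kept = @map.keys.select { |k| @counts[k] >= min_occurence }
    kept = kept.sort_by { |k| -@counts[k] }.first(max_size) if max_size
    @map.select! { |k, _| kept.include?(k) }
  end

  def save(path)
    File.write(path, JSON.dump(@map))
  end

  def load(path)
    @map = JSON.parse(File.read(path))
  end
end

spec_helper.rb would then presumably just require RSpec and these two class files.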