Python (2.7) Question from sqlite3 import dbapi2 as sqlite class searcher: def _
ID: 666788 • Letter: P
Question
Python (2.7) Question
from sqlite3 import dbapi2 as sqlite
class searcher:
    """Query helper over a SQLite search index.

    Holds one connection (``self.con``) for the lifetime of the object and
    reads the ``wordlist`` and ``wordlocation`` tables through it.
    """

    def __init__(self, dbname):
        """Open a connection to the SQLite database at *dbname*."""
        self.con = sqlite.connect(dbname)

    def __del__(self):
        """Close the connection when the searcher is garbage-collected."""
        # __init__ may have raised before self.con was ever assigned.
        con = getattr(self, 'con', None)
        if con is not None:
            con.close()

    def getmatchrows(self, q):
        """Find every URL that contains all the known words of *q*.

        *q* is split on single spaces. Words that are not present in
        ``wordlist`` are ignored; the remaining words are joined through
        one ``wordlocation`` alias each (w0, w1, ...) on a shared urlid.

        Returns ``(rows, wordids)`` where each row is
        ``(urlid, location_of_word0, location_of_word1, ...)`` and
        *wordids* lists the rowids of the words that were found.
        Returns ``(None, None)`` when none of the words is known, because
        no valid query can be built in that case.
        """
        # Look the words up with a bound parameter: the text comes straight
        # from the user, so it must never be spliced into the SQL string.
        wordids = []
        for word in q.split(' '):
            wordrow = self.con.execute(
                'select rowid from wordlist where word=?', (word,)).fetchone()
            if wordrow is not None:
                wordids.append(wordrow[0])

        if not wordids:
            return None, None

        # Pieces of the final query, one table alias per matched word
        fields = ['w0.urlid']
        tables = []
        clauses = []
        for tablenumber, wordid in enumerate(wordids):
            if tablenumber > 0:
                # Tie this alias to the previous one so all words share a URL
                clauses.append(
                    'w%d.urlid=w%d.urlid' % (tablenumber - 1, tablenumber))
            fields.append('w%d.location' % tablenumber)
            tables.append('wordlocation w%d' % tablenumber)
            clauses.append('w%d.wordid=%d' % (tablenumber, wordid))

        # Create the query from the separate parts
        fullquery = 'select %s from %s where %s' % (
            ','.join(fields), ','.join(tables), ' and '.join(clauses))
        print(fullquery)
        rows = list(self.con.execute(fullquery))
        return rows, wordids
Explanation / Answer
Answer:
Note: here is your modified code.
def getscoredlist(self, rows, wordids):
    """Return ``{urlid: total}`` as the weighted sum of all scoring functions.

    *rows* are ``(urlid, location, ...)`` tuples and *wordids* the ids of
    the query words. Every scoring function must return a dict with an
    entry for each urlid that appears in *rows*.
    """
    # (weight, scores) pairs -- this is where scoring functions are plugged in
    weighted = [
        (1.0, self.locationscore(rows)),
        (1.0, self.frequencyscore(rows)),
        (1.0, self.pagerankscore(rows)),
        (1.0, self.linktextscore(rows, wordids)),
        (5.0, self.nnscore(rows, wordids)),
    ]
    combined = {row[0]: 0 for row in rows}
    for factor, partial in weighted:
        for urlid in combined:
            combined[urlid] += factor * partial[urlid]
    return combined
def geturlname(self, id):
    """Return the ``url`` stored in ``urllist`` for the row with rowid *id*."""
    sql = "select url from urllist where rowid=%d" % id
    record = self.con.execute(sql).fetchone()
    return record[0]
def query(self, q):
    """Run the search *q*, print the ten best hits and return them.

    Prints one ``score url`` line per hit, best first. Returns
    ``(wordids, urlids)`` with the urlids of the printed hits in ranked
    order, or ``None`` (after printing "No results found") when nothing
    matched.
    """
    rows, wordids = self.getmatchrows(q)
    # Treat a missing row list and an empty one alike: the scoring
    # functions cannot normalise an empty result set.
    if not rows:
        print("No results found")
        return None

    scores1 = self.getscoredlist(rows, wordids)
    # (score, urlid) tuples so that sorting ranks by score first
    rankedscores1 = sorted(
        ((score, urlid) for (urlid, score) in scores1.items()), reverse=True)
    top = rankedscores1[:10]
    for (score, urlid) in top:
        print('%f %s' % (score, self.geturlname(urlid)))
    return wordids, [urlid for (_, urlid) in top]
def normalizescores1(self, scores1, smallIsBetter=0):
    """Scale the values of *scores1* so that the best entry scores 1.0.

    With *smallIsBetter* false every value is divided by the largest
    value; with it true each result is ``smallest / value``. Returns a new
    ``{key: float}`` dict; an empty input yields an empty dict.
    """
    vsmall = 0.00001  # Avoid division by zero errors

    # min()/max() raise on an empty sequence, so answer that case directly
    if not scores1:
        return {}

    if smallIsBetter:
        minscore = float(min(scores1.values()))
        return {u: minscore / max(vsmall, score)
                for (u, score) in scores1.items()}

    maxscore = max(scores1.values())
    if maxscore == 0:
        maxscore = vsmall
    maxscore = float(maxscore)
    return {u: float(score) / maxscore for (u, score) in scores1.items()}
def frequencyscore(self, rows):
    """Score each URL by how many rows it has, normalised by the helper.

    The first element of every row is taken as the URL id.
    """
    counts = {}
    for row in rows:
        urlid = row[0]
        counts[urlid] = counts.get(urlid, 0) + 1
    return self.normalizescores1(counts)
def locationscore(self, rows):
    """Score each URL by the smallest sum of word locations among its rows.

    Sums are capped at 1000000 and handed to ``normalizescores1`` with
    ``smallIsBetter=1``.
    """
    locations = {}
    for row in rows:
        urlid = row[0]
        loc = sum(row[1:])
        # Keep the lowest location sum seen so far for this URL
        locations[urlid] = min(locations.get(urlid, 1000000), loc)
    return self.normalizescores1(locations, smallIsBetter=1)
def distancescore(self, rows):
    """Score each URL by how close together the query words appear.

    Each row is ``(urlid, location, location, ...)``. For every URL the
    smallest total gap between consecutive locations is kept (capped at
    1000000) and passed to ``normalizescores1`` with ``smallIsBetter=1``.
    Returns an empty dict when *rows* is empty.
    """
    # Nothing to inspect: rows[0] below would raise IndexError
    if not rows:
        return {}

    # If there's only one word, everyone wins!
    if len(rows[0]) <= 2:
        return {row[0]: 1.0 for row in rows}

    mindistance = {}
    for row in rows:
        dist = sum(abs(row[i] - row[i - 1]) for i in range(2, len(row)))
        # 1000000 is the starting value, so larger distances are clamped
        mindistance[row[0]] = min(mindistance.get(row[0], 1000000), dist)
    return self.normalizescores1(mindistance, smallIsBetter=1)
def inboundlinkscore(self, rows):
    """Score each URL by the number of ``link`` rows that point to it.

    One count query is issued per distinct URL id (first element of each
    row); the counts are then normalised by ``normalizescores1``.
    """
    inboundcount = {}
    for row in rows:
        urlid = row[0]
        if urlid in inboundcount:
            continue
        cur = self.con.execute(
            'select count(*) from link where toid=%d' % urlid)
        inboundcount[urlid] = cur.fetchone()[0]
    return self.normalizescores1(inboundcount)
def linktextscore(self, rows, wordids):
    """Score each URL by the pagerank of pages linking to it with a query word.

    For every word id, each link whose text contains that word adds the
    pagerank score of the linking page to the target URL, provided the
    target appears in *rows*. The totals are normalised by
    ``normalizescores1`` (largest value becomes 1.0).
    """
    linkscores1 = {row[0]: 0 for row in rows}
    for wordid in wordids:
        cur = self.con.execute(
            'select link.fromid,link.toid from linkwords,link '
            'where wordid=? and linkwords.linkid=link.rowid', (wordid,))
        for (fromid, toid) in cur:
            if toid not in linkscores1:
                continue
            found = self.con.execute(
                'select score from pagerank where urlid=?',
                (fromid,)).fetchone()
            # A linking page without a pagerank row contributes nothing
            # instead of crashing on fetchone() returning None.
            if found is not None:
                linkscores1[toid] += found[0]
    # Same max-based scaling the other scoring functions use
    return self.normalizescores1(linkscores1)
def pagerankscore(self, rows):
    """Score each URL in *rows* by its stored pagerank, scaled to at most 1.0.

    The first element of each row is the URL id looked up in the
    ``pagerank`` table. A URL without a pagerank row scores 0.0.
    Normalisation is delegated to ``normalizescores1`` so that an
    all-zero result does not divide by zero.
    """
    pageranks = {}
    # Query once per distinct URL rather than once per row
    for urlid in {row[0] for row in rows}:
        found = self.con.execute(
            'select score from pagerank where urlid=?', (urlid,)).fetchone()
        pageranks[urlid] = found[0] if found is not None else 0.0
    return self.normalizescores1(pageranks)
def nnscore(self, rows, wordids):
    """Score each URL with ``self.mynet.getresult(wordids, urlids)``.

    The i-th value returned by the network is assigned to the i-th
    distinct URL id, then the scores are normalised by
    ``normalizescores1``.
    """
    # NOTE(review): self.mynet is not assigned anywhere in the visible
    # code -- confirm where the network object is attached.
    # Distinct URL ids, in the iteration order of a dict keyed by them
    urlids = list({row[0]: 1 for row in rows})
    nnres = self.mynet.getresult(wordids, urlids)
    scores1 = {urlid: nnres[pos] for pos, urlid in enumerate(urlids)}
    return self.normalizescores1(scores1)
if __name__ == '__main__':
    # Neural-network setup, left disabled:
    # mynet=nn.searchnet('nn.db')
    # mynet.maketables()

    # NOTE(review): randolphcarter is not defined in the code shown here
    # (the class above is named searcher) -- confirm the intended class.
    carter = randolphcarter('searchindex.db')

    # A query of a single space contains no real words, so this run
    # exercises the "no results" path of query().
    keyword = unicode(' ', 'utf-8')
    a = carter.query(keyword)
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.