You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
96 lines
3.1 KiB
Python
96 lines
3.1 KiB
Python
|
|
import re
|
|
import requests
|
|
import sys
|
|
|
|
URL = 'http://localhost:8088/suggestions/autocomplete'
|
|
|
|
def printf(format, *args):
    """Write a C-style formatted string to standard output.

    ``format`` is a ``%``-style format string and ``args`` are the values
    substituted into it. Nothing is appended, so the caller supplies any
    trailing newline.
    """
    rendered = format % args
    sys.stdout.write(rendered)
|
|
|
|
|
|
def normalize_query(query):
    """Return a canonical form of ``query`` for title comparison.

    The text is lower-cased, every character other than ``0-9``, ``a-z``
    and space is replaced by a space, runs of spaces are collapsed to one,
    and a single leading/trailing space is removed.
    """
    # Applied in order; each step relies on the result of the previous one.
    rewrites = (
        (r"[^0-9a-z ]", " "),
        (r" +", " "),
        (r"^ | $", ""),
    )
    text = query.lower()
    for pattern, replacement in rewrites:
        text = re.sub(pattern, replacement, text)
    return text
|
|
|
|
def reciprocal_rank(query, json):
    """Return the reciprocal rank of ``query`` within the hits of ``json``.

    ``json`` is a mapping whose ``'hits'`` entry is a sequence of mappings
    with a ``'title'`` key. The query and each title are compared after
    ``normalize_query``. Ranks are 1-based: the result is ``1.0 / rank`` for
    the first matching hit, or ``0`` when no hit matches.

    A warning is printed for every non-matching hit ranked beyond 10.
    """
    target = normalize_query(query)
    for rank, hit in enumerate(json['hits'], 1):
        if normalize_query(hit['title']) == target:
            return 1.0 / rank
        if rank > 10:
            # Single parenthesised argument: same output whether this is
            # parsed as a print statement or a print() call.
            print("Warning: More than 10 results")
    return 0
|
|
|
|
|
|
def returned_of_10(json):
    """Return the number of hits in ``json`` as a fraction of 10.

    ``json`` is a mapping whose ``'hits'`` entry supports ``len()``. The
    result is a float and exceeds 1.0 when there are more than ten hits.
    """
    hit_count = len(json['hits'])
    return hit_count / 10.0
|
|
|
|
|
|
# Exactly one command-line argument is expected: the test data file.
# Anything else prints the usage line to stderr and exits with status 1.
if len(sys.argv[1:]) != 1:
    sys.stderr.write("Usage: python evaluate.py testdata.txt\n")
    raise SystemExit(1)
|
|
|
|
# Number of prefix steps evaluated per query, for both the character-based
# and the word-based prefixes.
_PREFIX_STEPS = 5


def _char_prefixes(query, steps):
    """Return the prefixes of ``query`` with lengths 1..``steps``.

    When ``query`` is shorter than a requested length the whole query is
    returned for that step, so trailing entries may repeat.
    """
    return [query[:length] for length in range(1, steps + 1)]


def _word_prefixes(query, steps):
    """Return ``steps`` prefixes of ``query`` that each end after a space.

    The search for the first space starts at index 1. Each prefix includes
    its trailing space; once no further space is found the whole query is
    returned for that and every later step.
    """
    prefixes = []
    search_from = 1
    for _ in range(steps):
        space_at = query.find(" ", search_from)
        if space_at == -1:
            search_from = len(query)
            prefixes.append(query)
        else:
            search_from = space_at + 1
            prefixes.append(query[:search_from])
    return prefixes


def _score_prefixes(query, prefixes, rr_totals, returned_totals):
    """Query the service for each prefix and accumulate the per-step scores.

    ``rr_totals[i]`` and ``returned_totals[i]`` are incremented with the
    reciprocal rank and the returned-of-10 ratio obtained for
    ``prefixes[i]``. Each reciprocal rank is also written to stdout,
    preceded by a space. A prefix equal to the previous one reuses the
    previous result instead of issuing another request.
    """
    # None never equals a string, so the first prefix is always fetched,
    # even when it is empty, and rr/returned are always bound below.
    last_prefix = None
    rr = 0.0
    returned = 0.0
    for step, prefix in enumerate(prefixes):
        if prefix != last_prefix:
            response = requests.get(URL, params={'q': prefix})
            hits = response.json()  # parse the body once, use it twice
            rr = reciprocal_rank(query, hits)
            returned = returned_of_10(hits)
            last_prefix = prefix
        rr_totals[step] += rr
        returned_totals[step] += returned
        sys.stdout.write(" %s" % (rr,))


# Running sums per prefix step; divided by the query count for the report.
recip_rank_total_char = [0.0] * _PREFIX_STEPS
returned10_total_char = [0.0] * _PREFIX_STEPS
recip_rank_total_word = [0.0] * _PREFIX_STEPS
returned10_total_word = [0.0] * _PREFIX_STEPS
query_total = 0

# Each line of the test data file holds two tab-separated fields; the
# second one is the query to evaluate.
with open(sys.argv[1]) as testdata:
    for line in testdata:
        if not line.strip():
            # A blank line cannot be unpacked into two fields; skip it
            # rather than aborting the whole run.
            continue
        query_total += 1
        (_freq, query) = line.rstrip().split("\t")
        sys.stdout.write("%d %s : " % (query_total, query))
        query = re.sub(r"^[^0-9A-Za-z]", "", query)  # start with letter

        _score_prefixes(query, _char_prefixes(query, _PREFIX_STEPS),
                        recip_rank_total_char, returned10_total_char)
        _score_prefixes(query, _word_prefixes(query, _PREFIX_STEPS),
                        recip_rank_total_word, returned10_total_word)
        sys.stdout.write("\n")

sys.stdout.write("\n")
sys.stdout.write("Mean Reciprocal Rank for %d queries:\n" % (query_total,))
sys.stdout.write("Prefix MRR Returned of 10\n")

# With no queries every total is 0.0; divide by 1 so the report prints
# zeros instead of raising ZeroDivisionError.
divisor = max(query_total, 1)
for length in range(1, _PREFIX_STEPS + 1):
    printf("%d char %1.4f %1.4f\n", length, recip_rank_total_char[length - 1] / divisor, returned10_total_char[length - 1] / divisor)
for length in range(1, _PREFIX_STEPS + 1):
    printf("%d word %1.4f %1.4f\n", length, recip_rank_total_word[length - 1] / divisor, returned10_total_word[length - 1] / divisor)
|
|
|