/*
* Useful references:
* https://en.wikipedia.org/wiki/Levenshtein_distance
* https://en.wikipedia.org/wiki/Vladimir_Levenshtein
*
*/
namespace App\Libraries;
/**
 * Fuzzy-matches a free-text input against a list of accepted strings using the
 * Levenshtein edit distance and reports how close the best candidate is, as a percentage.
 *
 * Intended to be used independently by any module.
 *
 * Options understood by evaluate():
 *  - ignore_space       (== 1) remove space characters from the input before matching.
 *  - ignore_preposition (== 1) drop input words that appear in the connecting-word list.
 *  - ordered_keyword    (== 0) try every ordering of the input words and keep the best one.
 *  - case_sensitive, language ('EN', 'BM', anything else = any): accepted but not acted
 *    upon by this class yet; comparison is always case-sensitive because levenshtein() is.
 */
class ContentMatcher
{
    /**
     * Percentage score of every word ordering tried by pc_permute(), index-aligned with
     * $non_ordered_keyword_result. Cleared at the start of each non-ordered evaluation.
     */
    private $cached_word_permutation = [];

    /**
     * Full closest_word() result of every word ordering tried by pc_permute().
     */
    private $non_ordered_keyword_result = [];

    /**
     * Scores $input_string against the accepted strings in $arr_scheme.
     *
     * @param string $input_string             Text to evaluate; surrounding whitespace is ignored.
     * @param array  $arr_scheme               Accepted strings; each is trimmed before comparison.
     * @param array  $array_preposition_scheme Words removed from the input when
     *                                         'ignore_preposition' is 1.
     * @param array  $option                   Option flags, see the class description.
     *
     * @return array 'result'  => closest_word() result for the best match,
     *               'options' => $option as given,
     *               'input'   => the three input arguments as given.
     */
    public function evaluate($input_string = '', $arr_scheme = array(), $array_preposition_scheme = array(), $option = array())
    {
        // Normalise once and use the normalised value everywhere below.
        $input = trim($input_string);

        // Spaces are kept unless explicitly disabled. Only the input is altered;
        // scheme entries are still compared as given.
        if (isset($option['ignore_space']) && $option['ignore_space'] == 1) {
            $input = str_replace(' ', '', $input);
        }

        // Connecting words are kept unless the caller asks for them to be dropped.
        if (isset($option['ignore_preposition']) && $option['ignore_preposition'] == 1) {
            $filtered_words = $this->ignore_preposition($array_preposition_scheme, $input);
        } else {
            $filtered_words = explode(' ', $input);
        }

        if (isset($option['ordered_keyword']) && $option['ordered_keyword'] == 0) {
            // Start from a clean slate so results of earlier calls on this instance cannot leak in.
            $this->cached_word_permutation = [];
            $this->non_ordered_keyword_result = [];

            // Word order does not matter: score every ordering and keep the first best one.
            // The number of orderings is factorial in the word count.
            $this->pc_permute($filtered_words, $arr_scheme);
            $best_keys = array_keys($this->cached_word_permutation, max($this->cached_word_permutation));
            $result = $this->non_ordered_keyword_result[$best_keys[0]];
        } else {
            // Default: the words are matched in the order they were typed.
            $result = $this->closest_word($filtered_words, $arr_scheme);
        }

        // P_TODO: How to integrate? Please review.
        // $option['language'] ('EN', 'BM', otherwise any) is not used for matching yet.

        $output_result = [];
        $output_result['result'] = $result;
        $output_result['options'] = $option;
        $output_result['input']['input_string'] = $input_string;
        $output_result['input']['arr_scheme'] = $arr_scheme;
        $output_result['input']['array_preposition_scheme'] = $array_preposition_scheme;

        return $output_result;
    }

    /**
     * Recursively builds every ordering of $items and scores each complete ordering
     * against $words, appending the outcome to the two result properties.
     *
     * @param array $items Words not yet placed in the ordering under construction.
     * @param array $words Accepted strings to score against.
     * @param array $perms Ordering built so far (internal to the recursion).
     */
    private function pc_permute($items, $words, $perms = array())
    {
        if (empty($items)) {
            // A complete ordering: score it once and record both the score and the details.
            $match = $this->closest_word($perms, $words);
            $this->cached_word_permutation[] = $match['percentage_correct_%'];
            $this->non_ordered_keyword_result[] = $match;

            return;
        }

        for ($i = count($items) - 1; $i >= 0; --$i) {
            $remaining = $items;
            $picked = array_splice($remaining, $i, 1);
            // The picked word goes in front of what has been built so far.
            $this->pc_permute($remaining, $words, array_merge($picked, $perms));
        }
    }

    /**
     * Splits the input on single spaces and removes every word listed in the scheme.
     *
     * Matching is per word and case-sensitive, so multi-word scheme entries never match.
     *
     * @param array  $arr_scheme_preposition Words to remove.
     * @param string $str_input_string       Input text.
     *
     * @return array Remaining input words, re-indexed from 0.
     */
    private function ignore_preposition($arr_scheme_preposition = array(), $str_input_string = '')
    {
        $arr_input_string = explode(' ', $str_input_string);

        // Always subtract the scheme from the input, regardless of which list is longer.
        return array_values(array_diff($arr_input_string, $arr_scheme_preposition));
    }

    /**
     * Finds the entry of $words with the smallest Levenshtein distance to the input words
     * joined by single spaces. On equal distance the later entry wins.
     *
     * @param array $input_string Input words.
     * @param array $words        Accepted strings; each is trimmed before comparison.
     *
     * @return array 'input'                => the joined input,
     *               'exact'                => 1 when a candidate matched exactly, else 0,
     *               'closest'              => best candidate, or null when none could be scored,
     *               'percentage_correct_%' => similarity in percent, rounded to 2 decimals.
     */
    private function closest_word($input_string, $words)
    {
        $input = implode(' ', $input_string);
        $closest = null;
        $shortest = -1;

        foreach ($words as $word) {
            $word = trim($word);
            $lev = levenshtein($input, $word);

            // PHP < 8.0 returns -1 when either string exceeds 255 bytes; such a
            // candidate cannot be scored, so skip it.
            if ($lev < 0) {
                continue;
            }

            if ($lev === 0) {
                // Exact match, nothing can beat it.
                $closest = $word;
                $shortest = 0;
                break;
            }

            if ($shortest < 0 || $lev <= $shortest) {
                $closest = $word;
                $shortest = $lev;
            }
        }

        if ($closest === null) {
            // No candidate to compare against.
            $percent = 0;
        } else {
            $longest = max(strlen($input), strlen($closest));
            // Two empty strings are identical; avoid dividing by zero.
            $percent = $longest === 0 ? 1 : 1 - $shortest / $longest;
        }

        $output_result = [];
        $output_result['input'] = $input;
        $output_result['exact'] = $shortest == 0 ? 1 : 0;
        $output_result['closest'] = $closest;
        $output_result['percentage_correct_%'] = round($percent * 100, 2);

        return $output_result;
    }
}
// To test: example controller that exercises ContentMatcher.
namespace App\Http\Controllers;
use App\Libraries\ContentMatcher;
use Illuminate\Http\Request;
use App\Http\Requests;
/**
 * Manual smoke test for ContentMatcher.
 */
class test extends Controller
{
    /**
     * Runs one ordered evaluation of the answer '35' against a fixed list of
     * accepted answers and hands the result to pre().
     *
     * NOTE(review): pre() is not defined in this file — presumably a debug dump helper; confirm.
     */
    public function index()
    {
        $matcher = new ContentMatcher();

        $answer = '35';

        $accepted_answers = [
            'Daya tolakan menggerakkan duit syiling',
            'daya tolakan',
            'Daya tolakan',
            'RM 35',
            '35 je',
            '3.5 je',
        ];

        // Connecting words that may be filtered out of the answer.
        $connecting_words = [
            'yang', 'bahawa', 'untuk', 'oleh itu', 'oleh kerana', 'agar',
            'meskipun', 'sekiranya', 'semoga', 'supaya', 'kalau', 'andaikata',
            'kendatipun', 'hingga', 'jika', 'jikalau', 'semasa', 'sementara',
            'setelah', 'sewaktu', 'ketika', 'tatkala', 'walaupun', 'malahan',
        ];

        $settings = [
            'case_sensitive' => 0,
            'ordered_keyword' => 1,
            'ignore_preposition' => 0,
            'ignore_space' => 0,
            'language' => 'BM',
        ];

        $report = $matcher->evaluate($answer, $accepted_answers, $connecting_words, $settings);
        pre($report);
    }
}