Tuesday, March 14, 2017

Algorithm for Structure marking

/*
 * Useful references:
 * https://en.wikipedia.org/wiki/Levenshtein_distance
 * https://en.wikipedia.org/wiki/Vladimir_Levenshtein
 *
 */
namespace App\Libraries;


/**
 * Fuzzy-matches a free-text answer against a list of accepted answers (the
 * "scheme") using Levenshtein edit distance, and reports how close the best
 * candidate is as a percentage.
 *
 * Intended to be usable independently by any module.
 *
 * Options read by evaluate():
 *  - ignore_space       (1 = remove spaces from the input before comparing)
 *  - ignore_preposition (1 = drop words listed in the preposition scheme,
 *                        i.e. connecting words / "penghubung kata")
 *  - ordered_keyword    (0 = word order does not matter; every permutation of
 *                        the input words is tried and the best one is kept)
 *
 * Options that are accepted and echoed back but NOT implemented yet:
 *  - case_sensitive
 *  - language           ('EN', 'BM' or any)
 *
 * Note: levenshtein() and strlen() work on bytes, so multi-byte characters
 * count as more than one edit/character.
 */
class ContentMatcher
{

    /** Score (percentage) of every permutation tried by the current non-ordered evaluation. */
    private $cached_word_permutation = [];

    /** Full closest_word() result of every permutation, index-aligned with the scores above. */
    private $non_ordered_keyword_result = [];

    /**
     * Compare an input string with the accepted answers and return the best match.
     *
     * @param string $input_string             Text to evaluate.
     * @param array  $arr_scheme               Accepted answers to compare against.
     * @param array  $array_preposition_scheme Words removed from the input when
     *                                         the 'ignore_preposition' option is 1.
     * @param array  $option                   Option flags (see the class comment).
     *
     * @return array 'result'  => closest_word() result for the best match,
     *               'options' => the options as received,
     *               'input'   => the three input arguments as received.
     */
    public function evaluate($input_string = '', $arr_scheme = array(), $array_preposition_scheme = array(), $option = array())
    {
        // Always work from the trimmed input; the raw string is only echoed back.
        $input = trim($input_string);

        // Split into words, optionally dropping the configured prepositions.
        // This must happen before spaces are removed, otherwise there are no
        // word boundaries left to filter on.
        if (isset($option['ignore_preposition']) && $option['ignore_preposition'] == 1) {
            $filtered_words = $this->ignore_preposition($array_preposition_scheme, $input);
        } else {
            // By default prepositions are kept.
            $filtered_words = explode(' ', $input);
        }

        // Ignore space (off by default): the words themselves contain no
        // spaces, so gluing them together yields the space-free input.
        if (isset($option['ignore_space']) && $option['ignore_space'] == 1) {
            $filtered_words = array(implode('', $filtered_words));
        }

        if (isset($option['ordered_keyword']) && $option['ordered_keyword'] == 0) {
            // Non-ordered keywords: score every permutation and keep the best.
            // The caches are reset first so results of an earlier call on the
            // same instance cannot be returned by mistake.
            // Cost grows factorially with the number of words.
            $this->cached_word_permutation = [];
            $this->non_ordered_keyword_result = [];
            $this->pc_permute($filtered_words, $arr_scheme);

            $best_keys = array_keys($this->cached_word_permutation, max($this->cached_word_permutation));
            $result = $this->non_ordered_keyword_result[$best_keys[0]];
        } else {
            // Ordered keywords (default): compare the words in the given order.
            $result = $this->closest_word($filtered_words, $arr_scheme);
        }

        // P_TODO: the 'language' option ('EN' / 'BM' / any) is accepted but not
        // applied yet; how to integrate it is still to be decided.

        $output_result = array();

        // Result
        $output_result['result'] = $result;

        // Option
        $output_result['options'] = $option;

        // Input
        $output_result['input']['input_string'] = $input_string;
        $output_result['input']['arr_scheme'] = $arr_scheme;
        $output_result['input']['array_preposition_scheme'] = $array_preposition_scheme;

        return $output_result;
    }


    /**
     * Recursively build every permutation of $items and score each one.
     *
     * Each completed permutation is scored once with closest_word(); the
     * percentage and the full result are appended to the two instance caches
     * at the same index.
     *
     * @param array $items Words still to be placed.
     * @param array $words Accepted answers to compare against.
     * @param array $perms Permutation built so far (used by the recursion).
     */
    private function pc_permute($items, $words, $perms = array())
    {
        if (empty($items)) {
            // A full permutation is ready: score it once and cache both views.
            $match = $this->closest_word($perms, $words);
            $this->cached_word_permutation[] = $match['percentage_correct_%'];
            $this->non_ordered_keyword_result[] = $match;
            return;
        }

        for ($i = count($items) - 1; $i >= 0; --$i) {
            $newitems = $items;
            $newperms = $perms;
            // Move the i-th remaining word to the front of the permutation.
            list($foo) = array_splice($newitems, $i, 1);
            array_unshift($newperms, $foo);
            $this->pc_permute($newitems, $words, $newperms);
        }
    }


    /**
     * Remove the given prepositions from a space-separated string.
     *
     * The input is split on single spaces and compared word by word, so a
     * scheme entry that itself contains a space can never match.
     *
     * @param array  $arr_scheme_preposition Words to remove.
     * @param string $str_input_string       Input text.
     *
     * @return array Remaining input words, in their original order.
     */
    private function ignore_preposition($arr_scheme_preposition = array(), $str_input_string = '')
    {
        $arr_input_string = explode(' ', $str_input_string);

        // Always subtract the prepositions from the input, whichever list is
        // longer; array_values() re-indexes the survivors from 0.
        return array_values(array_diff($arr_input_string, $arr_scheme_preposition));
    }


    /**
     * Find the accepted answer with the smallest Levenshtein distance to the input.
     *
     * When several candidates share the smallest distance the last one wins,
     * except that an exact match (distance 0) stops the search immediately.
     *
     * @param array      $input_string Input words; joined with single spaces.
     * @param array      $words        Accepted answers (each is trimmed before comparing).
     * @param float|null $percent      Receives the similarity as a fraction (0..1).
     *
     * @return array 'input'                => the joined input,
     *               'exact'                => 1 on an exact match, otherwise 0,
     *               'closest'              => best candidate, or null when $words is empty,
     *               'percentage_correct_%' => similarity in percent, rounded to 2 decimals.
     */
    private function closest_word($input_string, $words, &$percent = null)
    {
        $input = implode(' ', $input_string);
        $closest = null;
        $shortest = -1;

        foreach ($words as $word) {
            $word = trim($word);
            $lev = levenshtein($input, $word);

            if ($lev == 0) {
                // Exact match: nothing can beat it.
                $closest = $word;
                $shortest = 0;
                break;
            }

            if ($lev <= $shortest || $shortest < 0) {
                $closest = $word;
                $shortest = $lev;
            }
        }

        if ($closest === null) {
            // No candidates at all: nothing matched.
            $percent = 0;
        } else {
            $longest = max(strlen($input), strlen($closest));
            // Both strings empty means they are identical; avoid dividing by zero.
            $percent = $longest > 0 ? 1 - $shortest / $longest : 1;
        }

        $output_result = array();
        $output_result['input'] = $input;
        $output_result['exact'] = $shortest == 0 ? 1 : 0;
        $output_result['closest'] = $closest;
        $output_result['percentage_correct_%'] = round($percent * 100, 2);

        return $output_result;
    }


}
To test the library, call it from a controller:
 

namespace App\Http\Controllers;

use App\Libraries\ContentMatcher;
use Illuminate\Http\Request;

use App\Http\Requests;

/**
 * Demo controller that runs ContentMatcher once with sample data.
 */
class test extends Controller
{
    /**
     * Evaluate the sample answer '35' against a small scheme and dump the result.
     */
    public function index()
    {
        $matcher = new ContentMatcher();

        // Accepted answers the input is compared against.
        $accepted_answers = [
            'Daya tolakan menggerakkan duit syiling',
            'daya tolakan',
            'Daya tolakan',
            'RM 35',
            '35 je',
            '3.5 je',
        ];

        // Words passed as the preposition scheme (only used when 'ignore_preposition' is 1).
        $prepositions = [
            "yang", "bahawa", "untuk", "oleh itu", "oleh kerana", "agar",
            "meskipun", "sekiranya", "semoga", "supaya", "kalau", "andaikata",
            "kendatipun", "hingga", "jika", "jikalau", "semasa", "sementara",
            "setelah", "sewaktu", "ketika", "tatkala", "walaupun", "malahan",
        ];

        $settings = [
            'case_sensitive'     => 0,
            'ordered_keyword'    => 1,
            'ignore_preposition' => 0,
            'ignore_space'       => 0,
            'language'           => 'BM',
        ];

        $outcome = $matcher->evaluate('35', $accepted_answers, $prepositions, $settings);

        // pre() is not defined in this listing; presumably a project debug-print helper.
        pre($outcome);
    }
}

No comments:

Post a Comment