// zardoz / matrix.go
package main

import (
	"bufio"
	"log"
	"os"
	"strings"
	"sync"
	"time"
)

// ByControlPlane groups the channels through which the rest of the program
// feeds the classifier. Each channel is drained by a dedicated goroutine
// started from (*ByClassifier).enroll.
type ByControlPlane struct {
	BadTokens   chan string // text to learn as BAD; drained by readBadTokens into IsBAD
	GoodTokens  chan string // text to learn as GOOD; drained by readGoodTokens into IsGOOD
	StatsTokens chan string // counter names to bump; drained by readStatsTokens into AddStats
}

// safeClassifier pairs a token table with the mutex used to serialize access
// to it.
type safeClassifier struct {
	sMap map[string]string // token -> class label ("BAD", "GOOD" or "MEH")
	busy sync.Mutex        // locked by the methods that read or write sMap
}

// safeStats pairs a set of named counters with the mutex used to serialize
// access to them.
type safeStats struct {
	stats map[string]int64 // counter name -> number of times it was recorded
	busy  sync.Mutex       // locked by AddStats while it updates stats
}

// ControPlane is the package-level set of channels that feed the classifier.
// Its channels are created by (*ByClassifier).enroll.
var ControPlane ByControlPlane

// ByClassifier is the structure containing our Pseudo-Bayes classifier.
// Tokens are learned into Learning; updateLearners periodically promotes that
// table to Working, which is the one Posterior scores against.
type ByClassifier struct {
	STATS      safeStats      // named event counters, incremented by AddStats
	Learning   safeClassifier // table currently being built by IsBAD and IsGOOD
	Working    safeClassifier // table consulted by Posterior
	Generation int64          // generation of Working; set by updateLearners on promotion, 0 after enroll
}

// AddStats increments the counter stored under action by one, holding the
// statistics lock for the duration of the update.
func (c *ByClassifier) AddStats(action string) {
	st := &c.STATS

	st.busy.Lock()
	defer st.busy.Unlock()

	// Indexing a missing key yields zero, so a first sighting ends up at 1.
	st.stats[action]++
}

// IsBAD learns every whitespace-separated token of key as bad evidence in the
// Learning table. A token seen for the first time is recorded as "BAD"; a
// token previously recorded as "GOOD" is downgraded to "MEH"; tokens already
// "BAD" or "MEH" keep their label.
func (c *ByClassifier) IsBAD(key string) {
	log.Println("BAD Received", key)

	words := strings.Fields(key)
	table := &c.Learning

	table.busy.Lock()
	defer table.busy.Unlock()

	for _, word := range words {
		label, known := table.sMap[word]

		switch {
		case !known:
			table.sMap[word] = "BAD"
		case label == "BAD":
			log.Println("Word was known as bad:", word)
		case label == "GOOD":
			// Conflicting evidence: the token becomes ambiguous.
			table.sMap[word] = "MEH"
			log.Println("So sad, work was known as good", word)
		case label == "MEH":
			log.Println("Word was known as ambiguos:", word)
		}
	}

	log.Println("BAD Learned", key)
}

// IsGOOD learns every whitespace-separated token of key as good evidence in
// the Learning table. A token seen for the first time is recorded as "GOOD";
// a token previously recorded as "BAD" is downgraded to "MEH"; tokens already
// "GOOD" or "MEH" keep their label.
func (c *ByClassifier) IsGOOD(key string) {
	words := strings.Fields(key)

	log.Println("GOOD Received", key)

	table := &c.Learning

	table.busy.Lock()
	defer table.busy.Unlock()

	for _, word := range words {
		label, known := table.sMap[word]

		switch {
		case !known:
			table.sMap[word] = "GOOD"
		case label == "GOOD":
			log.Println("Word was known as good: ", word)
		case label == "BAD":
			// Conflicting evidence: the token becomes ambiguous.
			table.sMap[word] = "MEH"
			log.Println("So sad, work was known as bad: ", word)
		case label == "MEH":
			log.Println("Word was known as ambiguos: ", word)
		}
	}

	log.Println("GOOD Learned", key)
}

// Posterior scores a header string against the Working token table.
//
// hdr is passed through sanitizeHeaders and split on whitespace. The returned
// map always has exactly two keys:
//
//	"GOOD" = 1 - (token occurrences labelled "BAD")  / (number of tokens)
//	"BAD"  = 1 - (token occurrences labelled "GOOD") / (number of tokens)
//
// Tokens that are unknown or labelled "MEH" count towards neither side. When
// hdr yields no tokens, or while Generation is still 0, both scores are a
// neutral 0.5.
func (c *ByClassifier) Posterior(hdr string) map[string]float64 {

	tokens := strings.Fields(sanitizeHeaders(hdr))
	ff := map[string]float64{"BAD": 0.5, "GOOD": 0.5}

	if len(tokens) == 0 {
		return ff
	}

	log.Println("Posterior locking the Working Bayesian")
	c.Working.busy.Lock()
	defer c.Working.busy.Unlock()

	// Generation is written by updateLearners while it holds the Working
	// lock, so it must be read under that lock too; reading it before
	// locking is a data race.
	if c.Generation == 0 {
		return ff
	}

	var totalGood, totalBad float64

	for _, tk := range tokens {
		// A missing token yields "", which matches neither case.
		switch c.Working.sMap[tk] {
		case "BAD":
			totalBad++
		case "GOOD":
			totalGood++
		}
	}

	total := float64(len(tokens))
	ff["GOOD"] = 1 - (totalBad / total)
	ff["BAD"] = 1 - (totalGood / total)

	return ff

}

// enroll prepares the classifier for use: it creates the control-plane
// channels, resets the generation and all tables, seeds the Learning table
// from blacklist.txt and whitelist.txt, and starts the goroutines that drain
// the channels and periodically promote the Learning table.
func (c *ByClassifier) enroll() {
	const backlog = 2048

	ControPlane = ByControlPlane{
		BadTokens:   make(chan string, backlog),
		GoodTokens:  make(chan string, backlog),
		StatsTokens: make(chan string, backlog),
	}

	c.Generation = 0
	c.Learning.sMap = map[string]string{}
	c.Working.sMap = map[string]string{}
	c.STATS.stats = map[string]int64{}

	// Seed lists are loaded synchronously, before any worker is running.
	seeds := []struct{ path, class string }{
		{"blacklist.txt", "BAD"},
		{"whitelist.txt", "GOOD"},
	}
	for _, seed := range seeds {
		c.readInitList(seed.path, seed.class)
	}

	workers := []func(){
		c.readBadTokens,
		c.readGoodTokens,
		c.readStatsTokens,
		c.updateLearners,
	}
	for _, worker := range workers {
		go worker()
	}

	log.Println("Classifier populated...")
}

// readBadTokens feeds every string received on ControPlane.BadTokens to
// IsBAD. It returns only when that channel is closed.
func (c *ByClassifier) readBadTokens() {
	log.Println("Start reading BAD tokens")

	// The channel is captured once, exactly as a range loop would do.
	feed := ControPlane.BadTokens
	for {
		token, open := <-feed
		if !open {
			return
		}
		log.Println("Received BAD Token: ", token)
		c.IsBAD(token)
	}
}

// readGoodTokens feeds every string received on ControPlane.GoodTokens to
// IsGOOD. It returns only when that channel is closed.
func (c *ByClassifier) readGoodTokens() {
	log.Println("Start reading GOOD tokens")

	// The channel is captured once, exactly as a range loop would do.
	feed := ControPlane.GoodTokens
	for {
		token, open := <-feed
		if !open {
			return
		}
		log.Println("Received GOOD Token: ", token)
		c.IsGOOD(token)
	}
}

// readStatsTokens passes every counter name received on
// ControPlane.StatsTokens to AddStats. It returns only when that channel is
// closed.
func (c *ByClassifier) readStatsTokens() {
	log.Println("Start reading STATS tokens")

	// The channel is captured once, exactly as a range loop would do.
	feed := ControPlane.StatsTokens
	for {
		token, open := <-feed
		if !open {
			return
		}
		c.AddStats(token)
	}
}

// readInitList seeds the classifier from a text file, one entry per line.
//
// filePath is the file to read. class selects how each line is learned:
// "BAD" lines go to IsBAD and "GOOD" lines go to IsGOOD; any other class
// reads the file but learns nothing. Lines of three bytes or fewer are
// skipped.
//
// Failures are logged rather than returned: if the file cannot be opened
// nothing is loaded, and if reading stops early (I/O error, or a line longer
// than the scanner's token limit) the lines read so far stay loaded.
func (c *ByClassifier) readInitList(filePath, class string) {

	inFile, err := os.Open(filePath)
	if err != nil {
		log.Println(err.Error() + `: ` + filePath)
		return
	}
	defer inFile.Close()

	scanner := bufio.NewScanner(inFile)
	for scanner.Scan() {
		line := scanner.Text()
		if len(line) <= 3 {
			continue
		}

		switch class {
		case "BAD":
			log.Println("Loading into Blacklist: ", line) // the line
			c.IsBAD(line)
		case "GOOD":
			log.Println("Loading into Whitelist: ", line) // the line
			c.IsGOOD(line)
		}
	}

	// Scan returning false also covers failures; without this check a
	// truncated list would go unnoticed.
	if err := scanner.Err(); err != nil {
		log.Println(err.Error() + `: ` + filePath)
	}

}

// updateLearners runs forever, once immediately and then every ten seconds,
// deciding whether the Learning table should be promoted to Working.
//
// The target generation is ProxyFlow.seniority / Maturity (0 when Maturity is
// not positive). Promotion happens when the target exceeds c.Generation or
// when Learning holds more tokens than ProxyFlow.collection. On promotion the
// Learning table becomes the Working table, a fresh empty Learning table is
// started, Generation is set to the target and a "GENERATION" stat is sent on
// ControPlane.StatsTokens.
func (c *ByClassifier) updateLearners() {

	log.Println("Bayes Updater Start...")

	ticker := time.NewTicker(10 * time.Second)

	// Receiving from the ticker in the post statement makes the body run once
	// right away and then once per tick.
	for ; true; <-ticker.C {
		var currentGen int64
		log.Println("Maturity is:", Maturity)
		log.Println("Seniority is:", ProxyFlow.seniority)
		if Maturity > 0 {
			currentGen = ProxyFlow.seniority / Maturity
		}
		log.Println("Current Generation is: ", currentGen)
		log.Println("Working Generation is: ", c.Generation)

		// Learning.sMap is written by IsBAD/IsGOOD from other goroutines, so
		// even its length has to be read under the Learning lock.
		c.Learning.busy.Lock()
		promote := currentGen > c.Generation || float64(len(c.Learning.sMap)) > ProxyFlow.collection
		if promote {
			// Lock order is always Learning, then Working.
			c.Working.busy.Lock()
			c.Working.sMap = c.Learning.sMap
			c.Learning.sMap = make(map[string]string)
			c.Generation = currentGen
			c.Working.busy.Unlock()
		}
		c.Learning.busy.Unlock()

		if promote {
			log.Println("Generation Updated to: ", c.Generation)
			// Sent after the locks are released so that a full StatsTokens
			// buffer cannot stall IsBAD, IsGOOD or Posterior.
			ControPlane.StatsTokens <- "GENERATION"
		}

	}

}