You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
277 lines
5.4 KiB
Go
277 lines
5.4 KiB
Go
package main
|
|
|
|
import (
|
|
"bufio"
|
|
"log"
|
|
"os"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
// ByControlPlane groups the channels through which the rest of the program
// feeds the classifier.
type ByControlPlane struct {
	// BadTokens carries text whose words are to be learned as "BAD",
	// GoodTokens carries text whose words are to be learned as "GOOD",
	// and StatsTokens carries the names of counters to increment.
	BadTokens, GoodTokens, StatsTokens chan string
}
|
|
|
|
// Lock-guarded maps backing the classifier. In both types the busy mutex is
// the lock taken by the methods that touch the accompanying map.
type (
	// safeClassifier associates a word with its class label; the labels
	// written by this file are "BAD", "GOOD" and "MEH".
	safeClassifier struct {
		sMap map[string]string
		busy sync.Mutex
	}

	// safeStats holds named event counters.
	safeStats struct {
		stats map[string]int64
		busy  sync.Mutex
	}
)
|
|
|
|
//ControPlane is the variabile
|
|
var ControPlane ByControlPlane
|
|
|
|
//ByClassifier is the structure containing our Pseudo-Bayes classifier.
|
|
type ByClassifier struct {
|
|
STATS safeStats
|
|
Learning safeClassifier
|
|
Working safeClassifier
|
|
Generation int64
|
|
}
|
|
|
|
//AddStats adds the statistics after proper blocking.
|
|
func (c *ByClassifier) AddStats(action string) {
|
|
|
|
c.STATS.busy.Lock()
|
|
defer c.STATS.busy.Unlock()
|
|
|
|
if _, exists := c.STATS.stats[action]; exists {
|
|
c.STATS.stats[action]++
|
|
} else {
|
|
c.STATS.stats[action] = 1
|
|
}
|
|
|
|
}
|
|
|
|
//IsBAD inserts a bad key in the right place.
|
|
func (c *ByClassifier) IsBAD(key string) {
|
|
|
|
log.Println("BAD Received", key)
|
|
|
|
k := strings.Fields(key)
|
|
|
|
c.Learning.busy.Lock()
|
|
defer c.Learning.busy.Unlock()
|
|
|
|
for _, tk := range k {
|
|
|
|
if kind, exists := c.Learning.sMap[tk]; exists {
|
|
|
|
switch kind {
|
|
case "BAD":
|
|
log.Println("Word was known as bad:", tk)
|
|
case "GOOD":
|
|
c.Learning.sMap[tk] = "MEH"
|
|
log.Println("So sad, work was known as good", tk)
|
|
case "MEH":
|
|
log.Println("Word was known as ambiguos:", tk)
|
|
}
|
|
|
|
} else {
|
|
c.Learning.sMap[tk] = "BAD"
|
|
}
|
|
|
|
}
|
|
|
|
log.Println("BAD Learned", key)
|
|
|
|
}
|
|
|
|
//IsGOOD inserts the key in the right place.
|
|
func (c *ByClassifier) IsGOOD(key string) {
|
|
|
|
k := strings.Fields(key)
|
|
|
|
log.Println("GOOD Received", key)
|
|
|
|
c.Learning.busy.Lock()
|
|
defer c.Learning.busy.Unlock()
|
|
|
|
for _, tk := range k {
|
|
|
|
if kind, exists := c.Learning.sMap[tk]; exists {
|
|
|
|
switch kind {
|
|
case "GOOD":
|
|
log.Println("Word was known as good: ", tk)
|
|
case "BAD":
|
|
c.Learning.sMap[tk] = "MEH"
|
|
log.Println("So sad, work was known as bad: ", tk)
|
|
case "MEH":
|
|
log.Println("Word was known as ambiguos: ", tk)
|
|
}
|
|
|
|
} else {
|
|
c.Learning.sMap[tk] = "GOOD"
|
|
}
|
|
|
|
}
|
|
|
|
log.Println("GOOD Learned", key)
|
|
|
|
}
|
|
|
|
//Posterior calculates Shannon based entropy using bad and good as different distributions
|
|
func (c *ByClassifier) Posterior(hdr string) map[string]float64 {
|
|
|
|
tokens := strings.Fields(hdr)
|
|
ff := make(map[string]float64)
|
|
|
|
if c.Generation == 0 || len(tokens) == 0 {
|
|
ff["BAD"] = 0.5
|
|
ff["GOOD"] = 0.5
|
|
return ff
|
|
|
|
}
|
|
|
|
log.Println("Posterior locking the Working Bayesian")
|
|
c.Working.busy.Lock()
|
|
defer c.Working.busy.Unlock()
|
|
|
|
var totalGood, totalBad float64
|
|
|
|
for _, tk := range tokens {
|
|
|
|
if kind, exists := c.Working.sMap[tk]; exists {
|
|
|
|
switch kind {
|
|
case "BAD":
|
|
totalBad++
|
|
case "GOOD":
|
|
totalGood++
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ff["GOOD"] = 1 - (totalBad / float64(len(tokens)))
|
|
ff["BAD"] = 1 - (totalGood / float64(len(tokens)))
|
|
|
|
return ff
|
|
|
|
}
|
|
|
|
func (c *ByClassifier) enroll() {
|
|
|
|
ControPlane.BadTokens = make(chan string, 2048)
|
|
ControPlane.GoodTokens = make(chan string, 2048)
|
|
ControPlane.StatsTokens = make(chan string, 2048)
|
|
|
|
c.Generation = 0
|
|
c.Learning.sMap = make(map[string]string)
|
|
c.Working.sMap = make(map[string]string)
|
|
c.STATS.stats = make(map[string]int64)
|
|
|
|
c.readInitList("blacklist.txt", "BAD")
|
|
c.readInitList("whitelist.txt", "GOOD")
|
|
|
|
go c.readBadTokens()
|
|
go c.readGoodTokens()
|
|
go c.readStatsTokens()
|
|
go c.updateLearners()
|
|
|
|
log.Println("Classifier populated...")
|
|
|
|
}
|
|
|
|
func (c *ByClassifier) readBadTokens() {
|
|
|
|
log.Println("Start reading BAD tokens")
|
|
|
|
for token := range ControPlane.BadTokens {
|
|
log.Println("Received BAD Token: ", token)
|
|
c.IsBAD(token)
|
|
}
|
|
|
|
}
|
|
|
|
func (c *ByClassifier) readGoodTokens() {
|
|
|
|
log.Println("Start reading GOOD tokens")
|
|
|
|
for token := range ControPlane.GoodTokens {
|
|
log.Println("Received GOOD Token: ", token)
|
|
c.IsGOOD(token)
|
|
}
|
|
|
|
}
|
|
|
|
func (c *ByClassifier) readStatsTokens() {
|
|
|
|
log.Println("Start reading STATS tokens")
|
|
|
|
for token := range ControPlane.StatsTokens {
|
|
c.AddStats(token)
|
|
}
|
|
|
|
}
|
|
|
|
func (c *ByClassifier) readInitList(filePath, class string) {
|
|
|
|
inFile, err := os.Open(filePath)
|
|
if err != nil {
|
|
log.Println(err.Error() + `: ` + filePath)
|
|
return
|
|
}
|
|
defer inFile.Close()
|
|
|
|
scanner := bufio.NewScanner(inFile)
|
|
for scanner.Scan() {
|
|
|
|
if len(scanner.Text()) > 3 {
|
|
switch class {
|
|
case "BAD":
|
|
log.Println("Loading into Blacklist: ", scanner.Text()) // the line
|
|
c.IsBAD(scanner.Text())
|
|
case "GOOD":
|
|
log.Println("Loading into Whitelist: ", scanner.Text()) // the line
|
|
c.IsGOOD(scanner.Text())
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
func (c *ByClassifier) updateLearners() {
|
|
|
|
log.Println("Bayes Updater Start...")
|
|
|
|
ticker := time.NewTicker(10 * time.Second)
|
|
|
|
for ; true; <-ticker.C {
|
|
var currentGen int64
|
|
log.Println("Maturity is:", Maturity)
|
|
log.Println("Seniority is:", ProxyFlow.seniority)
|
|
if Maturity > 0 {
|
|
currentGen = ProxyFlow.seniority / Maturity
|
|
} else {
|
|
currentGen = 0
|
|
}
|
|
log.Println("Current Generation is: ", currentGen)
|
|
log.Println("Working Generation is: ", c.Generation)
|
|
if currentGen > c.Generation || float64(len(c.Learning.sMap)) > ProxyFlow.collection {
|
|
c.Learning.busy.Lock()
|
|
c.Working.busy.Lock()
|
|
c.Working.sMap = c.Learning.sMap
|
|
c.Learning.sMap = make(map[string]string)
|
|
c.Generation = currentGen
|
|
log.Println("Generation Updated to: ", c.Generation)
|
|
ControPlane.StatsTokens <- "GENERATION"
|
|
c.Learning.busy.Unlock()
|
|
c.Working.busy.Unlock()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|