package main import ( "bufio" "log" "os" "strings" "sync" "time" ) //ByControlPlane contains all the channels we need. type ByControlPlane struct { BadTokens chan string GoodTokens chan string StatsTokens chan string } type safeClassifier struct { sMap map[string]string busy sync.Mutex } type safeStats struct { stats map[string]int64 busy sync.Mutex } //ControPlane is the variabile var ControPlane ByControlPlane //ByClassifier is the structure containing our Pseudo-Bayes classifier. type ByClassifier struct { STATS safeStats Learning safeClassifier Working safeClassifier Generation int64 } //AddStats adds the statistics after proper blocking. func (c *ByClassifier) AddStats(action string) { c.STATS.busy.Lock() defer c.STATS.busy.Unlock() if _, exists := c.STATS.stats[action]; exists { c.STATS.stats[action]++ } else { c.STATS.stats[action] = 1 } } //IsBAD inserts a bad key in the right place. func (c *ByClassifier) IsBAD(key string) { log.Println("BAD Received", key) k := strings.Fields(key) c.Learning.busy.Lock() defer c.Learning.busy.Unlock() for _, tk := range k { if kind, exists := c.Learning.sMap[tk]; exists { switch kind { case "BAD": log.Println("Word was known as bad:", tk) case "GOOD": c.Learning.sMap[tk] = "MEH" log.Println("So sad, work was known as good", tk) case "MEH": log.Println("Word was known as ambiguos:", tk) } } else { c.Learning.sMap[tk] = "BAD" } } log.Println("BAD Learned", key) } //IsGOOD inserts the key in the right place. func (c *ByClassifier) IsGOOD(key string) { k := strings.Fields(key) log.Println("GOOD Received", key) c.Learning.busy.Lock() defer c.Learning.busy.Unlock() for _, tk := range k { if kind, exists := c.Learning.sMap[tk]; exists { switch kind { case "GOOD": log.Println("Word was known as good: ", tk) case "BAD": c.Learning.sMap[tk] = "MEH" log.Println("So sad, work was known as bad: ", tk) case "MEH": log.Println("Word was known as ambiguos: ", tk) } } else { c.Learning.sMap[tk] = "GOOD" } } log.Println("GOOD Learned", key) } //Posterior calculates Shannon based entropy using bad and good as different distributions func (c *ByClassifier) Posterior(hdr string) map[string]float64 { tokens := strings.Fields(hdr) ff := make(map[string]float64) if c.Generation == 0 || len(tokens) == 0 { ff["BAD"] = 0.5 ff["GOOD"] = 0.5 return ff } log.Println("Posterior locking the Working Bayesian") c.Working.busy.Lock() defer c.Working.busy.Unlock() var totalGood, totalBad float64 for _, tk := range tokens { if kind, exists := c.Working.sMap[tk]; exists { switch kind { case "BAD": totalBad++ case "GOOD": totalGood++ } } } ff["GOOD"] = 1 - (totalBad / float64(len(tokens))) ff["BAD"] = 1 - (totalGood / float64(len(tokens))) return ff } func (c *ByClassifier) enroll() { ControPlane.BadTokens = make(chan string, 2048) ControPlane.GoodTokens = make(chan string, 2048) ControPlane.StatsTokens = make(chan string, 2048) c.Generation = 0 c.Learning.sMap = make(map[string]string) c.Working.sMap = make(map[string]string) c.STATS.stats = make(map[string]int64) c.readInitList("blacklist.txt", "BAD") c.readInitList("whitelist.txt", "GOOD") go c.readBadTokens() go c.readGoodTokens() go c.readStatsTokens() go c.updateLearners() log.Println("Classifier populated...") } func (c *ByClassifier) readBadTokens() { log.Println("Start reading BAD tokens") for token := range ControPlane.BadTokens { log.Println("Received BAD Token: ", token) c.IsBAD(token) } } func (c *ByClassifier) readGoodTokens() { log.Println("Start reading GOOD tokens") for token := range ControPlane.GoodTokens { log.Println("Received GOOD Token: ", token) c.IsGOOD(token) } } func (c *ByClassifier) readStatsTokens() { log.Println("Start reading STATS tokens") for token := range ControPlane.StatsTokens { c.AddStats(token) } } func (c *ByClassifier) readInitList(filePath, class string) { inFile, err := os.Open(filePath) if err != nil { log.Println(err.Error() + `: ` + filePath) return } defer inFile.Close() scanner := bufio.NewScanner(inFile) for scanner.Scan() { if len(scanner.Text()) > 3 { switch class { case "BAD": log.Println("Loading into Blacklist: ", scanner.Text()) // the line c.IsBAD(scanner.Text()) case "GOOD": log.Println("Loading into Whitelist: ", scanner.Text()) // the line c.IsGOOD(scanner.Text()) } } } } func (c *ByClassifier) updateLearners() { log.Println("Bayes Updater Start...") ticker := time.NewTicker(10 * time.Second) for ; true; <-ticker.C { var currentGen int64 log.Println("Maturity is:", Maturity) log.Println("Seniority is:", ProxyFlow.seniority) if Maturity > 0 { currentGen = ProxyFlow.seniority / Maturity } else { currentGen = 0 } log.Println("Current Generation is: ", currentGen) log.Println("Working Generation is: ", c.Generation) if currentGen > c.Generation || float64(len(c.Learning.sMap)) > ProxyFlow.collection { c.Learning.busy.Lock() c.Working.busy.Lock() c.Working.sMap = c.Learning.sMap c.Learning.sMap = make(map[string]string) c.Generation = currentGen log.Println("Generation Updated to: ", c.Generation) ControPlane.StatsTokens <- "GENERATION" c.Learning.busy.Unlock() c.Working.busy.Unlock() } } }