Картинка блога

Нашел великолепный генератор имен на основе цепочек Маркова. Он написан на C# и хорошо подходит для тестов. От себя добавил только словарь и тест.

Источник: https://github.com/tomba/dwarrowdelf/blob/master/Common/Lib/MarkovNameGenerator.cs

Вот сам генератор:


/// <summary>
/// Generates random, previously unseen names using a Markov chain of a fixed
/// order that is built from a list of sample names.
/// </summary>
public sealed class MarkovNameGenerator
{
    /// <summary>Built-in sample names used by <see cref="CreateDefault"/>.</summary>
    public static readonly string[] NameSamples = {
        "Azog","Balcmeg","Boldog","Bolg","Golfimbul","Gorbag","Gorgol","Grishnákh","Lagduf","Lug","Lugdush","Mauhúr","Muzgash","Orcobal","Othrod","Radbug","Shagrat","Snaga","Ufthak","Uglúk","Aiden","Jackson","Mason","Liam","Jacob","Jayden","Ethan","Noah","Lucas","Logan","Caleb","Caden","Jack","Ryan","Connor","Michael","Elijah","Brayden","Benjamin","Nicholas","Alexander","William","Matthew","James","Landon","Nathan","Dylan","Evan","Luke","Andrew","Gabriel","Gavin","Joshua","Owen","Danil","Carter","Tyler","Cameron","Christian","Wyatt","Henry","Eli","Joseph","Max","Isaac","Samuel","Anthony","Grayson","Zachary","David","Christopher","Jhn","Isaiah","Levi","Jonathan","Oliver","Chase","Cooper","Tristan","Colton","Austin","Colin","Charlie","Dominic","Parker","Hunter","Thomas","Alex","Ia","Jordan","Cole","Julian","Aaron","Carson","Miles","Blake","Brody","Adam","Sebastian","Adrian","Nolan","Sean","Riley","Bentley","Xavier","Hayden","Jeemiah","Jason","Jake","Asher","Micah","Jace","Brandon","Josiah","Hudson","Nathaniel","Bryson","Ryder","Justin","Bryce","Sophia","Emma","Isabella","Oliia","Ava","Lily","Chloe","Madison","Emily","Abigail","Addison","Mia","Madelyn","Ella","Hailey","Kaylee","Avery","Kaitlyn","Riley","Aubrey","Brooklyn","Peyton","Layla","Hannah","Charlotte","Bella","Natalie","Sarah","Grace","Amelia","Kylie","Arianna","Anna","Elizabeth","Sophie","Claire","Lila","Aaliya","Gabriella","Elise","Lillian","Samantha","Makayla","Audrey","Alyssa","Ellie","Alexis","Isabelle","Savannah","Evelyn","Leah","Keira","Allison","Maya","Lucy","Sydney","Taylor","Molly","Lauren","Harper","Scarlett","Brianna","Victoria","Liliana","Aria","Kayla","Annabelle","Gianna","Kennedy","Stella","eagan","Julia","Bailey","Alexandra","Jordyn","Nora","Carolin","Mackenzie","Jasmine","Jocelyn","Kendall","Morgan","Nevaeh","Maria","Eva","Juliana","Abby","Alexa","Summer","Booke","Penelope","Violet","Kate","Hadley","Ashlyn","Sadie","Paige","Katherine","Sienna","Piper",
    };

    // Upper bound on the candidates tried by a single GetNextName call. Without it the
    // call would spin forever once every name the chain can produce has been handed out.
    private const int MaxAttempts = 100000;

    // Maps each token of m_order letters to every letter that followed it in the samples.
    // Duplicates are kept on purpose so that frequent successors are drawn more often.
    private readonly Dictionary<string, List<char>> m_chains = new Dictionary<string, List<char>>();
    private readonly List<string> m_samples = new List<string>();
    // Hash set instead of a list: the uniqueness check runs once per candidate.
    private readonly HashSet<string> m_used = new HashSet<string>();
    private readonly Random m_rnd;
    private readonly int m_order;
    private readonly int m_minLength;

    /// <summary>
    /// Creates a generator over <see cref="NameSamples"/> with order 1 and a minimum
    /// name length of 4.
    /// </summary>
    public static MarkovNameGenerator CreateDefault()
    {
        return new MarkovNameGenerator(NameSamples, 1, 4);
    }

    /// <summary>
    /// Creates a generator that draws its randomness from a new <see cref="Random"/>.
    /// </summary>
    /// <param name="sampleNames">Sample lines; each line may hold several comma-separated names.</param>
    /// <param name="order">Number of preceding letters that determine the next one; values below 1 are raised to 1.</param>
    /// <param name="minLength">Minimum length of a generated name; values below 1 are raised to 1.</param>
    /// <exception cref="ArgumentNullException"><paramref name="sampleNames"/> is null.</exception>
    public MarkovNameGenerator(IEnumerable<string> sampleNames, int order, int minLength)
        : this(sampleNames, order, minLength, new Random())
    {
    }

    /// <summary>
    /// Creates a generator that draws its randomness from <paramref name="random"/>.
    /// Passing a seeded instance makes the generated sequence reproducible.
    /// </summary>
    /// <param name="sampleNames">Sample lines; each line may hold several comma-separated names.</param>
    /// <param name="order">Number of preceding letters that determine the next one; values below 1 are raised to 1.</param>
    /// <param name="minLength">Minimum length of a generated name; values below 1 are raised to 1.</param>
    /// <param name="random">Random number source used for every draw.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="sampleNames"/> or <paramref name="random"/> is null.
    /// </exception>
    public MarkovNameGenerator(IEnumerable<string> sampleNames, int order, int minLength, Random random)
    {
        if (sampleNames == null)
        {
            throw new ArgumentNullException("sampleNames");
        }
        if (random == null)
        {
            throw new ArgumentNullException("random");
        }

        m_rnd = random;
        m_order = order < 1 ? 1 : order;
        m_minLength = minLength < 1 ? 1 : minLength;

        // Split comma-delimited lines into individual upper-cased sample words.
        foreach (string line in sampleNames)
        {
            if (line == null)
            {
                continue;
            }

            foreach (string word in line.Split(','))
            {
                // Invariant casing keeps the chain independent of the current culture.
                string upper = word.Trim().ToUpperInvariant();

                // A word needs at least one letter after its first token to contribute.
                if (upper.Length >= m_order + 1)
                {
                    m_samples.Add(upper);
                }
            }
        }

        // Build the chains: every token of m_order letters -> the letter that follows it.
        foreach (string word in m_samples)
        {
            for (int letter = 0; letter < word.Length - m_order; letter++)
            {
                string token = word.Substring(letter, m_order);

                List<char> entry;
                if (!m_chains.TryGetValue(token, out entry))
                {
                    entry = new List<char>();
                    m_chains[token] = entry;
                }
                entry.Add(word[letter + m_order]);
            }
        }
    }

    /// <summary>
    /// Returns a random name that is at least the configured minimum length and has not
    /// been returned since construction or the last <see cref="Reset"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No usable sample names were supplied, or no unused name was found within the
    /// internal attempt limit.
    /// </exception>
    public string GetNextName()
    {
        if (m_samples.Count == 0)
        {
            throw new InvalidOperationException(
                "No sample names are long enough for the configured order.");
        }

        for (int attempt = 0; attempt < MaxAttempts; attempt++)
        {
            // Seed the name with a random token from a random sample; the sample's
            // length caps the length of the generated name.
            string sample = m_samples[m_rnd.Next(m_samples.Count)];
            int start = m_rnd.Next(0, sample.Length - m_order);
            string s = sample.Substring(start, m_order);

            while (s.Length < sample.Length)
            {
                char next;
                if (!TryGetLetter(s.Substring(s.Length - m_order, m_order), out next))
                {
                    break; // dead end: nothing ever followed this token
                }
                s += next;
            }

            s = Capitalize(s);

            // HashSet.Add returns false for a name that was already handed out.
            if (s.Length >= m_minLength && m_used.Add(s))
            {
                return s;
            }
        }

        throw new InvalidOperationException(
            "Could not generate an unused name; the samples may be exhausted. Call Reset() to reuse names.");
    }

    /// <summary>Forgets the names handed out so far so that they may be returned again.</summary>
    public void Reset()
    {
        m_used.Clear();
    }

    // Picks a random successor of the token; returns false when the token has none.
    private bool TryGetLetter(string token, out char letter)
    {
        List<char> letters;
        if (!m_chains.TryGetValue(token, out letters))
        {
            letter = '\0';
            return false;
        }

        letter = letters[m_rnd.Next(letters.Count)];
        return true;
    }

    // Lower-cases everything but the first letter of each space-separated word and
    // collapses runs of spaces into a single space.
    private static string Capitalize(string s)
    {
        if (!s.Contains(" "))
        {
            return s.Substring(0, 1) + s.Substring(1).ToLowerInvariant();
        }

        string result = "";
        foreach (string part in s.Split(' '))
        {
            if (part == "")
            {
                continue;
            }
            if (result != "")
            {
                result += " ";
            }
            result += part.Length == 1
                ? part.ToUpperInvariant()
                : part.Substring(0, 1) + part.Substring(1).ToLowerInvariant();
        }
        return result;
    }
}

 

И тест к нему:


/// <summary>
/// Verifies that a generator never returns the same name twice, and that this holds
/// for two generators used side by side.
/// </summary>
[TestMethod]
public void MarkovNamesAreUniqueTest()
{
    const int nameCount = 10000;

    var gen1 = MarkovNameGenerator.CreateDefault();
    var gen2 = MarkovNameGenerator.CreateDefault();

    var genList1 = new List<string>(nameCount);
    var genList2 = new List<string>(nameCount);
    for (int i = 0; i < nameCount; i++)
    {
        genList1.Add(gen1.GetNextName());
        genList2.Add(gen2.GetNextName());
    }

    // The two generators are seeded independently, so their sequences are not compared
    // with each other: whether two parameterless Random instances created back to back
    // agree depends on the runtime's seeding, not on the generator under test.
    Assert.AreEqual(nameCount, genList1.Distinct().Count());
    Assert.AreEqual(nameCount, genList2.Distinct().Count());
}

Похожие статьи

    Нет похожих статей.

Добавить комментарий

Ваш e-mail не будет опубликован. Обязательные поля помечены *

Можно использовать следующие HTML-теги и атрибуты: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <s> <strike> <strong>