Чи є спосіб перетворити числові слова в цілі числа?

Question 1

Мені потрібно перетворити oneна 1, twoв 2тощо.

Чи є спосіб зробити це за допомогою бібліотеки, класу чи чогось іншого?

Question 2

Більшість цього коду - це налаштування dword з numwords, що робиться лише під час першого дзвінка.

def text2int(textnum, numwords={}):
    if not numwords:
      units = [
        "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
        "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
        "sixteen", "seventeen", "eighteen", "nineteen",
      ]

      tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]

      scales = ["hundred", "thousand", "million", "billion", "trillion"]

      numwords["and"] = (1, 0)
      for idx, word in enumerate(units):    numwords[word] = (1, idx)
      for idx, word in enumerate(tens):     numwords[word] = (1, idx * 10)
      for idx, word in enumerate(scales):   numwords[word] = (10 ** (idx * 3 or 2), 0)

    current = result = 0
    for word in textnum.split():
        if word not in numwords:
          raise Exception("Illegal word: " + word)

        scale, increment = numwords[word]
        current = current * scale + increment
        if scale > 100:
            result += current
            current = 0

    return result + current

print text2int("seven billion one hundred million thirty one thousand three hundred thirty seven")
#7100031337

Question 3

Я щойно випустив для PyPI модуль python, який називається word2number, з точною метою. https://github.com/akshaynagpal/w2n

Встановіть його за допомогою:

pip install word2number

переконайтеся, що ваш піп оновлено до останньої версії.

Використання:

from word2number import w2n

print w2n.word_to_num("two million three thousand nine hundred and eighty four")
2003984

Question 4

Якщо комусь цікаво, я зламав версію, яка підтримує решту рядка (хоча в ньому можуть бути помилки, я не занадто тестував).

def text2int (textnum, numwords={}):
    if not numwords:
        units = [
        "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
        "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
        "sixteen", "seventeen", "eighteen", "nineteen",
        ]

        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]

        scales = ["hundred", "thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):  numwords[word] = (1, idx)
        for idx, word in enumerate(tens):       numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)

    ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    textnum = textnum.replace('-', ' ')

    current = result = 0
    curstring = ""
    onnumber = False
    for word in textnum.split():
        if word in ordinal_words:
            scale, increment = (1, ordinal_words[word])
            current = current * scale + increment
            if scale > 100:
                result += current
                current = 0
            onnumber = True
        else:
            for ending, replacement in ordinal_endings:
                if word.endswith(ending):
                    word = "%s%s" % (word[:-len(ending)], replacement)

            if word not in numwords:
                if onnumber:
                    curstring += repr(result + current) + " "
                curstring += word + " "
                result = current = 0
                onnumber = False
            else:
                scale, increment = numwords[word]

                current = current * scale + increment
                if scale > 100:
                    result += current
                    current = 0
                onnumber = True

    if onnumber:
        curstring += repr(result + current)

    return curstring

Приклад:

 >>> text2int("I want fifty five hot dogs for two hundred dollars.")
 I want 55 hot dogs for 200 dollars.

Якщо у вас є, скажімо, "200 доларів", можуть виникнути проблеми. Але, це було дійсно грубо.

Question 5

Мені потрібно було щось дещо інше, оскільки моє введення полягає у перетворенні мови в текст, і рішення не завжди полягає в підсумовуванні цифр. Наприклад, "мій поштовий індекс один два три чотири п'ять" не повинен перетворюватися на "мій поштовий індекс 15".

Я взяв відповідь Андрія і налагоджений його обробляти кілька інших випадки людей підсвічених як помилки, а також доданий підтримка прикладів , як ZipCode я згадав вище. Деякі основні тестові приклади наведені нижче, але я впевнений, що ще є місце для вдосконалення.

def is_number(x):
    if type(x) == str:
        x = x.replace(',', '')
    try:
        float(x)
    except:
        return False
    return True

def text2int (textnum, numwords={}):
    units = [
        'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
        'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
        'sixteen', 'seventeen', 'eighteen', 'nineteen',
    ]
    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety']
    scales = ['hundred', 'thousand', 'million', 'billion', 'trillion']
    ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    if not numwords:
        numwords['and'] = (1, 0)
        for idx, word in enumerate(units): numwords[word] = (1, idx)
        for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)

    textnum = textnum.replace('-', ' ')

    current = result = 0
    curstring = ''
    onnumber = False
    lastunit = False
    lastscale = False

    def is_numword(x):
        if is_number(x):
            return True
        if word in numwords:
            return True
        return False

    def from_numword(x):
        if is_number(x):
            scale = 0
            increment = int(x.replace(',', ''))
            return scale, increment
        return numwords[x]

    for word in textnum.split():
        if word in ordinal_words:
            scale, increment = (1, ordinal_words[word])
            current = current * scale + increment
            if scale > 100:
                result += current
                current = 0
            onnumber = True
            lastunit = False
            lastscale = False
        else:
            for ending, replacement in ordinal_endings:
                if word.endswith(ending):
                    word = "%s%s" % (word[:-len(ending)], replacement)

            if (not is_numword(word)) or (word == 'and' and not lastscale):
                if onnumber:
                    # Flush the current number we are building
                    curstring += repr(result + current) + " "
                curstring += word + " "
                result = current = 0
                onnumber = False
                lastunit = False
                lastscale = False
            else:
                scale, increment = from_numword(word)
                onnumber = True

                if lastunit and (word not in scales):                                                                                                                                                                                                                                         
                    # Assume this is part of a string of individual numbers to                                                                                                                                                                                                                
                    # be flushed, such as a zipcode "one two three four five"                                                                                                                                                                                                                 
                    curstring += repr(result + current)                                                                                                                                                                                                                                       
                    result = current = 0                                                                                                                                                                                                                                                      

                if scale > 1:                                                                                                                                                                                                                                                                 
                    current = max(1, current)                                                                                                                                                                                                                                                 

                current = current * scale + increment                                                                                                                                                                                                                                         
                if scale > 100:                                                                                                                                                                                                                                                               
                    result += current                                                                                                                                                                                                                                                         
                    current = 0                                                                                                                                                                                                                                                               

                lastscale = False                                                                                                                                                                                                              
                lastunit = False                                                                                                                                                
                if word in scales:                                                                                                                                                                                                             
                    lastscale = True                                                                                                                                                                                                         
                elif word in units:                                                                                                                                                                                                             
                    lastunit = True

    if onnumber:
        curstring += repr(result + current)

    return curstring

Деякі тести ...

one two three -> 123
three forty five -> 345
three and forty five -> 3 and 45
three hundred and forty five -> 345
three hundred -> 300
twenty five hundred -> 2500
three thousand and six -> 3006
three thousand six -> 3006
nineteenth -> 19
twentieth -> 20
first -> 1
my zip is one two three four five -> my zip is 12345
nineteen ninety six -> 1996
fifty-seventh -> 57
one million -> 1000000
first hundred -> 100
I will buy the first thousand -> I will buy the 1000  # probably should leave ordinal in the string
thousand -> 1000
hundred and six -> 106
1 million -> 1000000

Question 6

Дякую за фрагмент коду ... заощадив мені багато часу!

Мені потрібно було розглянути кілька додаткових випадків синтаксичного аналізу, таких як порядкові слова ("перший", "другий"), слова з переносом ("сто") і дефісні порядкові слова, такі як ("п'ятдесят сьомий"), тому я додав пара рядків:

def text2int(textnum, numwords={}):
    if not numwords:
        units = [
        "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
        "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
        "sixteen", "seventeen", "eighteen", "nineteen",
        ]

        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]

        scales = ["hundred", "thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):  numwords[word] = (1, idx)
        for idx, word in enumerate(tens):       numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)

    ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 'eighth':8, 'ninth':9, 'twelfth':12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    textnum = textnum.replace('-', ' ')

    current = result = 0
    for word in textnum.split():
        if word in ordinal_words:
            scale, increment = (1, ordinal_words[word])
        else:
            for ending, replacement in ordinal_endings:
                if word.endswith(ending):
                    word = "%s%s" % (word[:-len(ending)], replacement)

            if word not in numwords:
                raise Exception("Illegal word: " + word)

            scale, increment = numwords[word]

         current = current * scale + increment
         if scale > 100:
            result += current
            current = 0

    return result + current`

Question 7

Ось тривіальний підхід:

>>> number = {'one':1,
...           'two':2,
...           'three':3,}
>>> 
>>> number['two']
2

Або ви шукаєте щось, що впорається з "дванадцятьма тисячами сто сімдесят двома" ?

Question 8

Це реалізація коду c # у 1-й відповіді:

public static double ConvertTextToNumber(string text)
{
    string[] units = new string[] {
        "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
        "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
        "sixteen", "seventeen", "eighteen", "nineteen",
    };

    string[] tens = new string[] {"", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};

    string[] scales = new string[] { "hundred", "thousand", "million", "billion", "trillion" };

    Dictionary<string, ScaleIncrementPair> numWord = new Dictionary<string, ScaleIncrementPair>();
    numWord.Add("and", new ScaleIncrementPair(1, 0));
    for (int i = 0; i < units.Length; i++)
    {
        numWord.Add(units[i], new ScaleIncrementPair(1, i));
    }

    for (int i = 1; i < tens.Length; i++)
    {
        numWord.Add(tens[i], new ScaleIncrementPair(1, i * 10));                
    }

    for (int i = 0; i < scales.Length; i++)
    {
        if(i == 0)
            numWord.Add(scales[i], new ScaleIncrementPair(100, 0));
        else
            numWord.Add(scales[i], new ScaleIncrementPair(Math.Pow(10, (i*3)), 0));
    }

    double current = 0;
    double result = 0;

    foreach (var word in text.Split(new char[] { ' ', '-', '—'}))
    {
        ScaleIncrementPair scaleIncrement = numWord[word];
        current = current * scaleIncrement.scale + scaleIncrement.increment;
        if (scaleIncrement.scale > 100)
        {
            result += current;
            current = 0;
        }
    }
    return result + current;
}


public struct ScaleIncrementPair
{
    public double scale;
    public int increment;
    public ScaleIncrementPair(double s, int i)
    {
        scale = s;
        increment = i;
    }
}

Question 9

Це можна легко закодувати у словник, якщо існує обмежена кількість цифр, які ви хочете проаналізувати.

Для дещо складніших випадків вам, мабуть, захочеться автоматично створити цей словник на основі порівняно простої граматики чисел. Щось подібне до цього (звичайно, узагальнено ...)

for i in range(10):
   myDict[30 + i] = "thirty-" + singleDigitsDict[i]

Якщо вам потрібно щось більш широке, тоді, схоже, вам знадобляться засоби обробки природного мови. Ця стаття може стати гарною відправною точкою.

Question 10

Швидкий і брудний порт Java реалізації e_h на C # (вгорі). Зверніть увагу, що обидва повертають double, а не int.

public class Text2Double {

    public double Text2Double(String text) {

        String[] units = new String[]{
                "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
                "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
                "sixteen", "seventeen", "eighteen", "nineteen",
        };

        String[] tens = new String[]{"", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};

        String[] scales = new String[]{"hundred", "thousand", "million", "billion", "trillion"};

        Map<String, ScaleIncrementPair> numWord = new LinkedHashMap<>();
        numWord.put("and", new ScaleIncrementPair(1, 0));


        for (int i = 0; i < units.length; i++) {
            numWord.put(units[i], new ScaleIncrementPair(1, i));
        }

        for (int i = 1; i < tens.length; i++) {
            numWord.put(tens[i], new ScaleIncrementPair(1, i * 10));
        }

        for (int i = 0; i < scales.length; i++) {
            if (i == 0)
                numWord.put(scales[i], new ScaleIncrementPair(100, 0));
            else
                numWord.put(scales[i], new ScaleIncrementPair(Math.pow(10, (i * 3)), 0));
        }

        double current = 0;
        double result = 0;

        for(String word : text.split("[ -]"))
        {
            ScaleIncrementPair scaleIncrement = numWord.get(word);
            current = current * scaleIncrement.scale + scaleIncrement.increment;
            if (scaleIncrement.scale > 100) {
                result += current;
                current = 0;
            }
        }
        return result + current;
    }
}

public class ScaleIncrementPair
{
    public double scale;
    public int increment;

    public ScaleIncrementPair(double s, int i)
    {
        scale = s;
        increment = i;
    }
}

Question 11

Змінено так, що text2int (масштаб) поверне правильну конверсію. Наприклад, text2int ("сотня") => 100.

import re

numwords = {}


def text2int(textnum):

    if not numwords:

        units = [ "zero", "one", "two", "three", "four", "five", "six",
                "seven", "eight", "nine", "ten", "eleven", "twelve",
                "thirteen", "fourteen", "fifteen", "sixteen", "seventeen",
                "eighteen", "nineteen"]

        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", 
                "seventy", "eighty", "ninety"]

        scales = ["hundred", "thousand", "million", "billion", "trillion", 
                'quadrillion', 'quintillion', 'sexillion', 'septillion', 
                'octillion', 'nonillion', 'decillion' ]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units): numwords[word] = (1, idx)
        for idx, word in enumerate(tens): numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales): numwords[word] = (10 ** (idx * 3 or 2), 0)

    ordinal_words = {'first':1, 'second':2, 'third':3, 'fifth':5, 
            'eighth':8, 'ninth':9, 'twelfth':12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]
    current = result = 0
    tokens = re.split(r"[\s-]+", textnum)
    for word in tokens:
        if word in ordinal_words:
            scale, increment = (1, ordinal_words[word])
        else:
            for ending, replacement in ordinal_endings:
                if word.endswith(ending):
                    word = "%s%s" % (word[:-len(ending)], replacement)

            if word not in numwords:
                raise Exception("Illegal word: " + word)

            scale, increment = numwords[word]

        if scale > 1:
            current = max(1, current)

        current = current * scale + increment
        if scale > 100:
            result += current
            current = 0

    return result + current

Question 12

Це робить рубіновий самоцвіт Марка Бернса. Нещодавно я роздвоїв його, щоб додати підтримку роками. Ви можете викликати рубіновий код з python .

  require 'numbers_in_words'
  require 'numbers_in_words/duck_punch'

  nums = ["fifteen sixteen", "eighty five sixteen",  "nineteen ninety six",
          "one hundred and seventy nine", "thirteen hundred", "nine thousand two hundred and ninety seven"]
  nums.each {|n| p n; p n.in_numbers}

результати:
"fifteen sixteen" 1516 "eighty five sixteen" 8516 "nineteen ninety six" 1996 "one hundred and seventy nine" 179 "thirteen hundred" 1300 "nine thousand two hundred and ninety seven" 9297

Question 13

Скористайтеся пакетом python: WordToDigits

pip встановити wordtodigits

Він може знайти числа, присутні у формі слова у реченні, а потім перетворити їх у відповідний числовий формат. Також дбає про десяткову частину, якщо вона є. Слово подання чисел може бути де завгодно в уривку .

https://pypi.org/project/wordtodigits/

Question 14

Швидким рішенням є використання inflect.py для створення словника для перекладу.

inflect.py має number_to_words()функцію, яка перетворює число (наприклад 2) у його форму слова (наприклад 'two'). На жаль, його зворотний бік (що дозволить вам уникнути маршруту словника перекладів) не пропонується. Тим не менш, ви можете використовувати цю функцію для створення словника перекладів:

>>> import inflect
>>> p = inflect.engine()
>>> word_to_number_mapping = {}
>>>
>>> for i in range(1, 100):
...     word_form = p.number_to_words(i)  # 1 -> 'one'
...     word_to_number_mapping[word_form] = i
...
>>> print word_to_number_mapping['one']
1
>>> print word_to_number_mapping['eleven']
11
>>> print word_to_number_mapping['forty-three']
43

Якщо ви готові зробити певний час, можливо, можна вивчити внутрішню роботу number_to_words()функції inflect.py та побудувати власний код, щоб робити це динамічно (я не намагався це зробити).

Question 15

Я взяв логіку @ rekurziv і перейшов у Ruby. Я також кодував таблицю пошуку, тому вона не така крута, але може допомогти новачкові зрозуміти, що відбувається.

WORDNUMS = {"zero"=> [1,0], "one"=> [1,1], "two"=> [1,2], "three"=> [1,3],
            "four"=> [1,4], "five"=> [1,5], "six"=> [1,6], "seven"=> [1,7], 
            "eight"=> [1,8], "nine"=> [1,9], "ten"=> [1,10], 
            "eleven"=> [1,11], "twelve"=> [1,12], "thirteen"=> [1,13], 
            "fourteen"=> [1,14], "fifteen"=> [1,15], "sixteen"=> [1,16], 
            "seventeen"=> [1,17], "eighteen"=> [1,18], "nineteen"=> [1,19], 
            "twenty"=> [1,20], "thirty" => [1,30], "forty" => [1,40], 
            "fifty" => [1,50], "sixty" => [1,60], "seventy" => [1,70], 
            "eighty" => [1,80], "ninety" => [1,90],
            "hundred" => [100,0], "thousand" => [1000,0], 
            "million" => [1000000, 0]}

def text_2_int(string)
  numberWords = string.gsub('-', ' ').split(/ /) - %w{and}
  current = result = 0
  numberWords.each do |word|
    scale, increment = WORDNUMS[word]
    current = current * scale + increment
    if scale > 100
      result += current
      current = 0
    end
  end
  return result + current
end

Я хотів обробляти рядки, як two thousand one hundred and forty-six

Question 16

Цей код працює для даних серії:

import pandas as pd
mylist = pd.Series(['one','two','three'])
mylist1 = []
for x in range(len(mylist)):
    mylist1.append(w2n.word_to_num(mylist[x]))
print(mylist1)

Question 17

This code works only for numbers below 99.
both word to Int and int to word.
(for rest need to implement 10-20 lines of code and simple logic. This is just simple code for beginners)


num=input("Enter the number you want to convert : ")
mydict={'1': 'One', '2': 'Two', '3': 'Three', '4': 'Four', '5': 'Five','6': 'Six', '7': 'Seven', '8': 'Eight', '9': 'Nine', '10': 'Ten','11': 'Eleven', '12': 'Twelve', '13': 'Thirteen', '14': 'Fourteen', '15': 'Fifteen', '16': 'Sixteen', '17': 'Seventeen', '18': 'Eighteen', '19': 'Nineteen'}
mydict2=['','','Twenty','Thirty','Fourty','fifty','sixty','Seventy','Eighty','Ninty']
if num.isdigit():
    if(int(num)<20):
        print(" :---> "+mydict[num])
    else:
            var1=int(num)%10
            var2=int(num)/10
            print(" :---> "+mydict2[int(var2)]+mydict[str(var1)])
else:
    num=num.lower();
    dict_w={'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10,'eleven':11,'twelve':12,'thirteen':13,'fourteen':14,'fifteen':15,'sixteen':16,'seventeen':'17','eighteen':'18','nineteen':'19'}
    mydict2=['','','twenty','thirty','fourty','fifty','sixty','seventy','eighty','ninty']
    divide=num[num.find("ty")+2:]
    if num:
        if(num in dict_w.keys()):
            print(" :---> "+str(dict_w[num]))
        elif divide=='' :
                for i in range(0, len(mydict2)-1):
                   if mydict2[i] == num:
                      print(" :---> "+str(i*10))
        else :
            str3=0
            str1=num[num.find("ty")+2:]
            str2=num[:-len(str1)]
            for i in range(0, len(mydict2) ):
                if mydict2[i] == str2:
                    str3=i;
            if str2 not in mydict2:
                print("----->Invalid Input<-----")                
            else:
                try:
                    print(" :---> "+str((str3*10)+dict_w[str1]))
                except:
                    print("----->Invalid Input<-----")
    else:
            print("----->Please Enter Input<-----")