Commit
·
9dbb134
1
Parent(s):
1bc58c6
Update code
Browse files- routers/soundex.py +6 -2
- routers/spell.py +11 -3
- routers/tokenize.py +14 -4
- routers/util.py +16 -4
routers/soundex.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
-
|
|
|
|
| 3 |
from pythainlp.soundex import (
|
| 4 |
soundex as py_soundex
|
| 5 |
)
|
|
@@ -25,4 +26,7 @@ def soundex(word: str, engine: SoundexEngine = "udom83"):
|
|
| 25 |
- **word**: A word to convert into a phonetic code.
|
| 26 |
- **engine**: Soundex Engine (default is udom83)
|
| 27 |
"""
|
| 28 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
+
import json
|
| 3 |
+
from fastapi import APIRouter, Response
|
| 4 |
from pythainlp.soundex import (
|
| 5 |
soundex as py_soundex
|
| 6 |
)
|
|
|
|
| 26 |
- **word**: A word to convert into a phonetic code.
|
| 27 |
- **engine**: Soundex Engine (default is udom83)
|
| 28 |
"""
|
| 29 |
+
return Response(
|
| 30 |
+
json.dumps({"soundex": py_soundex(text=word, engine=engine)}, ensure_ascii=False),
|
| 31 |
+
media_type="application/json",
|
| 32 |
+
)
|
routers/spell.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
-
|
|
|
|
| 3 |
from pythainlp.spell import (
|
| 4 |
correct as py_correct,
|
| 5 |
spell as py_spell
|
|
@@ -33,7 +34,11 @@ def correct(word: float, engine: CorrectEngine = "pn"):
|
|
| 33 |
- **word**: The word whose spelling should be corrected.
|
| 34 |
- **engine**: Correct Engine (default is pn)
|
| 35 |
"""
|
| 36 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
@router.post('/spell', response_model=SpellResponse)
|
| 39 |
def spell(word: float, engine: SpellEngine = "pn"):
|
|
@@ -45,4 +50,7 @@ def spell(word: float, engine: SpellEngine = "pn"):
|
|
| 45 |
- **word**: The word to spell-check.
|
| 46 |
- **engine**: Spell Engine (default is pn)
|
| 47 |
"""
|
| 48 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
+
import json
|
| 3 |
+
from fastapi import APIRouter, Response
|
| 4 |
from pythainlp.spell import (
|
| 5 |
correct as py_correct,
|
| 6 |
spell as py_spell
|
|
|
|
| 34 |
- **word**: The word whose spelling should be corrected.
|
| 35 |
- **engine**: Correct Engine (default is pn)
|
| 36 |
"""
|
| 37 |
+
return Response(
|
| 38 |
+
json.dumps({"word": py_correct(word, engine=engine)}, ensure_ascii=False),
|
| 39 |
+
media_type="application/json",
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
|
| 43 |
@router.post('/spell', response_model=SpellResponse)
|
| 44 |
def spell(word: float, engine: SpellEngine = "pn"):
|
|
|
|
| 50 |
- **word**: The word to spell-check.
|
| 51 |
- **engine**: Spell Engine (default is pn)
|
| 52 |
"""
|
| 53 |
+
return Response(
|
| 54 |
+
json.dumps({"word": py_spell(word, engine=engine)}, ensure_ascii=False),
|
| 55 |
+
media_type="application/json",
|
| 56 |
+
)
|
routers/tokenize.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
-
|
|
|
|
| 3 |
from pythainlp.tokenize import (
|
| 4 |
word_tokenize as py_word_tokenize,
|
| 5 |
subword_tokenize as py_subword_tokenize,
|
|
@@ -49,7 +50,10 @@ def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
|
|
| 49 |
- **text**: The text to tokenize.
|
| 50 |
- **engine**: Word Tokenize Engine (default is newmm)
|
| 51 |
"""
|
| 52 |
-
return
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
@router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
|
|
@@ -62,7 +66,10 @@ def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
|
|
| 62 |
- **text**: The text to tokenize.
|
| 63 |
- **engine**: Subword Tokenize Engine (default is tcc)
|
| 64 |
"""
|
| 65 |
-
return
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
|
| 68 |
@router.post('/sent_tokenize', response_model=SentTokenizeEngine)
|
|
@@ -75,4 +82,7 @@ def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
|
|
| 75 |
- **text**: The text to tokenize.
|
| 76 |
- **engine**: Sentence Tokenize Engine (default is crfcut)
|
| 77 |
"""
|
| 78 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
+
import json
|
| 3 |
+
from fastapi import APIRouter, Response
|
| 4 |
from pythainlp.tokenize import (
|
| 5 |
word_tokenize as py_word_tokenize,
|
| 6 |
subword_tokenize as py_subword_tokenize,
|
|
|
|
| 50 |
- **text**: The text to tokenize.
|
| 51 |
- **engine**: Word Tokenize Engine (default is newmm)
|
| 52 |
"""
|
| 53 |
+
return Response(
|
| 54 |
+
json.dumps({"words": py_word_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
| 55 |
+
media_type="application/json",
|
| 56 |
+
)
|
| 57 |
|
| 58 |
|
| 59 |
@router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
|
|
|
|
| 66 |
- **text**: The text to tokenize.
|
| 67 |
- **engine**: Subword Tokenize Engine (default is tcc)
|
| 68 |
"""
|
| 69 |
+
return Response(
|
| 70 |
+
json.dumps({"subwords": py_subword_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
| 71 |
+
media_type="application/json",
|
| 72 |
+
)
|
| 73 |
|
| 74 |
|
| 75 |
@router.post('/sent_tokenize', response_model=SentTokenizeEngine)
|
|
|
|
| 82 |
- **text**: The text to tokenize.
|
| 83 |
- **engine**: Sentence Tokenize Engine (default is crfcut)
|
| 84 |
"""
|
| 85 |
+
return Response(
|
| 86 |
+
json.dumps({"sents": py_sent_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
| 87 |
+
media_type="application/json",
|
| 88 |
+
)
|
routers/util.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
-
|
|
|
|
| 3 |
from pythainlp.util import (
|
| 4 |
bahttext as py_bahttext,
|
| 5 |
normalize as py_normalize,
|
|
@@ -12,18 +13,29 @@ def bahttext(number: float):
|
|
| 12 |
"""
|
| 13 |
This api converts a number to Thai text and adds a suffix “บาท” (Baht).
|
| 14 |
"""
|
| 15 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
@router.post('/normalize')
|
| 18 |
def normalize(text: str):
|
| 19 |
"""
|
| 20 |
Normalize and clean Thai text
|
| 21 |
"""
|
| 22 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
@router.post('/tone_detector')
|
| 25 |
def tone_detector(syllable: str):
|
| 26 |
"""
|
| 27 |
Thai tone detector for word.
|
| 28 |
"""
|
| 29 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
+
import json
|
| 3 |
+
from fastapi import APIRouter, Response
|
| 4 |
from pythainlp.util import (
|
| 5 |
bahttext as py_bahttext,
|
| 6 |
normalize as py_normalize,
|
|
|
|
| 13 |
"""
|
| 14 |
This api converts a number to Thai text and adds a suffix “บาท” (Baht).
|
| 15 |
"""
|
| 16 |
+
return Response(
|
| 17 |
+
json.dumps({"bahttext": py_bahttext(number)}, ensure_ascii=False),
|
| 18 |
+
media_type="application/json",
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
|
| 22 |
@router.post('/normalize')
|
| 23 |
def normalize(text: str):
|
| 24 |
"""
|
| 25 |
Normalize and clean Thai text
|
| 26 |
"""
|
| 27 |
+
return Response(
|
| 28 |
+
json.dumps({"text": py_normalize(text)}, ensure_ascii=False),
|
| 29 |
+
media_type="application/json",
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
|
| 33 |
@router.post('/tone_detector')
|
| 34 |
def tone_detector(syllable: str):
|
| 35 |
"""
|
| 36 |
Thai tone detector for word.
|
| 37 |
"""
|
| 38 |
+
return Response(
|
| 39 |
+
json.dumps({"tone": py_tone_detector(syllable)}, ensure_ascii=False),
|
| 40 |
+
media_type="application/json",
|
| 41 |
+
)
|