ความเปลี่ยนแปลง
On 5 กุมภาพันธ์ ค.ศ. 2026 22 นาฬิกา 33 นาที 15 วินาที +0700,
- Changed value of field allow_harvest to False in LST20 Corpus
-
Changed value of field harvest_object_id to 9e8f256c-3d90-43d3-b487-e4e737be483b (previously f76d583f-bb12-424c-bc28-1850ced62e98) in LST20 Corpus
-
Changed value of field harvest_job_id to acbfb3fd-9ebf-4aac-be4a-95e4a3998029 (previously aa4860df-e4e6-4b75-a533-5ae4b65d858b) in LST20 Corpus
| f | 1 | { | f | 1 | { |
| 2 | "accessible_condition": | 2 | "accessible_condition": | ||
| 3 | \u0e04\u0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\r\n\r\n1. | 3 | \u0e04\u0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\r\n\r\n1. | ||
| 4 | 0e48\u0e40\u0e0a\u0e34\u0e07\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | 4 | 0e48\u0e40\u0e0a\u0e34\u0e07\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | ||
| 5 | 0e0a\u0e49\u0e43\u0e19\u0e07\u0e32\u0e19\u0e27\u0e34\u0e08\u0e31\u0e22 | 5 | 0e0a\u0e49\u0e43\u0e19\u0e07\u0e32\u0e19\u0e27\u0e34\u0e08\u0e31\u0e22 | ||
| 6 | 0e21\u0e48\u0e43\u0e0a\u0e48\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | 6 | 0e21\u0e48\u0e43\u0e0a\u0e48\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | ||
| 7 | \u0e01\u0e32\u0e23\u0e27\u0e34\u0e08\u0e31\u0e22 | 7 | \u0e01\u0e32\u0e23\u0e27\u0e34\u0e08\u0e31\u0e22 | ||
| 8 | 0e1b\u0e34\u0e14\u0e0b\u0e2d\u0e23\u0e4c\u0e2a\u0e42\u0e04\u0e49\u0e14 | 8 | 0e1b\u0e34\u0e14\u0e0b\u0e2d\u0e23\u0e4c\u0e2a\u0e42\u0e04\u0e49\u0e14 | ||
| 9 | 0e44\u0e21\u0e48\u0e04\u0e34\u0e14\u0e21\u0e39\u0e25\u0e04\u0e48\u0e32 | 9 | 0e44\u0e21\u0e48\u0e04\u0e34\u0e14\u0e21\u0e39\u0e25\u0e04\u0e48\u0e32 | ||
| 10 | 0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e19\u0e35\u0e49 | 10 | 0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e19\u0e35\u0e49 | ||
| 11 | 0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e0a\u0e38\u0e14\u0e19\u0e35\u0e49 | 11 | 0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e0a\u0e38\u0e14\u0e19\u0e35\u0e49 | ||
| 12 | 0e22\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22 | 12 | 0e22\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22 | ||
| 13 | 0e42\u0e1b\u0e23\u0e14\u0e2a\u0e48\u0e07\u0e42\u0e21\u0e40\u0e14\u0e25 | 13 | 0e42\u0e1b\u0e23\u0e14\u0e2a\u0e48\u0e07\u0e42\u0e21\u0e40\u0e14\u0e25 | ||
| 14 | \u0e42\u0e04\u0e49\u0e14 \u0e41\u0e25\u0e30 APIs | 14 | \u0e42\u0e04\u0e49\u0e14 \u0e41\u0e25\u0e30 APIs | ||
| 15 | 0e21\u0e32\u0e22\u0e31\u0e07\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23 | 15 | 0e21\u0e32\u0e22\u0e31\u0e07\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23 | ||
| 16 | AI for Thai \u0e44\u0e14\u0e49 | 16 | AI for Thai \u0e44\u0e14\u0e49 | ||
| 17 | \u0e42\u0e14\u0e22\u0e15\u0e34\u0e14\u0e15\u0e48\u0e2d \u0e14\u0e23. | 17 | \u0e42\u0e14\u0e22\u0e15\u0e34\u0e14\u0e15\u0e48\u0e2d \u0e14\u0e23. | ||
| 18 | \u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | 18 | \u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | ||
| 19 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34 | 19 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34 | ||
| 20 | 0e48\u0e32\u0e19\u0e17\u0e32\u0e07\u0e2d\u0e35\u0e40\u0e21\u0e25\u0e4c | 20 | 0e48\u0e32\u0e19\u0e17\u0e32\u0e07\u0e2d\u0e35\u0e40\u0e21\u0e25\u0e4c | ||
| 21 | 0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e0a\u0e38\u0e14\u0e19\u0e35\u0e49 | 21 | 0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e0a\u0e38\u0e14\u0e19\u0e35\u0e49 | ||
| 22 | \u0e32\u0e02\u0e2d\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\r\n\r\n2. | 22 | \u0e32\u0e02\u0e2d\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\r\n\r\n2. | ||
| 23 | 0e19\u0e40\u0e0a\u0e34\u0e07\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | 23 | 0e19\u0e40\u0e0a\u0e34\u0e07\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | ||
| 24 | 2\u0e49\u0e2d\u0e15\u0e48\u0e2d\u0e44\u0e1b\u0e19\u0e35\u0e49\r\n\r\n- | 24 | 2\u0e49\u0e2d\u0e15\u0e48\u0e2d\u0e44\u0e1b\u0e19\u0e35\u0e49\r\n\r\n- | ||
| 25 | \u0e17\u0e32\u0e07\u0e40\u0e25\u0e37\u0e2d\u0e01\u0e17\u0e35\u0e48 1: | 25 | \u0e17\u0e32\u0e07\u0e40\u0e25\u0e37\u0e2d\u0e01\u0e17\u0e35\u0e48 1: | ||
| 26 | 0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e02\u0e19\u0e32\u0e14 | 26 | 0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e02\u0e19\u0e32\u0e14 | ||
| 27 | 50,000 \u0e04\u0e33 | 27 | 50,000 \u0e04\u0e33 | ||
| 28 | 0e2d\u0e21\u0e39\u0e25\u0e17\u0e35\u0e48\u0e41\u0e19\u0e1a\u0e21\u0e32 | 28 | 0e2d\u0e21\u0e39\u0e25\u0e17\u0e35\u0e48\u0e41\u0e19\u0e1a\u0e21\u0e32 | ||
| 29 | 0e22\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22 | 29 | 0e22\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22 | ||
| 30 | 7\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e14\u0e49\u0e27\u0e22\r\n\r\n- | 30 | 7\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e14\u0e49\u0e27\u0e22\r\n\r\n- | ||
| 31 | \u0e17\u0e32\u0e07\u0e40\u0e25\u0e37\u0e2d\u0e01\u0e17\u0e35\u0e48 2: | 31 | \u0e17\u0e32\u0e07\u0e40\u0e25\u0e37\u0e2d\u0e01\u0e17\u0e35\u0e48 2: | ||
| 32 | 0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e19\u0e35\u0e49 | 32 | 0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e19\u0e35\u0e49 | ||
| 33 | 0e07\u0e32\u0e19\u0e15\u0e25\u0e2d\u0e14\u0e0a\u0e35\u0e27\u0e34\u0e15 | 33 | 0e07\u0e32\u0e19\u0e15\u0e25\u0e2d\u0e14\u0e0a\u0e35\u0e27\u0e34\u0e15 | ||
| 34 | 0e35\u0e49\u0e42\u0e1b\u0e23\u0e14\u0e15\u0e34\u0e14\u0e15\u0e48\u0e2d | 34 | 0e35\u0e49\u0e42\u0e1b\u0e23\u0e14\u0e15\u0e34\u0e14\u0e15\u0e48\u0e2d | ||
| 35 | \u0e14\u0e23. \u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | 35 | \u0e14\u0e23. \u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | ||
| 36 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34 | 36 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34 | ||
| 37 | 0e48\u0e32\u0e19\u0e17\u0e32\u0e07\u0e2d\u0e35\u0e40\u0e21\u0e25\u0e4c | 37 | 0e48\u0e32\u0e19\u0e17\u0e32\u0e07\u0e2d\u0e35\u0e40\u0e21\u0e25\u0e4c | ||
| 38 | thepchai@nectec.or.th | 38 | thepchai@nectec.or.th | ||
| 39 | u0e40\u0e1e\u0e34\u0e48\u0e21\u0e40\u0e15\u0e34\u0e21\r\n\r\nAGREEMENT | 39 | u0e40\u0e1e\u0e34\u0e48\u0e21\u0e40\u0e15\u0e34\u0e21\r\n\r\nAGREEMENT | ||
| 40 | OF USAGE\r\n\r\n1. Non-commercial use, research, and open | 40 | OF USAGE\r\n\r\n1. Non-commercial use, research, and open | ||
| 41 | source\r\n\r\nAny non-commercial use of the dataset for research and | 41 | source\r\n\r\nAny non-commercial use of the dataset for research and | ||
| 42 | open-sourced projects is encouraged and free of charge. Please cite | 42 | open-sourced projects is encouraged and free of charge. Please cite | ||
| 43 | our technical report for reference.\r\n\r\nIf you want to perpetuate | 43 | our technical report for reference.\r\n\r\nIf you want to perpetuate | ||
| 44 | your models trained on our dataset and share them to the research | 44 | your models trained on our dataset and share them to the research | ||
| 45 | community in Thailand, please send your models, code, and APIs to the | 45 | community in Thailand, please send your models, code, and APIs to the | ||
| 46 | AI for Thai Project. Please contact Dr. Thepchai Supnithi via | 46 | AI for Thai Project. Please contact Dr. Thepchai Supnithi via | ||
| 47 | thepchai@nectec.or.th for more information.\r\n\r\nNote that | 47 | thepchai@nectec.or.th for more information.\r\n\r\nNote that | ||
| 48 | modification and redistribution of the dataset by any means are | 48 | modification and redistribution of the dataset by any means are | ||
| 49 | strictly prohibited unless authorized by the corpus authors.\r\n\r\n2. | 49 | strictly prohibited unless authorized by the corpus authors.\r\n\r\n2. | ||
| 50 | Commercial use\r\n\r\nIn any commercial use of the dataset, there are | 50 | Commercial use\r\n\r\nIn any commercial use of the dataset, there are | ||
| 51 | two options.\r\n\r\n- Option 1 (in kind): Contributing a dataset of | 51 | two options.\r\n\r\n- Option 1 (in kind): Contributing a dataset of | ||
| 52 | 50,000 words completely annotated with our annotation scheme within 1 | 52 | 50,000 words completely annotated with our annotation scheme within 1 | ||
| 53 | year. Your data will also be shared and recognized as a dataset | 53 | year. Your data will also be shared and recognized as a dataset | ||
| 54 | co-creator in the research community in Thailand.\r\n\r\n- Option 2 | 54 | co-creator in the research community in Thailand.\r\n\r\n- Option 2 | ||
| 55 | (in cash): Purchasing a lifetime license for the entire dataset is | 55 | (in cash): Purchasing a lifetime license for the entire dataset is | ||
| 56 | required. The purchased rights of use cover only this | 56 | required. The purchased rights of use cover only this | ||
| 57 | dataset.\r\n\r\nIn both options, please contact Dr. Thepchai Supnithi | 57 | dataset.\r\n\r\nIn both options, please contact Dr. Thepchai Supnithi | ||
| 58 | via thepchai@nectec.or.th for more information.\r\n\r\n", | 58 | via thepchai@nectec.or.th for more information.\r\n\r\n", | ||
| n | 59 | "allow_harvest": "True", | n | 59 | "allow_harvest": "False", |
| 60 | "author": null, | 60 | "author": null, | ||
| 61 | "author_email": null, | 61 | "author_email": null, | ||
| 62 | "creator_user_id": "3cc37be5-a41a-42e6-ae89-c13c06e2b801", | 62 | "creator_user_id": "3cc37be5-a41a-42e6-ae89-c13c06e2b801", | ||
| 63 | "data_category": | 63 | "data_category": | ||
| 64 | 49\u0e2d\u0e21\u0e39\u0e25\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e30", | 64 | 49\u0e2d\u0e21\u0e39\u0e25\u0e2a\u0e32\u0e18\u0e32\u0e23\u0e13\u0e30", | ||
| 65 | "data_collect": "\u0e2d\u0e37\u0e48\u0e19\u0e46", | 65 | "data_collect": "\u0e2d\u0e37\u0e48\u0e19\u0e46", | ||
| 66 | "data_collect_other": | 66 | "data_collect_other": | ||
| 67 | 2d\u0e1a\u0e40\u0e02\u0e15\u0e23\u0e30\u0e14\u0e31\u0e1a\u0e04\u0e33", | 67 | 2d\u0e1a\u0e40\u0e02\u0e15\u0e23\u0e30\u0e14\u0e31\u0e1a\u0e04\u0e33", | ||
| 68 | "data_format": [ | 68 | "data_format": [ | ||
| 69 | "\u0e2d\u0e37\u0e48\u0e19\u0e46" | 69 | "\u0e2d\u0e37\u0e48\u0e19\u0e46" | ||
| 70 | ], | 70 | ], | ||
| 71 | "data_format_other": "the CoNLL-2003-style format", | 71 | "data_format_other": "the CoNLL-2003-style format", | ||
| 72 | "data_language": [ | 72 | "data_language": [ | ||
| 73 | "\u0e44\u0e17\u0e22" | 73 | "\u0e44\u0e17\u0e22" | ||
| 74 | ], | 74 | ], | ||
| 75 | "data_source": "\u0e02\u0e48\u0e32\u0e27", | 75 | "data_source": "\u0e02\u0e48\u0e32\u0e27", | ||
| 76 | "data_support": | 76 | "data_support": | ||
| 77 | 48\u0e27\u0e22\u0e07\u0e32\u0e19\u0e02\u0e2d\u0e07\u0e23\u0e31\u0e10", | 77 | 48\u0e27\u0e22\u0e07\u0e32\u0e19\u0e02\u0e2d\u0e07\u0e23\u0e31\u0e10", | ||
| 78 | "data_support_other": "", | 78 | "data_support_other": "", | ||
| 79 | "data_type": | 79 | "data_type": | ||
| 80 | 49\u0e2d\u0e21\u0e39\u0e25\u0e23\u0e30\u0e40\u0e1a\u0e35\u0e22\u0e19", | 80 | 49\u0e2d\u0e21\u0e39\u0e25\u0e23\u0e30\u0e40\u0e1a\u0e35\u0e22\u0e19", | ||
| 81 | "disaggregate": [], | 81 | "disaggregate": [], | ||
| 82 | "disclosure": "public", | 82 | "disclosure": "public", | ||
| 83 | "extras": [ | 83 | "extras": [ | ||
| 84 | { | 84 | { | ||
| 85 | "key": "harvest_job_id", | 85 | "key": "harvest_job_id", | ||
| n | 86 | "value": "aa4860df-e4e6-4b75-a533-5ae4b65d858b" | n | 86 | "value": "acbfb3fd-9ebf-4aac-be4a-95e4a3998029" |
| 87 | }, | 87 | }, | ||
| 88 | { | 88 | { | ||
| 89 | "key": "harvest_object_id", | 89 | "key": "harvest_object_id", | ||
| n | 90 | "value": "f76d583f-bb12-424c-bc28-1850ced62e98" | n | 90 | "value": "9e8f256c-3d90-43d3-b487-e4e737be483b" |
| 91 | }, | 91 | }, | ||
| 92 | { | 92 | { | ||
| 93 | "key": "harvest_source_id", | 93 | "key": "harvest_source_id", | ||
| 94 | "value": "4332bc23-5204-4a78-ad02-eadde32a56e7" | 94 | "value": "4332bc23-5204-4a78-ad02-eadde32a56e7" | ||
| 95 | }, | 95 | }, | ||
| 96 | { | 96 | { | ||
| 97 | "key": "harvest_url", | 97 | "key": "harvest_url", | ||
| 98 | "value": | 98 | "value": | ||
| 99 | pend-portal.nectec.or.th/dataset/d1364791-84bc-4b65-9904-79aa0aa2c5a6" | 99 | pend-portal.nectec.or.th/dataset/d1364791-84bc-4b65-9904-79aa0aa2c5a6" | ||
| 100 | }, | 100 | }, | ||
| 101 | { | 101 | { | ||
| 102 | "key": "source_dataset_id", | 102 | "key": "source_dataset_id", | ||
| 103 | "value": "d1364791-84bc-4b65-9904-79aa0aa2c5a6" | 103 | "value": "d1364791-84bc-4b65-9904-79aa0aa2c5a6" | ||
| 104 | } | 104 | } | ||
| 105 | ], | 105 | ], | ||
| 106 | "geo_coverage": "\u0e44\u0e21\u0e48\u0e21\u0e35", | 106 | "geo_coverage": "\u0e44\u0e21\u0e48\u0e21\u0e35", | ||
| 107 | "geo_coverage_other": "", | 107 | "geo_coverage_other": "", | ||
| 108 | "groups": [], | 108 | "groups": [], | ||
| 109 | "id": "d1364791-84bc-4b65-9904-79aa0aa2c5a6", | 109 | "id": "d1364791-84bc-4b65-9904-79aa0aa2c5a6", | ||
| 110 | "isopen": false, | 110 | "isopen": false, | ||
| 111 | "license_id": "\u0e2d\u0e37\u0e48\u0e19\u0e46", | 111 | "license_id": "\u0e2d\u0e37\u0e48\u0e19\u0e46", | ||
| 112 | "license_id_other": "", | 112 | "license_id_other": "", | ||
| 113 | "license_title": "\u0e2d\u0e37\u0e48\u0e19\u0e46", | 113 | "license_title": "\u0e2d\u0e37\u0e48\u0e19\u0e46", | ||
| 114 | "maintainer": "\u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | 114 | "maintainer": "\u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | ||
| 115 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34", | 115 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34", | ||
| 116 | "maintainer_email": "thepchai.supnithi@nectec.or.th", | 116 | "maintainer_email": "thepchai.supnithi@nectec.or.th", | ||
| 117 | "metadata_created": "2024-05-06T15:17:20.957800", | 117 | "metadata_created": "2024-05-06T15:17:20.957800", | ||
| n | 118 | "metadata_modified": "2024-05-06T15:17:20.957807", | n | 118 | "metadata_modified": "2026-02-05T15:33:15.151425", |
| 119 | "name": "lst20-corpus", | 119 | "name": "lst20-corpus", | ||
| 120 | "notes": "LST20 Corpus | 120 | "notes": "LST20 Corpus | ||
| 121 | 0e21\u0e27\u0e25\u0e1c\u0e25\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22 | 121 | 0e21\u0e27\u0e25\u0e1c\u0e25\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22 | ||
| 122 | 0e15\u0e2d\u0e23\u0e4c\u0e41\u0e2b\u0e48\u0e07\u0e0a\u0e32\u0e15\u0e34 | 122 | 0e15\u0e2d\u0e23\u0e4c\u0e41\u0e2b\u0e48\u0e07\u0e0a\u0e32\u0e15\u0e34 | ||
| 123 | 0e49\u0e2d\u0e21\u0e39\u0e25\u0e17\u0e32\u0e07\u0e20\u0e32\u0e29\u0e32 | 123 | 0e49\u0e2d\u0e21\u0e39\u0e25\u0e17\u0e32\u0e07\u0e20\u0e32\u0e29\u0e32 | ||
| 124 | 5 \u0e23\u0e30\u0e14\u0e31\u0e1a \u0e44\u0e14\u0e49\u0e41\u0e01\u0e48 | 124 | 5 \u0e23\u0e30\u0e14\u0e31\u0e1a \u0e44\u0e14\u0e49\u0e41\u0e01\u0e48 | ||
| 125 | \u0e02\u0e2d\u0e1a\u0e40\u0e02\u0e15\u0e02\u0e2d\u0e07\u0e04\u0e33 | 125 | \u0e02\u0e2d\u0e1a\u0e40\u0e02\u0e15\u0e02\u0e2d\u0e07\u0e04\u0e33 | ||
| 126 | (word boundaries), | 126 | (word boundaries), | ||
| 127 | \u0e0a\u0e19\u0e34\u0e14\u0e02\u0e2d\u0e07\u0e04\u0e33 (part of | 127 | \u0e0a\u0e19\u0e34\u0e14\u0e02\u0e2d\u0e07\u0e04\u0e33 (part of | ||
| 128 | speech), | 128 | speech), | ||
| 129 | 0e02\u0e2d\u0e07\u0e0a\u0e37\u0e48\u0e2d\u0e40\u0e09\u0e1e\u0e32\u0e30 | 129 | 0e02\u0e2d\u0e07\u0e0a\u0e37\u0e48\u0e2d\u0e40\u0e09\u0e1e\u0e32\u0e30 | ||
| 130 | (named entities), | 130 | (named entities), | ||
| 131 | 0e2d\u0e07\u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e22\u0e48\u0e2d\u0e22 | 131 | 0e2d\u0e07\u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e22\u0e48\u0e2d\u0e22 | ||
| 132 | (clause boundaries), | 132 | (clause boundaries), | ||
| 133 | 0e02\u0e15\u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e43\u0e2b\u0e0d\u0e48 | 133 | 0e02\u0e15\u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e43\u0e2b\u0e0d\u0e48 | ||
| 134 | (sentence boundaries) | 134 | (sentence boundaries) | ||
| 135 | e35\u0e49\u0e1b\u0e23\u0e30\u0e01\u0e2d\u0e1a\u0e14\u0e49\u0e27\u0e22: | 135 | e35\u0e49\u0e1b\u0e23\u0e30\u0e01\u0e2d\u0e1a\u0e14\u0e49\u0e27\u0e22: | ||
| 136 | 3,164,002 \u0e04\u0e33, 288,020 | 136 | 3,164,002 \u0e04\u0e33, 288,020 | ||
| 137 | \u0e0a\u0e37\u0e48\u0e2d\u0e40\u0e09\u0e1e\u0e32\u0e30, 248,181 | 137 | \u0e0a\u0e37\u0e48\u0e2d\u0e40\u0e09\u0e1e\u0e32\u0e30, 248,181 | ||
| 138 | \u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e22\u0e48\u0e2d\u0e22, and | 138 | \u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e22\u0e48\u0e2d\u0e22, and | ||
| 139 | 74,180 \u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e43\u0e2b\u0e0d\u0e48 | 139 | 74,180 \u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e43\u0e2b\u0e0d\u0e48 | ||
| 140 | 0e31\u0e49\u0e07\u0e2a\u0e34\u0e49\u0e19\u0e40\u0e1e\u0e35\u0e22\u0e07 | 140 | 0e31\u0e49\u0e07\u0e2a\u0e34\u0e49\u0e19\u0e40\u0e1e\u0e35\u0e22\u0e07 | ||
| 141 | 16 | 141 | 16 | ||
| 142 | 0e41\u0e17\u0e47\u0e01\u0e40\u0e17\u0e48\u0e32\u0e19\u0e31\u0e49\u0e19 | 142 | 0e41\u0e17\u0e47\u0e01\u0e40\u0e17\u0e48\u0e32\u0e19\u0e31\u0e49\u0e19 | ||
| 143 | 0e32\u0e23\u0e23\u0e27\u0e1a\u0e23\u0e27\u0e21\u0e02\u0e48\u0e32\u0e27 | 143 | 0e32\u0e23\u0e23\u0e27\u0e1a\u0e23\u0e27\u0e21\u0e02\u0e48\u0e32\u0e27 | ||
| 144 | 3,745 \u0e0a\u0e34\u0e49\u0e19 | 144 | 3,745 \u0e0a\u0e34\u0e49\u0e19 | ||
| 145 | 0e2d\u0e07\u0e02\u0e48\u0e32\u0e27\u0e40\u0e2d\u0e32\u0e44\u0e27\u0e49 | 145 | 0e2d\u0e07\u0e02\u0e48\u0e32\u0e27\u0e40\u0e2d\u0e32\u0e44\u0e27\u0e49 | ||
| 146 | 15 \u0e0a\u0e19\u0e34\u0e14\u0e2d\u0e35\u0e01\u0e14\u0e49\u0e27\u0e22 | 146 | 15 \u0e0a\u0e19\u0e34\u0e14\u0e2d\u0e35\u0e01\u0e14\u0e49\u0e27\u0e22 | ||
| 147 | e0b\u0e31\u0e1a\u0e0b\u0e49\u0e2d\u0e19\u0e44\u0e14\u0e49\r\n\r\nLST20 | 147 | e0b\u0e31\u0e1a\u0e0b\u0e49\u0e2d\u0e19\u0e44\u0e14\u0e49\r\n\r\nLST20 | ||
| 148 | Corpus is a dataset for Thai language processing developed by National | 148 | Corpus is a dataset for Thai language processing developed by National | ||
| 149 | Electronics and Computer Technology Center (NECTEC), Thailand. It | 149 | Electronics and Computer Technology Center (NECTEC), Thailand. It | ||
| 150 | offers five layers of linguistic annotation: word boundaries, POS | 150 | offers five layers of linguistic annotation: word boundaries, POS | ||
| 151 | tagging, named entities, clause boundaries, and sentence boundaries. | 151 | tagging, named entities, clause boundaries, and sentence boundaries. | ||
| 152 | At a large scale, it consists of 3,164,002 words, 288,020 named | 152 | At a large scale, it consists of 3,164,002 words, 288,020 named | ||
| 153 | entities, 248,181 clauses, and 74,180 sentences, while it is annotated | 153 | entities, 248,181 clauses, and 74,180 sentences, while it is annotated | ||
| 154 | with 16 distinct POS tags. All 3,745 documents are also annotated with | 154 | with 16 distinct POS tags. All 3,745 documents are also annotated with | ||
| 155 | one of 15 news genres. Regarding its sheer size, this dataset is | 155 | one of 15 news genres. Regarding its sheer size, this dataset is | ||
| 156 | considered large enough for developing joint neural models for | 156 | considered large enough for developing joint neural models for | ||
| 157 | NLP.\r\n", | 157 | NLP.\r\n", | ||
| 158 | "num_resources": 1, | 158 | "num_resources": 1, | ||
| 159 | "num_tags": 1, | 159 | "num_tags": 1, | ||
| 160 | "objective": [ | 160 | "objective": [ | ||
| 161 | 161 | ||||
| 162 | 17\u0e18\u0e28\u0e32\u0e2a\u0e15\u0e23\u0e4c\u0e0a\u0e32\u0e15\u0e34", | 162 | 17\u0e18\u0e28\u0e32\u0e2a\u0e15\u0e23\u0e4c\u0e0a\u0e32\u0e15\u0e34", | ||
| 163 | 163 | ||||
| 164 | e23\u0e34\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19" | 164 | e23\u0e34\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19" | ||
| 165 | ], | 165 | ], | ||
| 166 | "organization": { | 166 | "organization": { | ||
| 167 | "approval_status": "approved", | 167 | "approval_status": "approved", | ||
| 168 | "created": "2022-05-08T01:41:40.880813", | 168 | "created": "2022-05-08T01:41:40.880813", | ||
| 169 | "description": "", | 169 | "description": "", | ||
| 170 | "id": "16c79885-d87e-42e6-96ac-c694e0e2c5e0", | 170 | "id": "16c79885-d87e-42e6-96ac-c694e0e2c5e0", | ||
| 171 | "image_url": "", | 171 | "image_url": "", | ||
| 172 | "is_organization": true, | 172 | "is_organization": true, | ||
| 173 | "name": "nectec-ainrg", | 173 | "name": "nectec-ainrg", | ||
| 174 | "state": "active", | 174 | "state": "active", | ||
| 175 | "title": | 175 | "title": | ||
| 176 | d\u0e0d\u0e32\u0e1b\u0e23\u0e30\u0e14\u0e34\u0e29\u0e10\u0e4c(AINRG)", | 176 | d\u0e0d\u0e32\u0e1b\u0e23\u0e30\u0e14\u0e34\u0e29\u0e10\u0e4c(AINRG)", | ||
| 177 | "type": "organization" | 177 | "type": "organization" | ||
| 178 | }, | 178 | }, | ||
| 179 | "owner_org": "16c79885-d87e-42e6-96ac-c694e0e2c5e0", | 179 | "owner_org": "16c79885-d87e-42e6-96ac-c694e0e2c5e0", | ||
| 180 | "private": false, | 180 | "private": false, | ||
| 181 | "relationships_as_object": [], | 181 | "relationships_as_object": [], | ||
| 182 | "relationships_as_subject": [], | 182 | "relationships_as_subject": [], | ||
| 183 | "resources": [ | 183 | "resources": [ | ||
| 184 | { | 184 | { | ||
| 185 | "cache_last_updated": null, | 185 | "cache_last_updated": null, | ||
| 186 | "cache_url": null, | 186 | "cache_url": null, | ||
| 187 | "created": "2022-09-23T09:16:19.451791", | 187 | "created": "2022-09-23T09:16:19.451791", | ||
| 188 | "created_at": "2022-09-23T09:16:19.451791", | 188 | "created_at": "2022-09-23T09:16:19.451791", | ||
| 189 | "datastore_active": false, | 189 | "datastore_active": false, | ||
| 190 | "datastore_contains_all_records_of_source_file": false, | 190 | "datastore_contains_all_records_of_source_file": false, | ||
| 191 | "description": "LST20 Corpus | 191 | "description": "LST20 Corpus | ||
| 192 | 0e21\u0e27\u0e25\u0e1c\u0e25\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22 | 192 | 0e21\u0e27\u0e25\u0e1c\u0e25\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22 | ||
| 193 | 0e15\u0e2d\u0e23\u0e4c\u0e41\u0e2b\u0e48\u0e07\u0e0a\u0e32\u0e15\u0e34 | 193 | 0e15\u0e2d\u0e23\u0e4c\u0e41\u0e2b\u0e48\u0e07\u0e0a\u0e32\u0e15\u0e34 | ||
| 194 | 0e49\u0e2d\u0e21\u0e39\u0e25\u0e17\u0e32\u0e07\u0e20\u0e32\u0e29\u0e32 | 194 | 0e49\u0e2d\u0e21\u0e39\u0e25\u0e17\u0e32\u0e07\u0e20\u0e32\u0e29\u0e32 | ||
| 195 | 5 \u0e23\u0e30\u0e14\u0e31\u0e1a \u0e44\u0e14\u0e49\u0e41\u0e01\u0e48 | 195 | 5 \u0e23\u0e30\u0e14\u0e31\u0e1a \u0e44\u0e14\u0e49\u0e41\u0e01\u0e48 | ||
| 196 | \u0e02\u0e2d\u0e1a\u0e40\u0e02\u0e15\u0e02\u0e2d\u0e07\u0e04\u0e33 | 196 | \u0e02\u0e2d\u0e1a\u0e40\u0e02\u0e15\u0e02\u0e2d\u0e07\u0e04\u0e33 | ||
| 197 | (word boundaries), | 197 | (word boundaries), | ||
| 198 | \u0e0a\u0e19\u0e34\u0e14\u0e02\u0e2d\u0e07\u0e04\u0e33 (part of | 198 | \u0e0a\u0e19\u0e34\u0e14\u0e02\u0e2d\u0e07\u0e04\u0e33 (part of | ||
| 199 | speech), | 199 | speech), | ||
| 200 | 0e02\u0e2d\u0e07\u0e0a\u0e37\u0e48\u0e2d\u0e40\u0e09\u0e1e\u0e32\u0e30 | 200 | 0e02\u0e2d\u0e07\u0e0a\u0e37\u0e48\u0e2d\u0e40\u0e09\u0e1e\u0e32\u0e30 | ||
| 201 | (named entities), | 201 | (named entities), | ||
| 202 | 0e2d\u0e07\u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e22\u0e48\u0e2d\u0e22 | 202 | 0e2d\u0e07\u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e22\u0e48\u0e2d\u0e22 | ||
| 203 | (clause boundaries), | 203 | (clause boundaries), | ||
| 204 | 0e02\u0e15\u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e43\u0e2b\u0e0d\u0e48 | 204 | 0e02\u0e15\u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e43\u0e2b\u0e0d\u0e48 | ||
| 205 | (sentence boundaries) | 205 | (sentence boundaries) | ||
| 206 | e35\u0e49\u0e1b\u0e23\u0e30\u0e01\u0e2d\u0e1a\u0e14\u0e49\u0e27\u0e22: | 206 | e35\u0e49\u0e1b\u0e23\u0e30\u0e01\u0e2d\u0e1a\u0e14\u0e49\u0e27\u0e22: | ||
| 207 | 3,164,002 \u0e04\u0e33, 288,020 | 207 | 3,164,002 \u0e04\u0e33, 288,020 | ||
| 208 | \u0e0a\u0e37\u0e48\u0e2d\u0e40\u0e09\u0e1e\u0e32\u0e30, 248,181 | 208 | \u0e0a\u0e37\u0e48\u0e2d\u0e40\u0e09\u0e1e\u0e32\u0e30, 248,181 | ||
| 209 | \u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e22\u0e48\u0e2d\u0e22, and | 209 | \u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e22\u0e48\u0e2d\u0e22, and | ||
| 210 | 74,180 \u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e43\u0e2b\u0e0d\u0e48 | 210 | 74,180 \u0e1b\u0e23\u0e30\u0e42\u0e22\u0e04\u0e43\u0e2b\u0e0d\u0e48 | ||
| 211 | 0e31\u0e49\u0e07\u0e2a\u0e34\u0e49\u0e19\u0e40\u0e1e\u0e35\u0e22\u0e07 | 211 | 0e31\u0e49\u0e07\u0e2a\u0e34\u0e49\u0e19\u0e40\u0e1e\u0e35\u0e22\u0e07 | ||
| 212 | 16 | 212 | 16 | ||
| 213 | 0e41\u0e17\u0e47\u0e01\u0e40\u0e17\u0e48\u0e32\u0e19\u0e31\u0e49\u0e19 | 213 | 0e41\u0e17\u0e47\u0e01\u0e40\u0e17\u0e48\u0e32\u0e19\u0e31\u0e49\u0e19 | ||
| 214 | 0e32\u0e23\u0e23\u0e27\u0e1a\u0e23\u0e27\u0e21\u0e02\u0e48\u0e32\u0e27 | 214 | 0e32\u0e23\u0e23\u0e27\u0e1a\u0e23\u0e27\u0e21\u0e02\u0e48\u0e32\u0e27 | ||
| 215 | 3,745 \u0e0a\u0e34\u0e49\u0e19 | 215 | 3,745 \u0e0a\u0e34\u0e49\u0e19 | ||
| 216 | 0e2d\u0e07\u0e02\u0e48\u0e32\u0e27\u0e40\u0e2d\u0e32\u0e44\u0e27\u0e49 | 216 | 0e2d\u0e07\u0e02\u0e48\u0e32\u0e27\u0e40\u0e2d\u0e32\u0e44\u0e27\u0e49 | ||
| 217 | 15 \u0e0a\u0e19\u0e34\u0e14\u0e2d\u0e35\u0e01\u0e14\u0e49\u0e27\u0e22 | 217 | 15 \u0e0a\u0e19\u0e34\u0e14\u0e2d\u0e35\u0e01\u0e14\u0e49\u0e27\u0e22 | ||
| 218 | e0b\u0e31\u0e1a\u0e0b\u0e49\u0e2d\u0e19\u0e44\u0e14\u0e49\r\n\r\nLST20 | 218 | e0b\u0e31\u0e1a\u0e0b\u0e49\u0e2d\u0e19\u0e44\u0e14\u0e49\r\n\r\nLST20 | ||
| 219 | Corpus is a dataset for Thai language processing developed by National | 219 | Corpus is a dataset for Thai language processing developed by National | ||
| 220 | Electronics and Computer Technology Center (NECTEC), Thailand. It | 220 | Electronics and Computer Technology Center (NECTEC), Thailand. It | ||
| 221 | offers five layers of linguistic annotation: word boundaries, POS | 221 | offers five layers of linguistic annotation: word boundaries, POS | ||
| 222 | tagging, named entities, clause boundaries, and sentence boundaries. | 222 | tagging, named entities, clause boundaries, and sentence boundaries. | ||
| 223 | At a large scale, it consists of 3,164,002 words, 288,020 named | 223 | At a large scale, it consists of 3,164,002 words, 288,020 named | ||
| 224 | entities, 248,181 clauses, and 74,180 sentences, while it is annotated | 224 | entities, 248,181 clauses, and 74,180 sentences, while it is annotated | ||
| 225 | with 16 distinct POS tags. All 3,745 documents are also annotated with | 225 | with 16 distinct POS tags. All 3,745 documents are also annotated with | ||
| 226 | one of 15 news genres. Regarding its sheer size, this dataset is | 226 | one of 15 news genres. Regarding its sheer size, this dataset is | ||
| 227 | considered large enough for developing joint neural models for NLP.", | 227 | considered large enough for developing joint neural models for NLP.", | ||
| 228 | "format": "the CoNLL-2003-style format", | 228 | "format": "the CoNLL-2003-style format", | ||
| 229 | "hash": "", | 229 | "hash": "", | ||
| 230 | "id": "063e2392-1eba-4099-a732-fbaf1ba9a293", | 230 | "id": "063e2392-1eba-4099-a732-fbaf1ba9a293", | ||
| 231 | "last_modified": "2022-09-23T09:32:31.098167", | 231 | "last_modified": "2022-09-23T09:32:31.098167", | ||
| t | 232 | "metadata_modified": "2024-05-06T15:17:20.953831", | t | 232 | "metadata_modified": "2026-02-05T15:33:15.160604", |
| 233 | "mimetype": "application/zip", | 233 | "mimetype": "application/zip", | ||
| 234 | "mimetype_inner": null, | 234 | "mimetype_inner": null, | ||
| 235 | "name": "LST20 Corpus", | 235 | "name": "LST20 Corpus", | ||
| 236 | "package_id": "d1364791-84bc-4b65-9904-79aa0aa2c5a6", | 236 | "package_id": "d1364791-84bc-4b65-9904-79aa0aa2c5a6", | ||
| 237 | "position": 0, | 237 | "position": 0, | ||
| 238 | "resource_accessible_condition": | 238 | "resource_accessible_condition": | ||
| 239 | \u0e04\u0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\r\n\r\n1. | 239 | \u0e04\u0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\r\n\r\n1. | ||
| 240 | 0e48\u0e40\u0e0a\u0e34\u0e07\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | 240 | 0e48\u0e40\u0e0a\u0e34\u0e07\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | ||
| 241 | 0e0a\u0e49\u0e43\u0e19\u0e07\u0e32\u0e19\u0e27\u0e34\u0e08\u0e31\u0e22 | 241 | 0e0a\u0e49\u0e43\u0e19\u0e07\u0e32\u0e19\u0e27\u0e34\u0e08\u0e31\u0e22 | ||
| 242 | 0e21\u0e48\u0e43\u0e0a\u0e48\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | 242 | 0e21\u0e48\u0e43\u0e0a\u0e48\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | ||
| 243 | \u0e01\u0e32\u0e23\u0e27\u0e34\u0e08\u0e31\u0e22 | 243 | \u0e01\u0e32\u0e23\u0e27\u0e34\u0e08\u0e31\u0e22 | ||
| 244 | 0e1b\u0e34\u0e14\u0e0b\u0e2d\u0e23\u0e4c\u0e2a\u0e42\u0e04\u0e49\u0e14 | 244 | 0e1b\u0e34\u0e14\u0e0b\u0e2d\u0e23\u0e4c\u0e2a\u0e42\u0e04\u0e49\u0e14 | ||
| 245 | 0e44\u0e21\u0e48\u0e04\u0e34\u0e14\u0e21\u0e39\u0e25\u0e04\u0e48\u0e32 | 245 | 0e44\u0e21\u0e48\u0e04\u0e34\u0e14\u0e21\u0e39\u0e25\u0e04\u0e48\u0e32 | ||
| 246 | 0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e19\u0e35\u0e49 | 246 | 0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e19\u0e35\u0e49 | ||
| 247 | 0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e0a\u0e38\u0e14\u0e19\u0e35\u0e49 | 247 | 0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e0a\u0e38\u0e14\u0e19\u0e35\u0e49 | ||
| 248 | 0e22\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22 | 248 | 0e22\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22 | ||
| 249 | 0e42\u0e1b\u0e23\u0e14\u0e2a\u0e48\u0e07\u0e42\u0e21\u0e40\u0e14\u0e25 | 249 | 0e42\u0e1b\u0e23\u0e14\u0e2a\u0e48\u0e07\u0e42\u0e21\u0e40\u0e14\u0e25 | ||
| 250 | \u0e42\u0e04\u0e49\u0e14 \u0e41\u0e25\u0e30 APIs | 250 | \u0e42\u0e04\u0e49\u0e14 \u0e41\u0e25\u0e30 APIs | ||
| 251 | 0e21\u0e32\u0e22\u0e31\u0e07\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23 | 251 | 0e21\u0e32\u0e22\u0e31\u0e07\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23 | ||
| 252 | AI for Thai \u0e44\u0e14\u0e49 | 252 | AI for Thai \u0e44\u0e14\u0e49 | ||
| 253 | \u0e42\u0e14\u0e22\u0e15\u0e34\u0e14\u0e15\u0e48\u0e2d \u0e14\u0e23. | 253 | \u0e42\u0e14\u0e22\u0e15\u0e34\u0e14\u0e15\u0e48\u0e2d \u0e14\u0e23. | ||
| 254 | \u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | 254 | \u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | ||
| 255 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34 | 255 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34 | ||
| 256 | 0e48\u0e32\u0e19\u0e17\u0e32\u0e07\u0e2d\u0e35\u0e40\u0e21\u0e25\u0e4c | 256 | 0e48\u0e32\u0e19\u0e17\u0e32\u0e07\u0e2d\u0e35\u0e40\u0e21\u0e25\u0e4c | ||
| 257 | 0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e0a\u0e38\u0e14\u0e19\u0e35\u0e49 | 257 | 0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e0a\u0e38\u0e14\u0e19\u0e35\u0e49 | ||
| 258 | \u0e32\u0e02\u0e2d\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\r\n\r\n2. | 258 | \u0e32\u0e02\u0e2d\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\r\n\r\n2. | ||
| 259 | 0e19\u0e40\u0e0a\u0e34\u0e07\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | 259 | 0e19\u0e40\u0e0a\u0e34\u0e07\u0e1e\u0e32\u0e13\u0e34\u0e0a\u0e22\u0e4c | ||
| 260 | 2\u0e49\u0e2d\u0e15\u0e48\u0e2d\u0e44\u0e1b\u0e19\u0e35\u0e49\r\n\r\n- | 260 | 2\u0e49\u0e2d\u0e15\u0e48\u0e2d\u0e44\u0e1b\u0e19\u0e35\u0e49\r\n\r\n- | ||
| 261 | \u0e17\u0e32\u0e07\u0e40\u0e25\u0e37\u0e2d\u0e01\u0e17\u0e35\u0e48 1: | 261 | \u0e17\u0e32\u0e07\u0e40\u0e25\u0e37\u0e2d\u0e01\u0e17\u0e35\u0e48 1: | ||
| 262 | 0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e02\u0e19\u0e32\u0e14 | 262 | 0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e02\u0e19\u0e32\u0e14 | ||
| 263 | 50,000 \u0e04\u0e33 | 263 | 50,000 \u0e04\u0e33 | ||
| 264 | 0e2d\u0e21\u0e39\u0e25\u0e17\u0e35\u0e48\u0e41\u0e19\u0e1a\u0e21\u0e32 | 264 | 0e2d\u0e21\u0e39\u0e25\u0e17\u0e35\u0e48\u0e41\u0e19\u0e1a\u0e21\u0e32 | ||
| 265 | 0e22\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22 | 265 | 0e22\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22 | ||
| 266 | 7\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e14\u0e49\u0e27\u0e22\r\n\r\n- | 266 | 7\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e14\u0e49\u0e27\u0e22\r\n\r\n- | ||
| 267 | \u0e17\u0e32\u0e07\u0e40\u0e25\u0e37\u0e2d\u0e01\u0e17\u0e35\u0e48 2: | 267 | \u0e17\u0e32\u0e07\u0e40\u0e25\u0e37\u0e2d\u0e01\u0e17\u0e35\u0e48 2: | ||
| 268 | 0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e19\u0e35\u0e49 | 268 | 0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e21\u0e39\u0e25\u0e19\u0e35\u0e49 | ||
| 269 | 0e07\u0e32\u0e19\u0e15\u0e25\u0e2d\u0e14\u0e0a\u0e35\u0e27\u0e34\u0e15 | 269 | 0e07\u0e32\u0e19\u0e15\u0e25\u0e2d\u0e14\u0e0a\u0e35\u0e27\u0e34\u0e15 | ||
| 270 | 0e35\u0e49\u0e42\u0e1b\u0e23\u0e14\u0e15\u0e34\u0e14\u0e15\u0e48\u0e2d | 270 | 0e35\u0e49\u0e42\u0e1b\u0e23\u0e14\u0e15\u0e34\u0e14\u0e15\u0e48\u0e2d | ||
| 271 | \u0e14\u0e23. \u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | 271 | \u0e14\u0e23. \u0e40\u0e17\u0e1e\u0e0a\u0e31\u0e22 | ||
| 272 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34 | 272 | \u0e17\u0e23\u0e31\u0e1e\u0e22\u0e4c\u0e19\u0e34\u0e18\u0e34 | ||
| 273 | 0e48\u0e32\u0e19\u0e17\u0e32\u0e07\u0e2d\u0e35\u0e40\u0e21\u0e25\u0e4c | 273 | 0e48\u0e32\u0e19\u0e17\u0e32\u0e07\u0e2d\u0e35\u0e40\u0e21\u0e25\u0e4c | ||
| 274 | thepchai@nectec.or.th | 274 | thepchai@nectec.or.th | ||
| 275 | u0e40\u0e1e\u0e34\u0e48\u0e21\u0e40\u0e15\u0e34\u0e21\r\n\r\nAGREEMENT | 275 | u0e40\u0e1e\u0e34\u0e48\u0e21\u0e40\u0e15\u0e34\u0e21\r\n\r\nAGREEMENT | ||
| 276 | OF USAGE\r\n\r\n1. Non-commercial use, research, and open | 276 | OF USAGE\r\n\r\n1. Non-commercial use, research, and open | ||
| 277 | source\r\n\r\nAny non-commercial use of the dataset for research and | 277 | source\r\n\r\nAny non-commercial use of the dataset for research and | ||
| 278 | open-sourced projects is encouraged and free of charge. Please cite | 278 | open-sourced projects is encouraged and free of charge. Please cite | ||
| 279 | our technical report for reference.\r\n\r\nIf you want to perpetuate | 279 | our technical report for reference.\r\n\r\nIf you want to perpetuate | ||
| 280 | your models trained on our dataset and share them to the research | 280 | your models trained on our dataset and share them to the research | ||
| 281 | community in Thailand, please send your models, code, and APIs to the | 281 | community in Thailand, please send your models, code, and APIs to the | ||
| 282 | AI for Thai Project. Please contact Dr. Thepchai Supnithi via | 282 | AI for Thai Project. Please contact Dr. Thepchai Supnithi via | ||
| 283 | thepchai@nectec.or.th for more information.\r\n\r\nNote that | 283 | thepchai@nectec.or.th for more information.\r\n\r\nNote that | ||
| 284 | modification and redistribution of the dataset by any means are | 284 | modification and redistribution of the dataset by any means are | ||
| 285 | strictly prohibited unless authorized by the corpus authors.\r\n\r\n2. | 285 | strictly prohibited unless authorized by the corpus authors.\r\n\r\n2. | ||
| 286 | Commercial use\r\n\r\nIn any commercial use of the dataset, there are | 286 | Commercial use\r\n\r\nIn any commercial use of the dataset, there are | ||
| 287 | two options.\r\n\r\n- Option 1 (in kind): Contributing a dataset of | 287 | two options.\r\n\r\n- Option 1 (in kind): Contributing a dataset of | ||
| 288 | 50,000 words completely annotated with our annotation scheme within 1 | 288 | 50,000 words completely annotated with our annotation scheme within 1 | ||
| 289 | year. Your data will also be shared and recognized as a dataset | 289 | year. Your data will also be shared and recognized as a dataset | ||
| 290 | co-creator in the research community in Thailand.\r\n\r\n- Option 2 | 290 | co-creator in the research community in Thailand.\r\n\r\n- Option 2 | ||
| 291 | (in cash): Purchasing a lifetime license for the entire dataset is | 291 | (in cash): Purchasing a lifetime license for the entire dataset is | ||
| 292 | required. The purchased rights of use cover only this | 292 | required. The purchased rights of use cover only this | ||
| 293 | dataset.\r\n\r\nIn both options, please contact Dr. Thepchai Supnithi | 293 | dataset.\r\n\r\nIn both options, please contact Dr. Thepchai Supnithi | ||
| 294 | via thepchai@nectec.or.th for more information.\r\n", | 294 | via thepchai@nectec.or.th for more information.\r\n", | ||
| 295 | "resource_data_collect": "\u0e2d\u0e37\u0e48\u0e19\u0e46", | 295 | "resource_data_collect": "\u0e2d\u0e37\u0e48\u0e19\u0e46", | ||
| 296 | "resource_data_collect_other": | 296 | "resource_data_collect_other": | ||
| 297 | 2d\u0e1a\u0e40\u0e02\u0e15\u0e23\u0e30\u0e14\u0e31\u0e1a\u0e04\u0e33", | 297 | 2d\u0e1a\u0e40\u0e02\u0e15\u0e23\u0e30\u0e14\u0e31\u0e1a\u0e04\u0e33", | ||
| 298 | "resource_disaggregate": [], | 298 | "resource_disaggregate": [], | ||
| 299 | "resource_type": null, | 299 | "resource_type": null, | ||
| 300 | "size": 16117011, | 300 | "size": 16117011, | ||
| 301 | "state": "active", | 301 | "state": "active", | ||
| 302 | "url": | 302 | "url": | ||
| 303 | 063e2392-1eba-4099-a732-fbaf1ba9a293/download/opend_lst20_corpus.zip", | 303 | 063e2392-1eba-4099-a732-fbaf1ba9a293/download/opend_lst20_corpus.zip", | ||
| 304 | "url_type": null | 304 | "url_type": null | ||
| 305 | } | 305 | } | ||
| 306 | ], | 306 | ], | ||
| 307 | "state": "active", | 307 | "state": "active", | ||
| 308 | "tags": [ | 308 | "tags": [ | ||
| 309 | { | 309 | { | ||
| 310 | "display_name": | 310 | "display_name": | ||
| 311 | "\u0e04\u0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e04\u0e27\u0e32\u0e21", | 311 | "\u0e04\u0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e04\u0e27\u0e32\u0e21", | ||
| 312 | "id": "ed82812c-262c-4e30-96d4-2ce6c3ddc0f5", | 312 | "id": "ed82812c-262c-4e30-96d4-2ce6c3ddc0f5", | ||
| 313 | "name": | 313 | "name": | ||
| 314 | "\u0e04\u0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e04\u0e27\u0e32\u0e21", | 314 | "\u0e04\u0e25\u0e31\u0e07\u0e02\u0e49\u0e2d\u0e04\u0e27\u0e32\u0e21", | ||
| 315 | "state": "active", | 315 | "state": "active", | ||
| 316 | "vocabulary_id": null | 316 | "vocabulary_id": null | ||
| 317 | } | 317 | } | ||
| 318 | ], | 318 | ], | ||
| 319 | "title": "LST20 Corpus", | 319 | "title": "LST20 Corpus", | ||
| 320 | "type": "dataset", | 320 | "type": "dataset", | ||
| 321 | "update_frequency_interval": "", | 321 | "update_frequency_interval": "", | ||
| 322 | "update_frequency_unit": | 322 | "update_frequency_unit": | ||
| 323 | "\u0e44\u0e21\u0e48\u0e17\u0e23\u0e32\u0e1a", | 323 | "\u0e44\u0e21\u0e48\u0e17\u0e23\u0e32\u0e1a", | ||
| 324 | "update_frequency_unit_other": "", | 324 | "update_frequency_unit_other": "", | ||
| 325 | "url": "https://aiforthai.in.th/corpus.php", | 325 | "url": "https://aiforthai.in.th/corpus.php", | ||
| 326 | "version": null | 326 | "version": null | ||
| 327 | } | 327 | } |
