Detecting the dominant language in multiple documents
# Abort execution at this point by raising SystemExit.
# NOTE(review): presumably a guard so that a "run all cells" pass stops
# before the AWS calls further down — confirm with the notebook author.
raise SystemExit("Stop right there!")
An exception has occurred, use %tb to see the full traceback.
SystemExit: Stop right there!
1# import the AWS SDK for python (boto3) -
2# http://boto3.readthedocs.io/en/latest/
3import boto3
4
5# import json module to serialize JSON -
6# https://docs.python.org/3.6/library/json.html
7import json
# Instantiating a new comprehend client
comprehend = boto3.client(service_name="comprehend")

# Providing English, Spanish and French texts to analyze.
# Apostrophes are written literally inside the double-quoted strings so the
# service receives the intended words ("they're", "qu'ils", "C'est").
english_string_list = [
    "Humans, eh? Think they're lords of creation.",
    (
        "Not like us cats. We *know* we are. Ever see a cat "
        "feed a human? Case proven."
    ),
]
spanish_string_list = [
    "Los humanos, ¿eh? Se creen los señores de la creación.",
    (
        "No como nosotros los gatos. Nosotros *sabemos* que lo somos. "
        "¿Has visto alguna vez a un gato alimentar a un humano? "
        "Caso probado."
    ),
]
french_string_list = [
    (
        "Les humains, hein ? Ils pensent qu'"
        "ils sont les seigneurs de la création."
    ),
    (
        "Pas comme nous, les chats. Nous *savons* que nous le sommes. "
        "Vous avez déjà vu un chat nourrir un humain ? C'est prouvé."
    ),
]

# (label, texts) pairs in the order they are reported; the label is the
# variable name that appears in each printed heading.
labelled_batches = (
    ("english_string_list", english_string_list),
    ("spanish_string_list", spanish_string_list),
    ("french_string_list", french_string_list),
)

print("Calling BatchDetectDominantLanguage")

for position, (label, texts) in enumerate(labelled_batches):
    # Every heading except the first is preceded by a blank line.
    separator = "\n" if position else ""
    print(separator + label + " results:")

    # One batch request per list; the response is a plain dict.
    response = comprehend.batch_detect_dominant_language(TextList=texts)

    # json.dumps() writes JSON data to a Python string
    print(json.dumps(response, sort_keys=True, indent=4))

print("End of BatchDetectDominantLanguage\n")
Calling BatchDetectDominantLanguage
english_string_list results:
{
"ErrorList": [],
"ResponseMetadata": {
"HTTPHeaders": {
"content-length": "181",
"content-type": "application/x-amz-json-1.1",
"date": "Mon, 11 Apr 2022 05:50:40 GMT",
"x-amzn-requestid": "df422755-b7a1-4a72-9281-5c24f5e182a1"
},
"HTTPStatusCode": 200,
"RequestId": "df422755-b7a1-4a72-9281-5c24f5e182a1",
"RetryAttempts": 0
},
"ResultList": [
{
"Index": 0,
"Languages": [
{
"LanguageCode": "en",
"Score": 0.9121910333633423
}
]
},
{
"Index": 1,
"Languages": [
{
"LanguageCode": "en",
"Score": 0.9947839379310608
}
]
}
]
}
spanish_string_list results:
{
"ErrorList": [],
"ResponseMetadata": {
"HTTPHeaders": {
"content-length": "181",
"content-type": "application/x-amz-json-1.1",
"date": "Mon, 11 Apr 2022 05:50:40 GMT",
"x-amzn-requestid": "78cb6b5c-4e57-4a62-8940-fe1e103cc328"
},
"HTTPStatusCode": 200,
"RequestId": "78cb6b5c-4e57-4a62-8940-fe1e103cc328",
"RetryAttempts": 0
},
"ResultList": [
{
"Index": 0,
"Languages": [
{
"LanguageCode": "es",
"Score": 0.9952653646469116
}
]
},
{
"Index": 1,
"Languages": [
{
"LanguageCode": "es",
"Score": 0.9983640909194946
}
]
}
]
}
french_string_list results:
{
"ErrorList": [],
"ResponseMetadata": {
"HTTPHeaders": {
"content-length": "180",
"content-type": "application/x-amz-json-1.1",
"date": "Mon, 11 Apr 2022 05:50:41 GMT",
"x-amzn-requestid": "a9bf79ef-eb43-475b-ab30-c76332f5552e"
},
"HTTPStatusCode": 200,
"RequestId": "a9bf79ef-eb43-475b-ab30-c76332f5552e",
"RetryAttempts": 0
},
"ResultList": [
{
"Index": 0,
"Languages": [
{
"LanguageCode": "fr",
"Score": 0.9984113574028015
}
]
},
{
"Index": 1,
"Languages": [
{
"LanguageCode": "fr",
"Score": 0.996456503868103
}
]
}
]
}
End of BatchDetectDominantLanguage