From 3dd125e1e39651d382d6cf8db75ffe336f90639b Mon Sep 17 00:00:00 2001
From: wb2osz <wb2osz@comcast.net>
Date: Thu, 24 Mar 2022 00:10:21 +0100
Subject: [PATCH] UTF-8 discussion

---
 src/decode_aprs.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/decode_aprs.c b/src/decode_aprs.c
index 8763e04..e56fb80 100644
--- a/src/decode_aprs.c
+++ b/src/decode_aprs.c
@@ -731,6 +731,26 @@ void decode_aprs_print (decode_aprs_t *A) {
  * To be part of a valid UTF-8 sequence, it would need to be followed by 10xxxxxx.
  */
 
+// For values 00-7F, ASCII, Unicode, and ISO Latin-1 are all the same.
+// ISO Latin-1 adds the 80-FF range, which contains a few common symbols (such as the degree sign)
+// and letters with diacritical marks used by many European languages.
+// Unicode range 80-FF is called "Latin-1 Supplement."  Exactly the same as ISO Latin-1.
+// In UTF-8, each character in the 80-FF range is encoded as two bytes: a lead byte (C2 or C3) is inserted.
+//	Unicode		UTF-8
+//	-------		-----
+//	8x		C2 8x		Insert C2, keep original
+//	9x		C2 9x		"
+//	Ax		C2 Ax		"
+//	Bx		C2 Bx		"
+//	Cx		C3 8x		Insert C3, subtract 0x40 from original
+//	Dx		C3 9x		"
+//	Ex		C3 Ax		"
+//	Fx		C3 Bx		"
+//
+// Can we use this knowledge to provide guidance on other ISO Latin-1 characters besides degree?
+// Should we?
+// Reference:   https://www.fileformat.info/info/unicode/utf8test.htm
+
 	  if ( ! A->g_quiet) {
 
 	    for (j=0; j<n; j++) {
@@ -3866,6 +3886,9 @@ static int data_extension_comment (decode_aprs_t *A, char *pdext)
 	    strlcpy (A->g_directivity, dir[pdext[6]-'0'], sizeof(A->g_directivity));
 	  }
 
+// TODO: Look for one more character, 0-9 or A-Z, followed by a '/'.
+// http://www.aprs.org/aprs12/probes.txt
+
 	  process_comment (A, pdext+7, -1);
 	  return 1;
 	}