diff options
author | James Cowgill <jcowgill@debian.org> | 2023-11-29 19:50:06 +0000 |
---|---|---|
committer | James Cowgill <jcowgill@debian.org> | 2023-11-29 19:50:06 +0000 |
commit | 1a23af3ab4ffcc045291cfa23f80d842edc4877d (patch) | |
tree | e96d8c8a4bce7ae5b391d24081b090bed5d1f466 /script | |
parent | 8c56f68b6fb386c5df7e2b7c2e2e72d3f392c844 (diff) | |
parent | 46fbd223191890161d0409ee542ac6bc22537bf7 (diff) |
Update upstream source from tag 'upstream/0.0.8'
Update to upstream version '0.0.8'
with Debian dir e343cd97b840fe74c9b2e75b834a8faac2e35ef3
Diffstat (limited to 'script')
-rwxr-xr-x | script/BuildLangModel.py | 21 | ||||
-rw-r--r-- | script/BuildLangModelLogs/LangDanishModel.log | 384 | ||||
-rw-r--r-- | script/charsets/ibm865.py | 71 | ||||
-rw-r--r-- | script/langs/da.py | 2 | ||||
-rw-r--r-- | script/langs/no.py | 55 |
5 files changed, 388 insertions, 145 deletions
diff --git a/script/BuildLangModel.py b/script/BuildLangModel.py index 38ac793..43f975c 100755 --- a/script/BuildLangModel.py +++ b/script/BuildLangModel.py @@ -50,6 +50,7 @@ import requests import sys import re import os +import random # Custom modules. import charsets.db @@ -240,12 +241,22 @@ def visit_pages(titles, depth, lang, logfd): return next_titles = [] + if options.max_page is not None: + max_titles = int(options.max_page/(options.max_depth * options.max_depth)) + else: + max_titles = sys.maxsize for title in titles: if options.max_page is not None and \ len(visited_pages) > options.max_page: return if title in visited_pages: continue + + # Ugly hack skipping internal pages + if 'wiki' in title or 'Wiki' in title: + print('Skipping', title) + continue + visited_pages += [title] try: page = wikipedia.page(title) @@ -255,16 +266,22 @@ def visit_pages(titles, depth, lang, logfd): print("Discarding page {}.\n".format(title)) continue logfd.write("\n{} (revision {})".format(title, page.revision_id)) + logfd.flush() process_text(page.content, lang) try: - next_titles += page.links + links = page.links + random.shuffle(links) + if len(links) > max_titles: + links = links[:max_titles] + next_titles += links except KeyError: pass if depth >= options.max_depth: return + random.shuffle(next_titles) visit_pages (next_titles, depth + 1, lang, logfd) language_c = lang.name.replace('-', '_').title() @@ -277,6 +294,7 @@ logfd.write('\n- Maximum depth: {}'.format(options.max_depth)) if options.max_page is not None: logfd.write('\n- Max number of pages: {}'.format(options.max_page)) logfd.write('\n\n== Parsed pages ==\n') +logfd.flush() try: visit_pages(lang.start_pages, 0, lang, logfd) except requests.exceptions.ConnectionError: @@ -284,6 +302,7 @@ except requests.exceptions.ConnectionError: exit(1) logfd.write('\n\n== End of Parsed pages ==') logfd.write('\n\n- Wikipedia parsing ended at: {}\n'.format(str(datetime.datetime.now()))) +logfd.flush() ########### CHARACTERS ########### diff --git a/script/BuildLangModelLogs/LangDanishModel.log b/script/BuildLangModelLogs/LangDanishModel.log index cf183b3..14bf65d 100644 --- a/script/BuildLangModelLogs/LangDanishModel.log +++ b/script/BuildLangModelLogs/LangDanishModel.log @@ -1,158 +1,256 @@ = Logs of language model for Danish (da) = - Generated by BuildLangModel.py -- Started: 2016-02-19 17:53:58.564190 -- Maximum depth: 4 -- Max number of pages: 100 +- Started: 2022-11-30 19:37:01.097250 +- Maximum depth: 2 +- Max number of pages: 200 == Parsed pages == -Forside (revision 2692411) -16. februar (revision 6877446) -17. februar (revision 8454583) -1878 (revision 8280505) -19. februar (revision 8206479) -1922 (revision 8455105) -1926 (revision 8425271) -1942 (revision 8443554) -1945 (revision 8448461) -1948 (revision 8454392) -1985 (revision 8409096) -2. verdenskrig (revision 8433181) -23. oktober (revision 6877825) -26. oktober (revision 7849938) -3C 273 (revision 8443798) -A-bus (revision 8427319) -Aktuelle begivenheder (revision 8440596) -B-52 Stratofortress (revision 8422571) -Borgerkrigen i Syrien (revision 8447763) -Boutros Boutros-Ghali (revision 8453935) -Brasilien (revision 8452750) -Cusco (region) (revision 7693764) -Danmark (revision 8451178) -Danmark i Eurovision Song Contest (revision 8453514) -Dansk (sprog) (revision 8455750) -Dansk Melodi Grand Prix 2016 (revision 8452164) -Dobbeltmordet på Peter Bangs Vej (revision 8334648) -Encyklopædi (revision 8446641) -Eritrea-sagen (revision 8452285) -Eurovision Song Contest 2014 (revision 8445804) -Eurovision Song Contest 2016 (revision 8453588) -Flygtningekrisen i Europa 2015 (revision 8452286) -Fonograf (revision 8177165) -Formel 1 (revision 8450846) -Formel 1 2016 (revision 8456463) -Frederik 6. (revision 8438503) -Første observation af gravitationsbølger (revision 8451269) -Grammofon (revision 8375093) -Guadalcanal (revision 7796248) -Harper Lee (revision 8456583) -Hartkorn (revision 8437552) -IC4 (revision 8446402) -IC4-sagen (revision 8434463) -Islamisk Stat (revision 8439228) -Jonathan Leunbach (revision 8452603) -Juliane Marie af Braunschweig-Wolfenbüttel (revision 8437957) -Kaliumklorid (revision 8452216) -Kejserriget Japan (revision 8044942) -Kevin Magnussen (revision 8455302) -København (revision 8427847) -LIGO (revision 8451266) -Latinamerika (revision 7692181) -Leonid Hurwicz (revision 8445727) -Lighthouse X (revision 8452940) -Linkoban (revision 8455879) -Machu Picchu (revision 8406907) -Matador (tv-serie) (revision 8454648) -Middelaldercentret (revision 8449194) -Nobelprisen (revision 8409809) -Nykøbing Falster (revision 8452825) -Nyligt afdøde (revision 8456580) -Overvågning (revision 8455039) -Panorama (foto) (revision 8448393) -Peru (revision 8437485) -Peter Lauritsen (revision 8456097) -Professor (revision 8415451) -Renault F1 (revision 8450843) -S-bus (revision 8455589) -Salomonøerne (revision 8238961) -Slaget om Belgien (1940) (revision 8430013) -Slaget om Guadalcanal (revision 7762887) -Slaget om Henderson Field (revision 8445480) -Slaget om Iwo Jima (revision 8145239) -Soldiers of Love (Lighthouse X-sang) (revision 8452929) -Solen (revision 8276478) -Stillehavskrigen (revision 8430649) -Stockholm (revision 8358042) -Søslaget ved Guadalcanal (revision 7772812) -Thomas Edison (revision 8282441) -Togulykken ved Bad Aibling (revision 8455364) -Topografi (revision 6886168) -USA (revision 8448088) -United States Army (revision 8401635) -United States Marine Corps (revision 8401667) -Vestallierede (revision 6961443) -Wikimedia (revision 8263252) -Wikipedia (revision 8267051) -Zikavirus (revision 8454832) -1. februar (revision 8404985) -10. februar (revision 6877431) -11. februar (revision 6877433) -12. februar (revision 6877437) -13. februar (revision 6877438) -14. februar (revision 6877441) -1497 (revision 7369489) -15. februar (revision 7329463) -1560 (revision 7874693) -1568 (revision 7369703) -1620 (revision 7423903) -1688 (revision 7367090) -18. februar (revision 6877450) +Forside (revision 10000691) +Hans Magnus Enzensberger (revision 11341046) +28. november (revision 9410945) +Golfkrigen (revision 11144370) +29. november (revision 6877900) +8. december (revision 10277754) +Det Konservative Folkeparti (revision 11313857) +1990 (revision 11340072) +1940 (revision 11263756) +Angolas håndboldlandshold (damer) (revision 11331888) +Skjoldvulkan (revision 10870812) +Casper & Mandrilaftalen (revision 11221713) +26. november (revision 10617630) +Døde i 2022 (revision 11343986) +Vikingetidens rustning og våben (revision 11332607) +Middelaldercentret (revision 11339897) +Ruslands invasion af Ukraine 2022 (revision 11335164) +Saddam Hussein (revision 11002258) +The Jimi Hendrix Experience (revision 10497780) +Færøerne (revision 11333678) +27. november (revision 9745974) +Thomas Vinterberg (revision 11234643) +Anwar Ibrahim (revision 11342876) +Mandatområdet i Palæstina (revision 11341286) +Kunst (revision 11336917) +Afrikamesterskabet i håndbold 2022 (kvinder) (revision 11341917) +Dansk (sprog) (revision 11313509) +Sergej Sjojgu (revision 11309097) +Fernando Gomes (revision 11340427) +Folketinget (revision 11330485) +15. januar (revision 10515606) +Rock and Roll Hall of Fame (revision 8408189) +Thomas Edison (revision 11052704) +Ukraine (revision 11334630) +1947 (revision 11252357) +1937 (revision 11303923) +IC4 (revision 11317878) +Jimi Hendrix (revision 11341476) +Ismail Sabri Yaakob (revision 11105534) +Okipa-ceremonien (revision 11340589) +SI-præfiks (revision 11332802) +Sporvejsmuseet Skjoldenæsholms historie (revision 11338275) +Irak (revision 11255676) +Woodstockfestivalen (revision 11226413) +Nikolaj Lie Kaas (revision 11322663) +Torben Rechendorff (revision 11342962) +Folketingsvalget 2022 (revision 11339557) +Kherson (revision 11314559) +Keltere (revision 11318773) +Little Richard (revision 11226619) +Invasion (revision 10307980) +Tate Gallery (revision 8312688) +24. januar (revision 10441562) +Hans Christian Ægidius (revision 9773029) +Slaget ved Irpin (1321) (revision 11230064) +Auschwitz (revision 11310714) +Jazz fusion (revision 11223082) +Lutsk (revision 11248429) +Planetarium (revision 11266837) +Bibliothèque nationale de France (revision 11055813) +Digtsamling (revision 10585337) +Kenneth Gøtterup (revision 11027437) +Straf (revision 11007456) +1716 (revision 11339928) +Kamel (revision 11285016) +Amnesti (revision 10831621) +Zulu Royal (revision 10969220) +Stephen Roche (revision 11239346) +13. december (revision 10768225) +Enhed (politisk parti) (revision 10158693) +The Everly Brothers (revision 10865882) +3. november (revision 9423371) +Annelise Gotfredsen (revision 11306090) +Virtual International Authority File (revision 8702589) +Europæiske Fællesskab (revision 10868689) +Væringer (revision 11331002) +Rom (revision 11341285) +Decentralisering (revision 11154770) +Kreml (Moskva) (revision 11045482) +Folketingsvalget 1994 (revision 11266325) +28. december (revision 6878014) +Østjyllands Storkreds (revision 11201505) +Bruxelles (revision 10802416) +Erik Haunstrup Clemmensen (revision 10627614) +Hviderussere (revision 10750673) +Hvidmelet Gåsefod (revision 11317723) +Mario Draghi (revision 11302527) +Folketingsvalget 1968 (revision 11300317) +Skudår (revision 10360386) +1921 (revision 11303917) +Rundkørsel (revision 11103019) +Valerij Zaluzjnyj (revision 11335164) +Angrebet på Pearl Harbor (revision 11309782) +Folketingsmedlemmer valgt i 2007 (revision 11187293) +Ingeniørvidenskab (revision 9816520) +Vikinger (revision 11327511) +Martin Luther King (revision 11320659) +1757 (revision 11186195) +Dieseltogsæt (revision 8177984) +El-værk (revision 11334293) +Soul (revision 11283982) +John McVie (revision 11040471) +Botswanas håndboldlandshold (herrer) (revision 11333322) +1971 (revision 11243510) +Rana Hussein (revision 11266594) +DR (revision 11342995) +Ewan McGregor (revision 11331681) +Eliane Paulo (revision 10589121) +Zepto- (revision 11332802) +København (revision 11336925) +Gallien (revision 9984925) +Augustoprøret (revision 11234324) +1991 (revision 11250037) +Afledte SI-enheder (revision 11097802) +Gemeinsame Normdatei (revision 11281765) +Litteraturvidenskab (revision 10931878) +Thorvald Stauning (revision 11107677) +Afrikamesterskabet i håndbold 2018 (mænd) (revision 11131830) +Folkeforbundet (revision 11315450) +Readymades (revision 10932287) +Al Anbar (revision 9458175) +2007 (revision 11250033) +Varieté (revision 10934358) +Damaskus (revision 11030795) +Palæstina (revision 11311424) +1569 (revision 10832219) +Pædagog (revision 11251603) +Carina Christensen (revision 11073847) +Vest-Tyskland (revision 10580737) +20. november (revision 6877846) +Tessa Jowell (revision 11225831) +Hillerød (revision 11317306) +Påskekrisen (1920) (revision 11287865) +Grad (vinkelmål) (revision 9624298) +Kvinde (revision 11333939) +1931 (revision 11236350) +Afrikaans (revision 11080347) +Den Store Danske Encyklopædi (revision 11301417) +22. juni (revision 10375853) +Automatic Train Control (revision 10619401) +Luc Montagnier (revision 11162267) +Reprise Records (revision 11081843) +1966 (revision 11336105) +Prosa (skriveform) (revision 11236012) +Michael af Rumænien (revision 10819975) +Mykolajiv (revision 11236676) +Khmelnytskyj oblast (revision 11188686) +Sierra Leones håndboldlandshold (herrer) (revision 11333322) +1969 (revision 11340081) +H. Edvard Hass (revision 10348478) +Københavns Idrætspark (revision 9400386) +Sanna Nielsen (revision 11315712) +19. maj (revision 7148596) +Patricia Schumann (revision 10952761) +Torstenssonfejden (revision 11326728) +International Standard Name Identifier (revision 10880739) +Bent Mejding (revision 11335462) +Afdeling Q (revision 11279134) +Alfred Bindslev (revision 10398140) +Sakser (revision 9042633) +Folketingsmedlemmer valgt i 1998 (revision 11213304) +1996 (revision 11229565) +1 (tal) (revision 9378579) +Farrah Fawcett (revision 10977527) +Google+ (revision 10469085) +1530 (revision 10865231) +De største helte (revision 10737852) +Afrikamesterskabet i håndbold 1974 (mænd) (revision 11018946) +1902 (revision 11217211) +ISO 639-3 (revision 10880691) +1974 (revision 11336110) +Dansk fonologi (revision 11226101) +Europa (revision 11149054) +Sovemedicin (revision 11327388) +Slotsbryggen (Nykøbing Falster) (revision 11005548) +Olieraffinaderi (revision 11322152) +Slaget ved Stiklestad (revision 11261889) +Rolling Stone (revision 11267586) +Jørgen Hald (revision 10296412) +Nikolaj Coster-Waldau (revision 11228953) +Aserbajdsjan (revision 11297538) +Kultstatus (revision 7820159) +Al Kut (revision 9425606) +Library of Congress Control Number (revision 8316539) +Rwandas håndboldlandshold (herrer) (revision 11333322) +Levon Helm (revision 11317127) +Howard Hughes (revision 11040881) +Wim Kieft (revision 10910953) +Afrikamesterskabet i håndbold 2016 (mænd) (revision 11018957) +24. februar (revision 10755036) +Iværksætter (revision 10972242) +1992 (revision 11303945) +Internationalt Standardbognummer (revision 11037702) +Afrikamesterskabet i håndbold 2014 (mænd) (revision 11018956) +En mand kommer hjem (revision 10737861) +Jamaica (revision 11243987) +Henitjesk (revision 11328921) +August (revision 11210562) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-02-19 17:56:42.162636 +- Wikipedia parsing ended at: 2022-11-30 19:41:17.518631 -53 characters appeared 1301488 times. +60 characters appeared 1532370 times. -First 30 characters: -[ 0] Char e: 15.272749345364689 % -[ 1] Char r: 8.48482659847805 % -[ 2] Char n: 7.695652975670924 % -[ 3] Char t: 6.977014002434137 % -[ 4] Char a: 6.780469739252302 % -[ 5] Char i: 6.164636170291236 % -[ 6] Char s: 6.0942551909814 % -[ 7] Char d: 5.953493232361728 % -[ 8] Char l: 5.076650725938311 % -[ 9] Char o: 4.883026197706011 % -[10] Char g: 4.012253666572415 % -[11] Char k: 3.232607599916403 % -[12] Char m: 3.0863135119186653 % -[13] Char f: 2.701600014752345 % -[14] Char v: 2.13970470722742 % -[15] Char b: 1.982423195603801 % -[16] Char u: 1.8339777239590376 % -[17] Char p: 1.5789619266562582 % -[18] Char h: 1.3433085821767086 % -[19] Char ø: 0.8730775850411222 % -[20] Char y: 0.859938777768216 % -[21] Char å: 0.7699648402443973 % -[22] Char æ: 0.7208671920140639 % -[23] Char j: 0.644108896893402 % -[24] Char c: 0.5698093259407694 % -[25] Char w: 0.11087309295206717 % -[26] Char z: 0.05309307500338075 % -[27] Char x: 0.032424424965885205 % -[28] Char é: 0.032193919575132464 % -[29] Char q: 0.012139950579644223 % +First 31 characters: +[ 0] Char e: 15.035728968852169 % +[ 1] Char r: 8.617892545534042 % +[ 2] Char n: 7.618264518360449 % +[ 3] Char t: 6.856503324915001 % +[ 4] Char a: 6.475133290262796 % +[ 5] Char i: 6.3714377076032545 % +[ 6] Char s: 6.279488635251278 % +[ 7] Char d: 5.919523352715076 % +[ 8] Char l: 5.094722553952375 % +[ 9] Char o: 4.86860223053179 % +[10] Char g: 3.8343872563414845 % +[11] Char k: 3.3303314473658454 % +[12] Char m: 3.2096034247603384 % +[13] Char f: 2.608247355403721 % +[14] Char v: 2.342188896937424 % +[15] Char u: 1.9602967951604378 % +[16] Char b: 1.9047619047619049 % +[17] Char p: 1.5793183108518178 % +[18] Char h: 1.45728512043436 % +[19] Char ø: 0.8954103773892728 % +[20] Char æ: 0.7449897870618715 % +[21] Char å: 0.7295235484902471 % +[22] Char y: 0.6777736447463732 % +[23] Char j: 0.666418684782396 % +[24] Char c: 0.5946344551250677 % +[25] Char w: 0.12248999915164091 % +[26] Char z: 0.06571519933175407 % +[27] Char x: 0.045354581465311905 % +[28] Char é: 0.021926819240783886 % +[29] Char ó: 0.009592983417842949 % +[30] Char q: 0.009397208246050236 % -The first 30 characters have an accumulated ratio of 0.9997241618823994. +The first 31 characters have an accumulated ratio of 0.9994694492844417. -964 sequences found. +1065 sequences found. -First 512 (typical positive ratio): 0.9968082796759031 -Next 512 (512-1024): 7.68351302509128e-07 -Rest: 3.903127820947816e-17 +First 512 (typical positive ratio): 0.9958348814328518 +Next 512 (512-1024): 2.6103356239028435e-06 +Rest: 3.268948339453948e-05 -- Processing end: 2016-02-19 17:56:42.304278 +- Processing end: 2022-11-30 19:41:17.605842 diff --git a/script/charsets/ibm865.py b/script/charsets/ibm865.py new file mode 100644 index 0000000..7fc3122 --- /dev/null +++ b/script/charsets/ibm865.py @@ -0,0 +1,71 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# ##### BEGIN LICENSE BLOCK ##### +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Jehan <jehan@girinstud.io> +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 2 or later (the "GPL"), or +# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +# in which case the provisions of the GPL or the LGPL are applicable instead +# of those above. If you wish to allow use of your version of this file only +# under the terms of either the GPL or the LGPL, and not to allow others to +# use your version of this file under the terms of the MPL, indicate your +# decision by deleting the provisions above and replace them with the notice +# and other provisions required by the GPL or the LGPL. If you do not delete +# the provisions above, a recipient may use your version of this file under +# the terms of any one of the MPL, the GPL or the LGPL. +# +# ##### END LICENSE BLOCK ##### + +from codepoints import * + +name = 'IBM865' +aliases = ['CP865', '865', 'CSIBM865'] + +language = \ +{ + 'complete': [ 'no', 'da' ], + 'incomplete': [] +} + +# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF # +charmap = \ +[ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X + SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X + SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 8X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,LET,SYM,SYM, # 9X + LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # AX + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # BX + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # CX + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # DX + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM, # EX + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # FX +] diff --git a/script/langs/da.py b/script/langs/da.py index 18d2379..31226a0 100644 --- a/script/langs/da.py +++ b/script/langs/da.py @@ -50,7 +50,7 @@ code = 'da' # ASCII characters are also used in French. use_ascii = True # The charsets we want to support and create data for. -charsets = ['ISO-8859-15', 'ISO-8859-1', 'WINDOWS-1252'] +charsets = ['ISO-8859-15', 'ISO-8859-1', 'WINDOWS-1252', 'IBM865' ] ## Optional Properties ## diff --git a/script/langs/no.py b/script/langs/no.py new file mode 100644 index 0000000..93cf23f --- /dev/null +++ b/script/langs/no.py @@ -0,0 +1,55 @@ +#!/bin/python3 +# -*- coding: utf-8 -*- + +# ##### BEGIN LICENSE BLOCK ##### +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Jehan <jehan@girinstud.io> +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 2 or later (the "GPL"), or +# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +# in which case the provisions of the GPL or the LGPL are applicable instead +# of those above. If you wish to allow use of your version of this file only +# under the terms of either the GPL or the LGPL, and not to allow others to +# use your version of this file under the terms of the MPL, indicate your +# decision by deleting the provisions above and replace them with the notice +# and other provisions required by the GPL or the LGPL. If you do not delete +# the provisions above, a recipient may use your version of this file under +# the terms of any one of the MPL, the GPL or the LGPL. +# +# ##### END LICENSE BLOCK ##### + +## Mandatory Properties ## + +name = 'Norwegian' +code = 'no' +use_ascii = True +charsets = ['IBM865', 'ISO-8859-15', 'ISO-8859-1', 'WINDOWS-1252'] + +## Optional Properties ## + +# Alphabet characters. +alphabet = 'æøåéìîàêÆØÅ' +# Some pages that should contain most norwegian-norwegian norwegian +start_pages = ['Norsk', 'Saft', 'Hund'] +wikipedia_code = code +case_mapping = True |