From 3a31948f021edbf575e87af0977889d8c39c9166 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 26 Jan 2026 11:32:09 +0100 Subject: [PATCH] chore: initial commit --- .gitignore | 2 + converter.py | 170 +++++++++++++++++++++++++++++++++++++++++++ src/lib.typ | 198 +++++++++++++++++++++++++++++++++++++++++++++++++++ typst.toml | 14 ++++ 4 files changed, 384 insertions(+) create mode 100644 .gitignore create mode 100644 converter.py create mode 100644 src/lib.typ create mode 100644 typst.toml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9232550 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +law +.vscode \ No newline at end of file diff --git a/converter.py b/converter.py new file mode 100644 index 0000000..0597e46 --- /dev/null +++ b/converter.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import json + +from bs4 import BeautifulSoup + + +def get_eid(elmt): + return elmt["eId"].split("/")[-1] + + +def register_path(elmts, elmt): + parents = list(reversed(elmt.find_parents())) + in_body = False + + elmt = elmts + path = [] + for parent in parents: + name = parent.name + if not in_body: + if name == "body": + in_body = True + continue + + if not name in elmt["children"]: + elmt["children"][name] = {} + + eid = get_eid(parent) + if not eid in elmt["children"][name]: + elmt["children"][name][eid] = get_meta(parent) + + path.append((name, eid)) + elmt = elmt["children"][name][eid] + + return path + + +def get_meta(elmt): + name = elmt.name + meta = { + "children": {} + } + num = elmt.find("num") + if num: + meta["num"] = get_text(num) + + title = elmt.find("heading") + if title: + meta["heading"] = next(title.stripped_strings) + + return meta + + +def get_text(elmt): + children = list(filter(lambda e: e.name in (None, "b", "i"), elmt.children)) + return "".join(c.get_text().strip() for c in children).strip() + + +def parse_elmt(elmt): + name = elmt.name + + if name is None: + return elmt.get_text().strip() + + if 
name == "sup": + return { + "type": "sup", + "body": list(map(parse_elmt, elmt.children)) + } + + if name == "sub": + return { + "type": "sub", + "body": list(map(parse_elmt, elmt.children)) + } + + if name == "blockList": + intro = parse_elmt(elmt.find("listIntroduction")) + items = [] + + for item in elmt.find_all("item", recursive=False): + num = get_text(item.find("num")) + item_body = parse_elmt(item) + items.append((num, item_body)) + + return { + "type": "enum", + "intro": intro, + "items": items + } + + body = [] + for child in elmt.children: + if child.name != "num": + body.append(parse_elmt(child)) + + body = remove_empty(body) + return body + + +def remove_empty(body): + body = list(filter(lambda p: not isinstance(p, str) or p.strip(), body)) + if len(body) == 1: + return body[0] + return body + + +def convert(in_path, out_path): + with open(in_path, "r") as f: + bs = BeautifulSoup(f.read(), "xml") + + elmts = { + "children": { + "article": {} + } + } + for t in bs.find_all("transitional"): + t.extract() + + for p in bs.find_all("proviso"): + p.extract() + + for article in bs.find_all("article"): + pars = [] + for par in article.find_all("paragraph"): + num = par.find("num") + if num is None: + num = "1" + else: + num = get_text(num) + + text = par.find("content") + for note in text.find_all("authorialNote"): + note.extract() + + body = [] + + for child in text.children: + body.append(parse_elmt(child)) + + body = remove_empty(body) + + pars.append({ + "num": num, + "content": body + }) + + path = register_path(elmts, article) + eid = get_eid(article) + articles = elmts["children"]["article"] + if eid not in articles: + articles[eid] = [] + articles[eid].append({ + "num": get_text(article.num), + "path": path, + "children": pars + }) + + with open(out_path, "w") as f: + json.dump(elmts, f) + + +if __name__ == "__main__": + paths = [ + ("./raw/RS-311.0-01072024-FR.xml", "./law/code_penal.json"), + ] + + for in_path, out_path in paths: + print(f"{in_path} 
-> {out_path}") + convert(in_path, out_path) diff --git a/src/lib.typ b/src/lib.typ new file mode 100644 index 0000000..dd0dbd9 --- /dev/null +++ b/src/lib.typ @@ -0,0 +1,198 @@ +#import "@preview/showybox:2.0.3": showybox + +#let init-registries(regs) = { + [#metadata(regs) ] +} + +#let get-registries() = { + return query().last().value +} + +#let min-box(min-width: 0pt, min-height: 0pt, body) = grid( + columns: 2, + align: center + horizon, + v(min-height), + grid( + align: center + horizon, + rows: 2, + h(min-width), + body + ) +) + +#let badge(regs, registry) = box( + stroke: black, + inset: 0.2em, + radius: 0.3em, + baseline: 30%, + fill: regs.at(registry).color, + min-box( + strong(registry), + min-width: 1.5em, + min-height: 1.5em + ) +) + +#let parse-body(body, highlight: none) = { + if type(body) == str { + return eval("[" + body + "]") + } + if type(body) == array { + let result = [] + for part in body { + result += parse-body(part) + } + return result + } + if type(body) == dictionary { + if body.type == "sup" { + return super(parse-body(body.body)) + } + if body.type == "sub" { + return sub(parse-body(body.body)) + } + if body.type == "enum" { + let result = parse-body(body.intro) + linebreak() + let cells = () + for (i, (num, item)) in body.items.enumerate() { + let h = none + if highlight != none and i == highlight.at(0, default: none) { + h = highlight.slice(1) + } + let num-b = parse-body(num, highlight: h) + let item-b = parse-body(item, highlight: h) + + if h != none and h.len() == 0 { + num-b = strong(num-b) + item-b = strong(item-b) + } + cells.push(num-b) + cells.push(item-b) + } + result += grid( + columns: (2em, auto), + column-gutter: 1em, + row-gutter: 1em, + ..cells + ) + return result + } + } +} + +/// Cites an article from the Swiss law +/// - registry (str): registry id, must have been registered with `init-registries` +/// - article-id (str): article id (e.g. "art_28") +/// - par (none, int): optional paragraph to highlight. 
If not none, indicates the paragraph's number (as written in the law) +/// - highlight (none, int, array): optional part to highlight. If not none, indicates the index or path of indices to the part to highlight +#let cite-law( + registry, + article-id, + par: none, + highlight: none, + whole-path: false +) = context { + let regs = get-registries() + + let highlight = if type(highlight) == int { + (highlight,) + } else { + highlight + } + + assert( + registry in regs, + message: "Unknown registry '" + registry + "'. Available registries: " + regs.keys().map(r => "'" + r + "'").join(", ") + ) + let reg = regs.at(registry).data + + let article-id = article-id + let article-idx = none + if type(article-id) == array { + (article-id, article-idx) = article-id + } + assert( + article-id in reg.children.article, + message: "Could not find article " + article-id + ". Existing ids: " + reg.children.article.keys().map(k => "'" + k + "'").join(", ") + ) + + let article = reg.children.article.at(article-id) + if article-idx != none { + if type(article) != array and article-idx != 0 { + panic("Provided an article index but there is only one") + } + article = article.at(article-idx) + } else { + if type(article) == array { + article = article.first() + //panic("Multiple articles with id " + article-id + ". Please provide an article index to choose (article-id, article-index)") + } + } + + let path = () + let elmt = reg + for (etype, eid) in article.path { + if eid.starts-with("lvl_u") { + continue + } + elmt = elmt.children.at(etype).at(eid) + if whole-path { + path.push(elmt.num.trim(":")) + } else { + path.push(elmt.heading.trim(".")) + } + } + + if not whole-path { + path = (path.last(),) + } + path.push(article.num) + + if par != none { + path.push("Par. 
" + str(par)) + } + + // Don't enum if only one paragraph + let par = par + if par == none and article.children.len() == 1 { + par = 1 + } + let body = if par == none { + enum( + spacing: 1em, + ..article.children.enumerate().map(((i, c)) => { + let h = if highlight != none { + if i == highlight.at(0, default: none) { + highlight.slice(1) + } + } + let r = parse-body(c.content, highlight: h) + if h != none and h.len() == 0 { + r = strong(r) + } + r + }) + ) + } else { + parse-body(article.children.at(par - 1).content, highlight: highlight) + } + + return showybox( + title: badge(regs, registry) + h(0.4em) + path.join[ #sym.arrow.r ], + frame: ( + title-color: gray.lighten(50%), + body-color: gray.lighten(80%), + border-color: gray.darken(30%) + ), + title-style: ( + color: black, + weight: "semibold", + boxed-style: (:) + ), + shadow: ( + offset: (x: 3pt, y: 5pt), + color: gray + ), + body + ) +} \ No newline at end of file diff --git a/typst.toml b/typst.toml new file mode 100644 index 0000000..8cebddf --- /dev/null +++ b/typst.toml @@ -0,0 +1,14 @@ +[package] +name = "fabulous-fedlex" +version = "0.1.0" +compiler = "0.13.1" +repository = "https://git.kb28.ch/HEL/fabulous-fedlex" +entrypoint = "src/lib.typ" +authors = [ + "Louis Heredero " +] +categories = ["visualization"] +license = "Apache-2.0" +description = "A package to draw sequence diagrams with CeTZ" +keywords = ["fedlex", "swiss", "law", "article"] +exclude = [ "gallery", "law" ] \ No newline at end of file