Rogaton Claude committed on
Commit
0bf8a8c
·
1 Parent(s): 1fe0b70

Add interactive test corpus for Coptic translation and parsing

Browse files

- Added coptic_test_corpus.json with categorized examples:
* 5 simple sentences demonstrating basic grammatical structures
* 3 complex multi-clause sentences with subordination
* 2 paragraph-length texts (healing, parable)
* Dialectal comparison examples
* Grammar pattern demonstrations

- Integrated test corpus UI in sidebar:
* Category selection dropdown
* Example browser with preview
* Load button to insert examples into chat
* Displays English translation and grammar notes

- Connected loaded examples to chat processing:
* Examples automatically populate as user input
* Seamless integration with translation/parsing workflow

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. apertus_ui.py +103 -3
  2. coptic_test_corpus.json +176 -0
apertus_ui.py CHANGED
@@ -451,7 +451,98 @@ with st.sidebar:
451
  st.write(f"**{match}** → {coptic_lexicon[match][:100]}...")
452
  else:
453
  st.write("No matches found")
454
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
  # Linguistic analysis options for Coptic input
456
  if selected_lang in ['cop', 'cop-sa', 'cop-bo']:
457
  st.subheader("Analysis Type")
@@ -518,8 +609,17 @@ for message in st.session_state.messages:
518
  with st.chat_message(message["role"]):
519
  st.markdown(message["content"])
520
 
521
- # User input
522
- if prompt := st.chat_input("Type your message..."):
 
 
 
 
 
 
 
 
 
523
  # Handle dependency parsing (doesn't need API token)
524
  if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and analysis_type == 'dependency_parse':
525
  st.session_state.messages.append({"role": "user", "content": prompt})
 
451
  st.write(f"**{match}** → {coptic_lexicon[match][:100]}...")
452
  else:
453
  st.write("No matches found")
454
+
455
+ # Test Corpus Examples
456
+ if selected_lang in ['cop', 'cop-sa', 'cop-bo']:
457
+ st.divider()
458
+ st.subheader("📖 Example Texts")
459
+
460
+ try:
461
+ import json
462
+ from pathlib import Path
463
+
464
+ corpus_path = Path(__file__).parent / "coptic_test_corpus.json"
465
+ if corpus_path.exists():
466
+ with open(corpus_path, 'r', encoding='utf-8') as f:
467
+ corpus = json.load(f)
468
+
469
+ # Category selection
470
+ categories = {
471
+ "simple_sentences": "Simple Sentences",
472
+ "complex_sentences": "Complex Sentences",
473
+ "short_texts": "Short Texts (Paragraphs)",
474
+ "grammar_patterns": "Grammar Patterns"
475
+ }
476
+
477
+ selected_category = st.selectbox(
478
+ "Choose category:",
479
+ options=list(categories.keys()),
480
+ format_func=lambda x: categories[x],
481
+ key="corpus_category"
482
+ )
483
+
484
+ if selected_category in corpus['categories']:
485
+ category_data = corpus['categories'][selected_category]
486
+
487
+ if selected_category == 'grammar_patterns':
488
+ # Handle grammar patterns differently
489
+ pattern_names = [p['pattern'] for p in category_data['patterns']]
490
+ selected_pattern = st.selectbox("Select pattern:", pattern_names, key="pattern_select")
491
+
492
+ pattern_data = next(p for p in category_data['patterns'] if p['pattern'] == selected_pattern)
493
+ st.caption(f"**Structure:** {pattern_data['structure']}")
494
+
495
+ example_texts = [f"{ex['coptic']} → {ex['english']}" for ex in pattern_data['examples']]
496
+ selected_example_idx = st.selectbox(
497
+ "Select example:",
498
+ range(len(pattern_data['examples'])),
499
+ format_func=lambda i: example_texts[i],
500
+ key="pattern_example"
501
+ )
502
+
503
+ example = pattern_data['examples'][selected_example_idx]
504
+
505
+ else:
506
+ # Handle regular examples
507
+ examples = category_data['examples']
508
+ example_labels = []
509
+ for ex in examples:
510
+ label = ex.get('title', ex['coptic'][:30] + '...' if len(ex['coptic']) > 30 else ex['coptic'])
511
+ example_labels.append(label)
512
+
513
+ selected_example_idx = st.selectbox(
514
+ "Select example:",
515
+ range(len(examples)),
516
+ format_func=lambda i: example_labels[i],
517
+ key="example_select"
518
+ )
519
+
520
+ example = examples[selected_example_idx]
521
+
522
+ # Display example details
523
+ with st.expander("📝 View Example", expanded=True):
524
+ st.markdown(f"**Coptic:**")
525
+ st.code(example['coptic'], language="")
526
+ st.markdown(f"**English:**")
527
+ st.write(example['english'])
528
+
529
+ if 'grammar_notes' in example:
530
+ st.caption(f"*Grammar:* {example['grammar_notes']}")
531
+ elif 'analysis' in example:
532
+ st.caption(f"*Analysis:* {example['analysis']}")
533
+
534
+ if 'source' in example:
535
+ st.caption(f"*Source:* {example['source']}")
536
+
537
+ # Load button
538
+ if st.button("📥 Load This Example", key="load_example", use_container_width=True):
539
+ st.session_state['example_text'] = example['coptic']
540
+ st.success("✓ Example loaded! Scroll down to chat input.")
541
+ st.rerun()
542
+
543
+ except Exception as e:
544
+ st.info("💡 Test corpus not available")
545
+
546
  # Linguistic analysis options for Coptic input
547
  if selected_lang in ['cop', 'cop-sa', 'cop-bo']:
548
  st.subheader("Analysis Type")
 
609
  with st.chat_message(message["role"]):
610
  st.markdown(message["content"])
611
 
612
+ # Check if an example was loaded from the test corpus
613
+ prompt = None
614
+ if 'example_text' in st.session_state:
615
+ prompt = st.session_state['example_text']
616
+ del st.session_state['example_text'] # Clear after using
617
+
618
+ # User input (or use loaded example)
619
+ if not prompt:
620
+ prompt = st.chat_input("Type your message...")
621
+
622
+ if prompt:
623
  # Handle dependency parsing (doesn't need API token)
624
  if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and analysis_type == 'dependency_parse':
625
  st.session_state.messages.append({"role": "user", "content": prompt})
coptic_test_corpus.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "title": "Coptic Test Corpus",
4
+ "description": "Curated collection of Coptic texts for testing parser and translator",
5
+ "author": "André Linden",
6
+ "license": "CC BY-NC-SA 4.0",
7
+ "sources": "CopticScriptorium, Biblical texts, Documentary papyri",
8
+ "last_updated": "2025-11-17"
9
+ },
10
+ "categories": {
11
+ "simple_sentences": {
12
+ "name": "Simple Sentences",
13
+ "description": "Single clause sentences demonstrating basic grammatical structures",
14
+ "examples": [
15
+ {
16
+ "id": "simple_01",
17
+ "dialect": "Sahidic",
18
+ "coptic": "ⲁⲩⲱ ⲁϥⲙⲟⲩⲧⲉ ⲉⲣⲟϥ",
19
+ "english": "and he called him",
20
+ "grammar_notes": "Perfect conjugation (ⲁ-) with suffix pronoun",
21
+ "source": "CopticScriptorium"
22
+ },
23
+ {
24
+ "id": "simple_02",
25
+ "dialect": "Sahidic",
26
+ "coptic": "ⲁⲛⲟⲕ ⲡⲉ ⲡⲛⲟⲩⲧⲉ ⲙⲡⲉⲕⲉⲓⲱⲧ",
27
+ "english": "I am the God of your father",
28
+ "grammar_notes": "Tripartite nominal sentence: Subject (ⲁⲛⲟⲕ) - Copula (ⲡⲉ) - Predicate (ⲡⲛⲟⲩⲧⲉ)",
29
+ "source": "Biblical (Genesis 46:3)"
30
+ },
31
+ {
32
+ "id": "simple_03",
33
+ "dialect": "Sahidic",
34
+ "coptic": "ⲙⲡⲣⲣ ϩⲟⲧⲉ",
35
+ "english": "Do not be afraid",
36
+ "grammar_notes": "Negative imperative with ⲙⲡⲣ-",
37
+ "source": "Biblical"
38
+ },
39
+ {
40
+ "id": "simple_04",
41
+ "dialect": "Sahidic",
42
+ "coptic": "ⲡϫⲟⲉⲓⲥ ⲡⲉ ⲡⲁⲛⲟⲩⲧⲉ",
43
+ "english": "The Lord is my God",
44
+ "grammar_notes": "Nominal sentence with possessive article",
45
+ "source": "Biblical"
46
+ },
47
+ {
48
+ "id": "simple_05",
49
+ "dialect": "Sahidic",
50
+ "coptic": "ⲁϥⲃⲱⲕ ⲉϩⲣⲁⲓ ⲉⲡⲉⲣⲡⲉ",
51
+ "english": "he went up to the temple",
52
+ "grammar_notes": "Perfect with directional ⲉϩⲣⲁⲓ (upward)",
53
+ "source": "Biblical"
54
+ }
55
+ ]
56
+ },
57
+ "complex_sentences": {
58
+ "name": "Complex Sentences",
59
+ "description": "Multi-clause sentences with subordination and coordination",
60
+ "examples": [
61
+ {
62
+ "id": "complex_01",
63
+ "dialect": "Sahidic",
64
+ "coptic": "ⲁⲩⲱ ⲛⲧⲉⲣⲉϥⲛⲁⲩ ⲉⲡⲙⲏⲏϣⲉ ⲁϥϣⲡϩⲧⲏϥ ⲉϩⲣⲁⲓ ⲉϫⲱⲟⲩ",
65
+ "english": "and when he saw the crowd, he had compassion on them",
66
+ "grammar_notes": "Temporal clause (ⲛⲧⲉⲣⲉ-) followed by main clause",
67
+ "source": "Biblical (Matthew 9:36)"
68
+ },
69
+ {
70
+ "id": "complex_02",
71
+ "dialect": "Sahidic",
72
+ "coptic": "ⲉϣⲱⲡⲉ ⲇⲉ ⲁⲩⲛⲁⲩ ⲉⲣⲟϥ ⲉϥⲙⲟⲟϣⲉ ϩⲓϫⲛ ⲧⲉⲑⲁⲗⲁⲥⲥⲁ ⲁⲩϣⲧⲟⲣⲧⲣ",
73
+ "english": "but when they saw him walking on the sea, they were troubled",
74
+ "grammar_notes": "Conditional clause with circumstantial ⲉϣⲱⲡⲉ",
75
+ "source": "Biblical (Mark 6:49)"
76
+ },
77
+ {
78
+ "id": "complex_03",
79
+ "dialect": "Sahidic",
80
+ "coptic": "ⲁⲓⲉⲓ ⲅⲁⲣ ⲉⲙⲟⲩⲧⲉ ⲁⲛ ⲉⲛⲇⲓⲕⲁⲓⲟⲥ ⲁⲗⲗⲁ ⲛⲣⲉϥⲣⲛⲟⲃⲉ",
81
+ "english": "for I came not to call the righteous, but sinners",
82
+ "grammar_notes": "Purpose clause with negation and contrast (ⲁⲛ...ⲁⲗⲗⲁ)",
83
+ "source": "Biblical (Matthew 9:13)"
84
+ }
85
+ ]
86
+ },
87
+ "short_texts": {
88
+ "name": "Short Texts (Paragraphs)",
89
+ "description": "Connected discourse of 2-3 paragraphs",
90
+ "examples": [
91
+ {
92
+ "id": "text_01",
93
+ "dialect": "Sahidic",
94
+ "title": "The Healing at the Pool",
95
+ "coptic": "ⲛⲉⲩⲛⲟⲩⲙⲏⲏϣⲉ ⲇⲉ ⲛϣⲱⲛⲉ ⲉⲩⲛⲕⲟⲧⲕ ϩⲙ ⲡⲙⲁ ⲉⲧⲙⲙⲁⲩ· ⲛϩⲁⲛⲃⲗⲗⲉ ⲙⲛ ⲛϩⲁⲛϭⲁⲗⲉ ⲙⲛ ⲛϣⲟⲩⲱⲟⲩ·\n\nⲛⲉⲩⲛⲟⲩⲣⲱⲙⲉ ⲇⲉ ⲙⲙⲁⲩ ⲉϥϣⲟⲟⲡ ϩⲛ ⲧⲉϥϣⲱⲛⲉ ⲙⲛⲧⲥⲛⲟⲟⲩⲥ ⲛⲣⲟⲙⲡⲉ· ⲛⲧⲉⲣⲉ ⲓⲏⲥⲟⲩⲥ ⲛⲁⲩ ⲉⲣⲟϥ ⲉϥⲛⲏϫ ⲁϥⲉⲓⲙⲉ ϫⲉ ⲟⲩⲛⲧⲁϥ ⲙⲙⲁⲩ ⲛⲟⲩⲛⲟϭ ⲛⲟⲩⲟⲉⲓϣ· ⲁⲩⲱ ⲡⲉϫⲁϥ ⲛⲁϥ ϫⲉ ⲕⲟⲩⲱϣ ⲉⲧⲣⲉⲕⲗⲟ·\n\nⲁⲡⲉⲧϣⲱⲛⲉ ⲟⲩⲱϣⲃ ⲛⲁϥ ϫⲉ ⲡϫⲟⲉⲓⲥ ⲙⲛⲧⲁⲓ ⲣⲱⲙⲉ ϫⲉⲕⲁⲥ ⲉϥϣⲁⲛⲙⲟⲩϩ ⲙⲡⲙⲟⲟⲩ ⲉϥⲉⲛⲟϫⲧ ⲉϩⲣⲁⲓ ⲉⲧⲕⲟⲗⲩⲙⲃⲏⲑⲣⲁ· ⲡⲉϫⲉ ⲓⲏⲥⲟⲩⲥ ⲇⲉ ⲛⲁϥ ϫⲉ ⲧⲱⲟⲩⲛ ⲛⲅϥⲓ ⲙⲡⲉⲕϭⲗⲟϫ ⲛⲅⲙⲟⲟϣⲉ· ⲁⲩⲱ ⲛⲧⲉⲩⲛⲟⲩ ⲁⲡⲣⲱⲙⲉ ⲗⲟ ⲁϥϥⲓ ⲙⲡⲉϥϭⲗⲟϫ ⲁϥⲙⲟⲟϣⲉ·",
96
+ "english": "Now there was a crowd of sick people lying there - the blind, the lame, and the paralyzed.\n\nThere was a man there who had been in his sickness for thirty-eight years. When Jesus saw him lying there and knew that he had been there a long time, he said to him: Do you want to be healed?\n\nThe sick man answered him: Lord, I have no one to put me into the pool when the water is troubled. Jesus said to him: Rise, take up your bed and walk. And immediately the man was healed, he took up his bed and walked.",
97
+ "grammar_notes": "Narrative with temporal sequencing, dialogue, circumstantial clauses",
98
+ "source": "Biblical (John 5:3-9, adapted)"
99
+ },
100
+ {
101
+ "id": "text_02",
102
+ "dialect": "Sahidic",
103
+ "title": "The Sower",
104
+ "coptic": "ⲉⲓⲥ ⲡⲉⲧϫⲟ ⲁϥⲉⲓ ⲉⲃⲟⲗ ⲉϫⲟ· ⲁⲩⲱ ⲛⲧⲉⲣⲉϥϫⲟ ϩⲟⲓⲛⲉ ⲙⲉⲛ ⲁⲩϩⲉ ϩⲁⲧⲏ ⲧⲉϩⲓⲏ· ⲁⲩⲱ ⲁⲩⲉⲓ ⲛϭⲓ ⲛϩⲁⲗⲁⲧⲉ ⲁⲩⲟⲙⲟⲩ·\n\nϩⲉⲛⲕⲟⲟⲩⲉ ⲇⲉ ⲁⲩϩⲉ ⲉϫⲛ ⲙⲡⲉⲧⲣⲁ ⲡⲙⲁ ⲉⲧⲉⲙⲛ ⲕⲁϩ ⲙⲙⲁⲩ ⲉⲙⲁⲧⲉ· ⲁⲩⲱ ⲛⲧⲉⲩⲛⲟⲩ ⲁⲩⲣⲟⲩⲱ ⲉⲃⲟⲗ ϫⲉ ⲙⲉⲩⲛⲧⲟⲩ ⲛⲟⲩⲛⲟⲩⲛ ⲛⲕⲁϩ· ⲛⲧⲉⲣⲉⲡⲣⲏ ⲇⲉ ϣⲁ ⲁⲩⲣⲟⲕϩ· ⲁⲩⲱ ⲉⲃⲟⲗ ϫⲉ ⲙⲉⲩⲛⲧⲟⲩ ⲛⲟⲩⲛⲟⲩⲛⲉ ⲁⲩϣⲟⲟⲩⲉ·",
105
+ "english": "Behold, the sower went out to sow. And as he sowed, some seeds fell beside the road, and the birds came and ate them.\n\nOthers fell on rocky ground where there was not much soil, and immediately they sprang up because they had no depth of soil. But when the sun rose they were scorched, and because they had no root they withered.",
106
+ "grammar_notes": "Parable narrative with temporal markers and causal clauses",
107
+ "source": "Biblical (Matthew 13:3-6, adapted)"
108
+ }
109
+ ]
110
+ },
111
+ "dialectal_comparison": {
112
+ "name": "Dialectal Variants",
113
+ "description": "Same text in different Coptic dialects",
114
+ "examples": [
115
+ {
116
+ "id": "dialect_01",
117
+ "text": "The Lord is my shepherd",
118
+ "sahidic": "ⲡϫⲟⲉⲓⲥ ⲡⲉⲧϣⲱⲛⲉ ⲙⲙⲟⲓ",
119
+ "bohairic": "ⲡϭⲟⲓⲥ ⲡⲉⲧⲁⲙⲟⲛⲓ",
120
+ "english": "The Lord is my shepherd",
121
+ "notes": "Psalm 23:1 - Note lexical differences: ϣⲱⲛⲉ (S) vs ⲁⲙⲟⲛⲓ (B)",
122
+ "source": "Biblical"
123
+ }
124
+ ]
125
+ },
126
+ "grammar_patterns": {
127
+ "name": "Grammatical Patterns",
128
+ "description": "Examples demonstrating specific grammatical constructions",
129
+ "patterns": [
130
+ {
131
+ "pattern": "Tripartite Nominal Sentence",
132
+ "structure": "Subject - Copula - Predicate",
133
+ "examples": [
134
+ {
135
+ "coptic": "ⲁⲛⲟⲕ ⲡⲉ ⲡⲛⲟⲩⲧⲉ",
136
+ "english": "I am God",
137
+ "analysis": "ⲁⲛⲟⲕ (subject pronoun) - ⲡⲉ (copula m.sg) - ⲡⲛⲟⲩⲧⲉ (predicate)"
138
+ },
139
+ {
140
+ "coptic": "ⲛⲧⲟϥ ⲡⲉ ⲡⲣⲣⲟ",
141
+ "english": "He is the king",
142
+ "analysis": "ⲛⲧⲟϥ (subject) - ⲡⲉ (copula) - ⲡⲣⲣⲟ (predicate)"
143
+ }
144
+ ]
145
+ },
146
+ {
147
+ "pattern": "Perfect Tense (ⲁ-)",
148
+ "structure": "ⲁ- + Subject + Verb",
149
+ "examples": [
150
+ {
151
+ "coptic": "ⲁϥⲃⲱⲕ",
152
+ "english": "he went",
153
+ "analysis": "ⲁ- (perfect marker) + ϥ (he) + ⲃⲱⲕ (go)"
154
+ },
155
+ {
156
+ "coptic": "ⲁⲓⲉⲓ",
157
+ "english": "I came",
158
+ "analysis": "ⲁ- + ⲓ (I) + ⲉⲓ (come)"
159
+ }
160
+ ]
161
+ },
162
+ {
163
+ "pattern": "Circumstantial Clause (ⲉ-)",
164
+ "structure": "ⲉ- + Subject + Verb",
165
+ "examples": [
166
+ {
167
+ "coptic": "ⲁϥⲛⲁⲩ ⲉⲣⲟⲓ ⲉⲓⲙⲟⲟϣⲉ",
168
+ "english": "he saw me while I was walking",
169
+ "analysis": "Main clause + circumstantial ⲉⲓⲙⲟⲟϣⲉ (while I walk)"
170
+ }
171
+ ]
172
+ }
173
+ ]
174
+ }
175
+ }
176
+ }