source: nutchez-0.1/conf/domain-suffixes.xsd @ 222

Last change on this file since 222 was 66, checked in by waue, 16 years ago

NutchEz - an easy way to nutch

  • Property svn:executable set to *
File size: 4.5 KB
Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<!--
3  Licensed to the Apache Software Foundation (ASF) under one or more
4  contributor license agreements.  See the NOTICE file distributed with
5  this work for additional information regarding copyright ownership.
6  The ASF licenses this file to You under the Apache License, Version 2.0
7  (the "License"); you may not use this file except in compliance with
8  the License.  You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12  Unless required by applicable law or agreed to in writing, software
13  distributed under the License is distributed on an "AS IS" BASIS,
14  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  See the License for the specific language governing permissions and
16  limitations under the License.
17-->
18
19<!--
20  Document   : domain-suffixes.xsd
21  Author     : Enis Soztutar - enis.soz.nutch@gmail.com
22  Description: This document is the schema for valid domain-suffixes
23  definitions. For successful parsing of domain-suffixes xml files,
24  the xml file should be validated with this xsd.
25  See        : org.apache.nutch.util.domain.DomainSuffixesReader.java
26-->
27
28<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
29  targetNamespace="http://lucene.apache.org/nutch"
30  xmlns="http://lucene.apache.org/nutch"
31  elementFormDefault="qualified">
32
33  <xs:element name="domains">
34    <xs:complexType>
35      <xs:sequence>
36        <xs:element name="tlds">
37          <xs:complexType>
38            <xs:sequence>
39              <xs:element name="itlds">
40                <xs:complexType>
41                  <xs:sequence>
42                    <xs:element name="tld" maxOccurs="unbounded"
43                      type="gtld" />
44                  </xs:sequence>
45                </xs:complexType>
46              </xs:element>
47
48              <xs:element name="gtlds">
49                <xs:complexType>
50                  <xs:sequence>
51                    <xs:element name="tld" maxOccurs="unbounded"
52                      type="gtld" />
53                  </xs:sequence>
54                </xs:complexType>
55              </xs:element>
56
57              <xs:element name="cctlds">
58                <xs:complexType>
59                  <xs:sequence>
60                    <xs:element name="tld" maxOccurs="unbounded"
61                      type="cctld" />
62                  </xs:sequence>
63                </xs:complexType>
64              </xs:element>
65
66            </xs:sequence>
67          </xs:complexType>
68        </xs:element>
69
70        <xs:element name="suffixes">
71          <xs:complexType>
72            <xs:sequence>
73              <xs:element name="suffix" maxOccurs="unbounded"
74                type="sldType" />
75            </xs:sequence>
76          </xs:complexType>
77        </xs:element>
78      </xs:sequence>
79    </xs:complexType>
80  </xs:element>
81
82  <xs:complexType name="gtld">
83    <xs:sequence>
84      <xs:element name="status" minOccurs="0">
85        <xs:simpleType>
86          <xs:restriction base="xs:string">
87            <xs:enumeration value="INFRASTRUCTURE" />
88            <xs:enumeration value="SPONSORED" />
89            <xs:enumeration value="UNSPONSORED" />
90            <xs:enumeration value="STARTUP" />
91            <xs:enumeration value="PROPOSED" />
92            <xs:enumeration value="DELETED" />
93            <xs:enumeration value="PSEUDO_DOMAIN" />
94          </xs:restriction>
95        </xs:simpleType>
96      </xs:element>
97      <xs:element name="boost" type="xs:float" minOccurs="0" />
98      <xs:element name="description" type="xs:string" minOccurs="0" />
99    </xs:sequence>
100    <xs:attribute name="domain" type="xs:string" />
101  </xs:complexType>
102
103  <xs:complexType name="cctld">
104    <xs:sequence>
105      <xs:element name="country" type="xs:string" />
106      <xs:element name="status" type="statusType" minOccurs="0" />
107      <xs:element name="boost" type="xs:float" minOccurs="0" />
108      <xs:element name="description" type="xs:string" minOccurs="0" />
109    </xs:sequence>
110    <xs:attribute name="domain" type="xs:string" />
111  </xs:complexType>
112
113  <xs:complexType name="sldType">
114    <xs:sequence>
115      <xs:element name="status" type="statusType" minOccurs="0" />
116      <xs:element name="boost" type="xs:float" minOccurs="0" />
117      <xs:element name="description" type="xs:string" minOccurs="0" />
118    </xs:sequence>
119    <xs:attribute name="domain" type="xs:string" />
120  </xs:complexType>
121
122  <xs:simpleType name="statusType">
123    <xs:restriction base="xs:string">
124      <xs:enumeration value="IN_USE" />
125      <xs:enumeration value="NOT_IN_USE" />
126      <xs:enumeration value="DELETED" />
127    </xs:restriction>
128  </xs:simpleType>
129
130</xs:schema>
Note: See TracBrowser for help on using the repository browser.