mirror of
https://github.com/ceph/ceph
synced 2025-03-11 02:39:05 +00:00
Merge branch 'wip-3610' into next
This commit is contained in:
commit
b63940caa1
@ -146,15 +146,30 @@ appropriate placement groups in the secondary and tertiary OSDs (as many OSDs as
|
||||
additional replicas), and responds to the client once it has confirmed the
|
||||
object was stored successfully.
|
||||
|
||||
.. ditaa:: +--------+ Write +--------------+ Replica 1 +----------------+
|
||||
| Client |*-------------->| Primary OSD |*---------------->| Secondary OSD |
|
||||
| |<--------------*| |<----------------*| |
|
||||
+--------+ Write Ack +--------------+ Replica 1 Ack +----------------+
|
||||
^ *
|
||||
| | Replica 2 +----------------+
|
||||
| +----------------------->| Tertiary OSD |
|
||||
+--------------------------*| |
|
||||
Replica 2 Ack +----------------+
|
||||
|
||||
.. ditaa::
|
||||
+----------+
|
||||
| Client |
|
||||
| |
|
||||
+----------+
|
||||
* ^
|
||||
Write (1) | | Ack (6)
|
||||
| |
|
||||
v *
|
||||
+-------------+
|
||||
| Primary OSD |
|
||||
| |
|
||||
+-------------+
|
||||
* ^ ^ *
|
||||
Write (2) | | | | Write (3)
|
||||
+------+ | | +------+
|
||||
| +------+ +------+ |
|
||||
| | Ack (4) Ack (5)| |
|
||||
v * * v
|
||||
+---------------+ +---------------+
|
||||
| Secondary OSD | | Tertiary OSD |
|
||||
| | | |
|
||||
+---------------+ +---------------+
|
||||
|
||||
|
||||
Since any network device has a limit to the number of concurrent connections it
|
||||
@ -222,82 +237,84 @@ of striping:
|
||||
|
||||
|
||||
If you anticipate large image sizes, large S3 or Swift objects (video), or
|
||||
large CephFS files, you may see considerable read/write performance improvements
|
||||
by striping client data over mulitple objects within an object set. Significant
|
||||
write performance occurs when the client writes the stripe units to their
|
||||
corresponding objects simultaneously. Since objects get mapped to different
|
||||
placement groups and further mapped to different OSDs, each write occurs
|
||||
simultaneously at the maximum write speed. So the stripe count may serve as a
|
||||
proxy for the multiple of the performance improvement. Read performance is
|
||||
similarly affected. However, setting up connections between the client and the
|
||||
OSDs and the network latency also play a role in the overall performance.
|
||||
large CephFS directories, you may see considerable read/write performance
|
||||
improvements by striping client data over multiple objects within an object set.
|
||||
Significant write performance gains occur when the client writes the stripe units to
|
||||
their corresponding objects in parallel. Since objects get mapped to different
|
||||
placement groups and further mapped to different OSDs, each write occurs in
|
||||
parallel at the maximum write speed. A write to a single disk would be limited
|
||||
by the head movement (e.g. 6ms per seek) and bandwidth of that one device (e.g.
|
||||
100MB/s). By spreading that write over multiple objects (which map to different
|
||||
placement groups and OSDs) Ceph can reduce the number of seeks per drive and
|
||||
combine the throughput of multiple drives to achieve much faster write (or read)
|
||||
speeds.
|
||||
|
||||
In the following diagram, client data gets striped across an object set
|
||||
(``object set 1`` in the following diagram) consisting of 4 objects, where the
|
||||
first stripe unit is ``stripe 0`` in ``object 0``, and the fourth stripe unit is
|
||||
``stripe 3`` in ``object 3``. After writing the fourth stripe, the client
|
||||
determines if the object set is full. If the object set is not full, the client
|
||||
begins writing a stripe to the first object again (``object 0`` in the following
|
||||
diagram). If the object set is full, the client creates a new object set
|
||||
(``object set 2`` in the following diagram), and begins writing to the first
|
||||
stripe (``stripe 4``) in the first object in the new object set (``object 4`` in
|
||||
the diagram below).
|
||||
first stripe unit is ``stripe unit 0`` in ``object 0``, and the fourth stripe
|
||||
unit is ``stripe unit 3`` in ``object 3``. After writing the fourth stripe, the
|
||||
client determines if the object set is full. If the object set is not full, the
|
||||
client begins writing a stripe to the first object again (``object 0`` in the
|
||||
following diagram). If the object set is full, the client creates a new object
|
||||
set (``object set 2`` in the following diagram), and begins writing to the first
|
||||
stripe (``stripe unit 16``) in the first object in the new object set (``object
|
||||
4`` in the diagram below).
|
||||
|
||||
.. ditaa::
|
||||
+---------------+
|
||||
| Client Data |
|
||||
| Format |
|
||||
| cCCC |
|
||||
+---------------+
|
||||
|
|
||||
+-----------------+--------+--------+-----------------+
|
||||
| | | | +--\
|
||||
v v v v |
|
||||
/-----------\ /-----------\ /-----------\ /-----------\ |
|
||||
| Begin cCCC| | Begin cCCC| | Begin cCCC| | Begin cCCC| |
|
||||
| Object 0 | | Object 1 | | Object 2 | | Object 3 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 0 | | unit 1 | | unit 2 | | unit 3 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | +-\
|
||||
| unit 4 | | unit 5 | | unit 6 | | unit 7 | | Object
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ +- Set
|
||||
| stripe | | stripe | | stripe | | stripe | | 1
|
||||
| unit 8 | | unit 9 | | unit 10 | | unit 11 | +-/
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 12 | | unit 13 | | unit 14 | | unit 15 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| End cCCC | | End cCCC | | End cCCC | | End cCCC | |
|
||||
| Object 0 | | Object 1 | | Object 2 | | Object 3 | |
|
||||
\-----------/ \-----------/ \-----------/ \-----------/ |
|
||||
|
|
||||
+--/
|
||||
+---------------+
|
||||
| Client Data |
|
||||
| Format |
|
||||
| cCCC |
|
||||
+---------------+
|
||||
|
|
||||
+-----------------+--------+--------+-----------------+
|
||||
| | | | +--\
|
||||
v v v v |
|
||||
/-----------\ /-----------\ /-----------\ /-----------\ |
|
||||
| Begin cCCC| | Begin cCCC| | Begin cCCC| | Begin cCCC| |
|
||||
| Object 0 | | Object 1 | | Object 2 | | Object 3 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 0 | | unit 1 | | unit 2 | | unit 3 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | +-\
|
||||
| unit 4 | | unit 5 | | unit 6 | | unit 7 | | Object
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ +- Set
|
||||
| stripe | | stripe | | stripe | | stripe | | 1
|
||||
| unit 8 | | unit 9 | | unit 10 | | unit 11 | +-/
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 12 | | unit 13 | | unit 14 | | unit 15 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| End cCCC | | End cCCC | | End cCCC | | End cCCC | |
|
||||
| Object 0 | | Object 1 | | Object 2 | | Object 3 | |
|
||||
\-----------/ \-----------/ \-----------/ \-----------/ |
|
||||
|
|
||||
+--/
|
||||
|
||||
+--\
|
||||
|
|
||||
/-----------\ /-----------\ /-----------\ /-----------\ |
|
||||
| Begin cCCC| | Begin cCCC| | Begin cCCC| | Begin cCCC| |
|
||||
| Object 4 | | Object 5 | | Object 6 | | Object 7 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 15 | | unit 16 | | unit 17 | | unit 18 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | +-\
|
||||
| unit 19 | | unit 20 | | unit 21 | | unit 22 | | Object
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ +- Set
|
||||
| stripe | | stripe | | stripe | | stripe | | 2
|
||||
| unit 23 | | unit 24 | | unit 25 | | unit 26 | +-/
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 27 | | unit 28 | | unit 29 | | unit 30 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| End cCCC | | End cCCC | | End cCCC | | End cCCC | |
|
||||
| Object 4 | | Object 5 | | Object 6 | | Object 7 | |
|
||||
\-----------/ \-----------/ \-----------/ \-----------/ |
|
||||
|
|
||||
+--/
|
||||
+--\
|
||||
|
|
||||
/-----------\ /-----------\ /-----------\ /-----------\ |
|
||||
| Begin cCCC| | Begin cCCC| | Begin cCCC| | Begin cCCC| |
|
||||
| Object 4 | | Object 5 | | Object 6 | | Object 7 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 16 | | unit 17 | | unit 18 | | unit 19 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | +-\
|
||||
| unit 20 | | unit 21 | | unit 22 | | unit 23 | | Object
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ +- Set
|
||||
| stripe | | stripe | | stripe | | stripe | | 2
|
||||
| unit 24 | | unit 25 | | unit 26 | | unit 27 | +-/
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| stripe | | stripe | | stripe | | stripe | |
|
||||
| unit 28 | | unit 29 | | unit 30 | | unit 31 | |
|
||||
+-----------+ +-----------+ +-----------+ +-----------+ |
|
||||
| End cCCC | | End cCCC | | End cCCC | | End cCCC | |
|
||||
| Object 4 | | Object 5 | | Object 6 | | Object 7 | |
|
||||
\-----------/ \-----------/ \-----------/ \-----------/ |
|
||||
|
|
||||
+--/
|
||||
|
||||
Three important variables determine how Ceph stripes data:
|
||||
|
||||
@ -306,9 +323,9 @@ Three important variables determine how Ceph stripes data:
|
||||
enough to accommodate many stripe units, and should be a multiple of
|
||||
the stripe unit.
|
||||
|
||||
- **Stripe Unit:** Stripes have a configurable unit size (e.g., 64kb).
|
||||
- **Stripe Width:** Stripes have a configurable unit size (e.g., 64kb).
|
||||
The Ceph client divides the data it will write to objects into equally
|
||||
sized stripe units, except for the last stripe unit. A stripe unit,
|
||||
sized stripe units, except for the last stripe unit. A stripe width
|
||||
should be a fraction of the Object Size so that an object may contain
|
||||
many stripe units.
|
||||
|
||||
@ -347,7 +364,11 @@ storage disk. See `How Ceph Scales`_ for details.
|
||||
get mapped to placement groups in the same pool. So they use the same CRUSH
|
||||
map and the same access controls.
|
||||
|
||||
.. tip:: The objects Ceph stores in the Object Store are not striped.
|
||||
.. tip:: The objects Ceph stores in the Object Store are not striped. RGW, RBD
|
||||
and CephFS automatically stripe their data over multiple RADOS objects.
|
||||
Clients that write directly to the Object Store via ``librados`` must
|
||||
perform the striping (and parallel I/O) for themselves to obtain these
|
||||
benefits.
|
||||
|
||||
|
||||
Data Consistency
|
||||
|
@ -24,7 +24,8 @@ glibc
|
||||
- **syncfs(2)**: For non-btrfs filesystems such as XFS and ext4 where
|
||||
more than one ``ceph-osd`` daemon is used on a single server, Ceph
|
||||
performs significantly better with the ``syncfs(2)`` system call
|
||||
(added in kernel 2.6.39 and glibc 2.14).
|
||||
(added in kernel 2.6.39 and glibc 2.14). New versions of Ceph (v0.55 and
|
||||
later) do not depend on glibc support.
|
||||
|
||||
|
||||
Platforms
|
||||
@ -49,12 +50,12 @@ Argonaut (0.48)
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| Ubuntu | 12.10 | Quantal Quetzal | linux-3.5.4 | 2 | B |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| Debian | 6.0 | Squeeze | linux-2.6.32 | 1, 2 | B |
|
||||
| Debian | 6.0 | Squeeze | linux-2.6.32 | 1, 2, 3 | B |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| Debian | 7.0 | Wheezy | linux-3.2.0 | 1, 2 | B |
|
||||
| Debian | 7.0 | Wheezy | linux-3.2.0 | 1, 2, 3 | B |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
|
||||
Bobtail (0.55)
|
||||
Bobtail (0.56)
|
||||
--------------
|
||||
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
@ -62,17 +63,17 @@ Bobtail (0.55)
|
||||
+==========+==========+====================+==============+=========+============+
|
||||
| Ubuntu | 11.04 | Natty Narwhal | linux-2.6.38 | 1, 2, 3 | B |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| Ubuntu | 11.10 | Oneric Ocelot | linux-3.0.0 | 1, 2, 3 | B |
|
||||
| Ubuntu | 11.10 | Oneiric Ocelot | linux-3.0.0 | 1, 2 | B |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| Ubuntu | 12.04 | Precise Pangolin | linux-3.2.0 | 1, 2 | B, I, C |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| Ubuntu | 12.10 | Quantal Quetzal | linux-3.5.4 | 2 | B |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| Debian | 6.0 | Squeeze | linux-2.6.32 | 1, 2 | B |
|
||||
| Debian | 6.0 | Squeeze | linux-2.6.32 | 1, 2, 3 | B |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| Debian | 7.0 | Wheezy | linux-3.2.0 | 1, 2 | B |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| CentOS | 6.3 | N/A | linux-2.6.32 | 1, 2, 3 | B, I |
|
||||
| CentOS | 6.3 | N/A | linux-2.6.32 | 1, 2 | B, I |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
| Fedora | 17.0 | Beefy Miracle | linux-3.3.4 | 1, 2 | B |
|
||||
+----------+----------+--------------------+--------------+---------+------------+
|
||||
@ -93,10 +94,10 @@ Notes
|
||||
for kernel client (kernel RBD or the Ceph file system). Upgrade to a
|
||||
recommended kernel.
|
||||
|
||||
- **3**: The installed version of ``glibc`` does not support the
|
||||
``syncfs(2)`` system call. Putting multiple ``ceph-osd`` daemons
|
||||
using ``XFS`` or ``ext4`` on the same host will not perform as well as
|
||||
they could.
|
||||
- **3**: The default kernel or installed version of ``glibc`` does not
|
||||
support the ``syncfs(2)`` system call. Putting multiple
|
||||
``ceph-osd`` daemons using ``XFS`` or ``ext4`` on the same host will
|
||||
not perform as well as they could.
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
@ -205,7 +205,7 @@ Journal
|
||||
|
||||
``filestore journal parallel``
|
||||
|
||||
:Description:
|
||||
:Description: Enables parallel journaling, default for btrfs.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -213,7 +213,7 @@ Journal
|
||||
|
||||
``filestore journal writeahead``
|
||||
|
||||
:Description:
|
||||
:Description: Enables writeahead journaling, default for xfs.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -221,7 +221,7 @@ Journal
|
||||
|
||||
``filestore journal trailing``
|
||||
|
||||
:Description:
|
||||
:Description: Deprecated, never use.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -233,7 +233,7 @@ Misc
|
||||
|
||||
``filestore merge threshold``
|
||||
|
||||
:Description:
|
||||
:Description: Min number of files in a subdir before merging into parent
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``10``
|
||||
@ -241,7 +241,7 @@ Misc
|
||||
|
||||
``filestore split multiple``
|
||||
|
||||
:Description:
|
||||
:Description: filestore_split_multiple*filestore_merge_threshold*16 is the max files in a subdir before splitting into child directories.
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``2``
|
||||
@ -249,7 +249,7 @@ Misc
|
||||
|
||||
``filestore update to``
|
||||
|
||||
:Description:
|
||||
:Description: Limits filestore auto upgrade to specified version.
|
||||
:Type: Integer
|
||||
:Required: No
|
||||
:Default: ``1000``
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
``ms tcp nodelay``
|
||||
|
||||
:Description:
|
||||
:Description: Disables Nagle's algorithm on messenger TCP sessions.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``true``
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
``ms initial backoff``
|
||||
|
||||
:Description:
|
||||
:Description: The initial time to wait before reconnecting on a fault.
|
||||
:Type: Double
|
||||
:Required: No
|
||||
:Default: ``.2``
|
||||
@ -21,7 +21,7 @@
|
||||
|
||||
``ms max backoff``
|
||||
|
||||
:Description:
|
||||
:Description: The maximum time to wait before reconnecting on a fault.
|
||||
:Type: Double
|
||||
:Required: No
|
||||
:Default: ``15.0``
|
||||
@ -29,7 +29,7 @@
|
||||
|
||||
``ms nocrc``
|
||||
|
||||
:Description:
|
||||
:Description: Disables CRC on network messages. May increase performance if CPU-limited.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -37,7 +37,7 @@
|
||||
|
||||
``ms die on bad msg``
|
||||
|
||||
:Description:
|
||||
:Description: Debug option; do not configure.
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -45,7 +45,7 @@
|
||||
|
||||
``ms dispatch throttle bytes``
|
||||
|
||||
:Description:
|
||||
:Description: Throttles total size of messages waiting to be dispatched.
|
||||
:Type: 64-bit Unsigned Integer
|
||||
:Required: No
|
||||
:Default: ``100 << 20``
|
||||
@ -53,7 +53,7 @@
|
||||
|
||||
``ms bind ipv6``
|
||||
|
||||
:Description:
|
||||
:Description: Enable if you want your daemons to bind to IPv6 addresses instead of IPv4 ones. (Not required if you specify a daemon or cluster IP.)
|
||||
:Type: Boolean
|
||||
:Required: No
|
||||
:Default: ``false``
|
||||
@ -61,7 +61,7 @@
|
||||
|
||||
``ms rwthread stack bytes``
|
||||
|
||||
:Description:
|
||||
:Description: Debug option for stack size; do not configure.
|
||||
:Type: 64-bit Unsigned Integer
|
||||
:Required: No
|
||||
:Default: ``1024 << 10``
|
||||
@ -69,7 +69,7 @@
|
||||
|
||||
``ms tcp read timeout``
|
||||
|
||||
:Description:
|
||||
:Description: Controls how long (in seconds) the messenger will wait before closing an idle connection.
|
||||
:Type: 64-bit Unsigned Integer
|
||||
:Required: No
|
||||
:Default: ``900``
|
||||
@ -77,7 +77,7 @@
|
||||
|
||||
``ms inject socket failures``
|
||||
|
||||
:Description:
|
||||
:Description: Debug option; do not configure.
|
||||
:Type: 64-bit Unsigned Integer
|
||||
:Required: No
|
||||
:Default: ``0``
|
||||
|
@ -19,7 +19,7 @@ Enter a password for the root user.
|
||||
On the admin host, generate an ``ssh`` key without specifying a passphrase
|
||||
and use the default locations. ::
|
||||
|
||||
sudo -s
|
||||
sudo -i
|
||||
ssh-keygen
|
||||
Generating public/private key pair.
|
||||
Enter file in which to save the key (/root/.ssh/id_rsa):
|
||||
@ -27,14 +27,8 @@ and use the default locations. ::
|
||||
Enter same passphrase again:
|
||||
Your identification has been saved in /root/.ssh/id_rsa.
|
||||
Your public key has been saved in /root/.ssh/id_rsa.pub.
|
||||
|
||||
You may use RSA or DSA keys. Once you generate your keys, copy them to each
|
||||
OSD host. For example::
|
||||
|
||||
ssh-copy-id root@myserver01
|
||||
ssh-copy-id root@myserver02
|
||||
|
||||
Modify your ``~/.ssh/config`` file to login as ``root``, as follows::
|
||||
Modify your ``/root/.ssh/config`` file to log in as ``root``, as follows::
|
||||
|
||||
Host myserver01
|
||||
Hostname myserver01.fully-qualified-domain.com
|
||||
@ -43,6 +37,11 @@ Modify your ``~/.ssh/config`` file to login as ``root``, as follows::
|
||||
Hostname myserver02.fully-qualified-domain.com
|
||||
User root
|
||||
|
||||
You may use RSA or DSA keys. Once you generate your keys, copy them to each
|
||||
OSD host. For example::
|
||||
|
||||
ssh-copy-id root@myserver01
|
||||
ssh-copy-id root@myserver02
|
||||
|
||||
Copy Configuration File to All Hosts
|
||||
====================================
|
||||
@ -55,9 +54,9 @@ if you are using ``mkcephfs`` to deploy Ceph.
|
||||
|
||||
::
|
||||
|
||||
ssh myserver01 sudo tee /etc/ceph/ceph.conf < /etc/ceph/ceph.conf
|
||||
ssh myserver02 sudo tee /etc/ceph/ceph.conf < /etc/ceph/ceph.conf
|
||||
ssh myserver03 sudo tee /etc/ceph/ceph.conf < /etc/ceph/ceph.conf
|
||||
sudo ssh myserver01 tee /etc/ceph/ceph.conf < /etc/ceph/ceph.conf
|
||||
sudo ssh myserver02 tee /etc/ceph/ceph.conf < /etc/ceph/ceph.conf
|
||||
sudo ssh myserver03 tee /etc/ceph/ceph.conf < /etc/ceph/ceph.conf
|
||||
|
||||
|
||||
Create the Default Directories
|
||||
@ -115,7 +114,7 @@ root password. See `Authentication`_ when running with ``cephx`` enabled.
|
||||
When you start or stop your cluster, you will not have to use ``sudo`` or
|
||||
provide passwords. For example::
|
||||
|
||||
service ceph -a start
|
||||
sudo service ceph -a start
|
||||
|
||||
See `Operating a Cluster`_ for details.
|
||||
|
||||
|
@ -107,10 +107,9 @@ hard disks than older hosts in the cluster (i.e., they may have greater weight).
|
||||
|
||||
ssh {new-osd} sudo tee /etc/ceph/ceph.conf < /etc/ceph/ceph.conf
|
||||
|
||||
#. Create the OSD. ::
|
||||
#. Create the OSD. If no UUID is given, it will be set automatically when the OSD starts up. ::
|
||||
|
||||
ceph osd create {osd-num}
|
||||
ceph osd create 123 #for example
|
||||
ceph osd create [{uuid}]
|
||||
|
||||
#. Initialize the OSD data directory. ::
|
||||
|
||||
|
@ -151,10 +151,10 @@ Mark an OSD as lost. This may result in permanent data loss. Use with caution. :
|
||||
|
||||
ceph osd lost [--yes-i-really-mean-it]
|
||||
|
||||
Create a new OSD. If no ID is given, a new ID is automatically selected
|
||||
if possible. ::
|
||||
Create a new OSD. If no UUID is given, it will be set automatically when the OSD
|
||||
starts up. ::
|
||||
|
||||
ceph osd create [{id}]
|
||||
ceph osd create [{uuid}]
|
||||
|
||||
Remove the given OSD(s). ::
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -113,7 +113,7 @@ See `<IfModule> Directive`_ for additional details.
|
||||
.. _<IfModule> Directive: http://httpd.apache.org/docs/2.2/mod/core.html#ifmodule
|
||||
|
||||
Finally, you should configure Apache to allow encoded slashes, provide paths for
|
||||
log files and to trun off server signatures. ::
|
||||
log files and to turn off server signatures. ::
|
||||
|
||||
<VirtualHost *:80>
|
||||
...
|
||||
@ -171,7 +171,7 @@ Add to Ceph Keyring Entries
|
||||
Once you have created a keyring and key for RADOS GW, add it as an entry in
|
||||
the Ceph keyring. For example::
|
||||
|
||||
ceph -k /etc/ceph/ceph.keyring auth add client.radosgw.gateway -i /etc/ceph/keyring.radosgw.gateway
|
||||
sudo ceph -k /etc/ceph/ceph.keyring auth add client.radosgw.gateway -i /etc/ceph/keyring.radosgw.gateway
|
||||
|
||||
|
||||
Restart Services and Start the RADOS Gateway
|
||||
|
@ -32,9 +32,9 @@ This also prints out the bucket name and creation date of each bucket.
|
||||
.. code-block:: python
|
||||
|
||||
for bucket in conn.get_all_buckets():
|
||||
print "{name}\t{created}".format(
|
||||
name = bucket.name,
|
||||
created = bucket.creation_date,
|
||||
print "{name}\t{created}".format(
|
||||
name = bucket.name,
|
||||
created = bucket.creation_date,
|
||||
)
|
||||
|
||||
The output will look something like this::
|
||||
|
@ -49,7 +49,7 @@ XFS_MKFS_OPTIONS="-l su=32k"
|
||||
# until we can work through getting them all passing reliably.
|
||||
TESTS="1-9 11-15 17 19-21 26-29 31-34 41 46-48 50-54 56 61 63-67 69-70 74-76"
|
||||
TESTS="${TESTS} 78 79 84-89 91-92 100 103 105 108 110 116-121 124 126"
|
||||
TESTS="${TESTS} 129-135 137-141 164-167 179 182-184 186-190 192 194"
|
||||
TESTS="${TESTS} 129-135 137-141 164-167 182 184 186-190 192 194"
|
||||
TESTS="${TESTS} 196 199 201 203 214-216 220-227 234 236-238 241 243-249"
|
||||
TESTS="${TESTS} 253 257-259 261 262 269 273 275 277 278 280 285 286"
|
||||
# 275 was the highest available test as of 4/10/12.
|
||||
|
@ -3,5 +3,7 @@
|
||||
mkdir /tmp/cephtest
|
||||
wget https://raw.github.com/ceph/ceph/master/qa/run_xfstests.sh
|
||||
chmod +x run_xfstests.sh
|
||||
# tests excluded require extra packages for advanced acl and quota support
|
||||
./run_xfstests.sh -c 1 -f xfs -t /dev/vdb -s /dev/vdc 1-26 28-49 51-63 65-83 85-233 235-291
|
||||
# tests excluded fail in the current testing vm regardless of whether
|
||||
# rbd is used
|
||||
|
||||
./run_xfstests.sh -c 1 -f xfs -t /dev/vdb -s /dev/vdc 1-17 19-26 28-49 51-61 63 66-79 83 85-105 108-110 112-135 137-170 172-204 206-217 220-227 230-231 233 235-241 243-249 251-262 264-278 281-286 288-289
|
||||
|
@ -228,10 +228,12 @@ bin_DEBUGPROGRAMS += smalliobench
|
||||
|
||||
smalliobenchfs_SOURCES = test/bench/small_io_bench_fs.cc test/bench/filestore_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
|
||||
smalliobenchfs_LDADD = librados.la -lboost_program_options $(LIBOS_LDA) $(LIBGLOBAL_LDA)
|
||||
smalliobenchfs_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
|
||||
bin_DEBUGPROGRAMS += smalliobenchfs
|
||||
|
||||
smalliobenchdumb_SOURCES = test/bench/small_io_bench_dumb.cc test/bench/dumb_backend.cc test/bench/detailed_stat_collector.cc test/bench/bencher.cc
|
||||
smalliobenchdumb_LDADD = librados.la -lboost_program_options $(LIBOS_LDA) $(LIBGLOBAL_LDA)
|
||||
smalliobenchdumb_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
|
||||
bin_DEBUGPROGRAMS += smalliobenchdumb
|
||||
|
||||
tpbench_SOURCES = test/bench/tp_bench.cc test/bench/detailed_stat_collector.cc
|
||||
@ -862,6 +864,7 @@ test_mon_workloadgen_SOURCES = \
|
||||
osdc/Objecter.cc \
|
||||
osdc/Striper.cc
|
||||
test_mon_workloadgen_LDADD = $(LIBOS_LDA) $(LIBGLOBAL_LDA)
|
||||
test_mon_workloadgen_CXXFLAGS = ${CRYPTO_CXXFLAGS} ${AM_CXXFLAGS}
|
||||
bin_DEBUGPROGRAMS += test_mon_workloadgen
|
||||
|
||||
test_rados_api_io_SOURCES = test/librados/io.cc test/librados/test.cc
|
||||
|
@ -3656,6 +3656,12 @@ void Client::flush_cap_releases()
|
||||
|
||||
void Client::tick()
|
||||
{
|
||||
if (cct->_conf->client_debug_inject_tick_delay > 0) {
|
||||
sleep(cct->_conf->client_debug_inject_tick_delay);
|
||||
assert(0 == cct->_conf->set_val("client_debug_inject_tick_delay", "0"));
|
||||
cct->_conf->apply_changes(NULL);
|
||||
}
|
||||
|
||||
ldout(cct, 21) << "tick" << dendl;
|
||||
tick_event = new C_C_Tick(this);
|
||||
timer.add_event_after(cct->_conf->client_tick_interval, tick_event);
|
||||
|
@ -183,6 +183,7 @@ OPTION(client_oc_target_dirty, OPT_INT, 1024*1024* 8) // target dirty (keep this
|
||||
OPTION(client_oc_max_dirty_age, OPT_DOUBLE, 5.0) // max age in cache before writeback
|
||||
OPTION(client_oc_max_objects, OPT_INT, 1000) // max objects in cache
|
||||
OPTION(client_debug_force_sync_read, OPT_BOOL, false) // always read synchronously (go to osds)
|
||||
OPTION(client_debug_inject_tick_delay, OPT_INT, 0) // delay the client tick for a number of seconds
|
||||
// note: the max amount of "in flight" dirty data is roughly (max - target)
|
||||
OPTION(fuse_use_invalidate_cb, OPT_BOOL, false) // use fuse 2.8+ invalidate callback to keep page cache consistent
|
||||
OPTION(fuse_big_writes, OPT_BOOL, true)
|
||||
|
@ -1227,16 +1227,14 @@ int FileStore::_detect_fs()
|
||||
} else {
|
||||
dout(0) << "mount syncfs(2) syscall supported by glibc BUT NOT the kernel" << dendl;
|
||||
}
|
||||
#endif
|
||||
#ifdef SYS_syncfs
|
||||
#elif defined(SYS_syncfs)
|
||||
if (syscall(SYS_syncfs, fd) == 0) {
|
||||
dout(0) << "mount syscall(SYS_syncfs, fd) fully supported" << dendl;
|
||||
have_syncfs = true;
|
||||
} else {
|
||||
dout(0) << "mount syscall(SYS_syncfs, fd) supported by libc BUT NOT the kernel" << dendl;
|
||||
}
|
||||
#endif
|
||||
#ifdef __NR_syncfs
|
||||
#elif defined(__NR_syncfs)
|
||||
if (syscall(__NR_syncfs, fd) == 0) {
|
||||
dout(0) << "mount syscall(__NR_syncfs, fd) fully supported" << dendl;
|
||||
have_syncfs = true;
|
||||
|
@ -268,6 +268,11 @@ int RGWCache<T>::get_obj(void *ctx, void **handle, rgw_obj& obj, bufferlist& obl
|
||||
return r;
|
||||
}
|
||||
|
||||
if (obl.length() == end + 1) {
|
||||
/* in this case, most likely object contains more data, we can't cache it */
|
||||
return r;
|
||||
}
|
||||
|
||||
bufferptr p(r);
|
||||
bufferlist& bl = info.data;
|
||||
bl.clear();
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
#define dout_subsys ceph_subsys_rgw
|
||||
|
||||
#define READ_CHUNK_LEN (16 * 1024)
|
||||
#define READ_CHUNK_LEN (512 * 1024)
|
||||
|
||||
static map<string, string> ext_mime_map;
|
||||
|
||||
@ -41,25 +41,24 @@ int rgw_get_obj(RGWRados *rgwstore, void *ctx, rgw_bucket& bucket, string& key,
|
||||
bufferlist::iterator iter;
|
||||
int request_len = READ_CHUNK_LEN;
|
||||
rgw_obj obj(bucket, key);
|
||||
ret = rgwstore->prepare_get_obj(ctx, obj, NULL, NULL, pattrs, NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, &handle, &err);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
do {
|
||||
ret = rgwstore->get_obj(ctx, &handle, obj, bl, 0, request_len - 1);
|
||||
ret = rgwstore->prepare_get_obj(ctx, obj, NULL, NULL, pattrs, NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, &handle, &err);
|
||||
if (ret < 0)
|
||||
goto done;
|
||||
return ret;
|
||||
|
||||
ret = rgwstore->get_obj(ctx, &handle, obj, bl, 0, request_len - 1);
|
||||
rgwstore->finish_get_obj(&handle);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (ret < request_len)
|
||||
break;
|
||||
bl.clear();
|
||||
request_len *= 2;
|
||||
} while (true);
|
||||
|
||||
ret = 0;
|
||||
done:
|
||||
rgwstore->finish_get_obj(&handle);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void parse_mime_map_line(const char *start, const char *end)
|
||||
|
@ -26,60 +26,14 @@
|
||||
#include <sys/xattr.h>
|
||||
#include <signal.h>
|
||||
|
||||
void do_sigusr1(int s) {}
|
||||
|
||||
// wait_and_suspend() forks the process, waits for the
|
||||
// child to signal SIGUSR1, suspends the child with SIGSTOP
|
||||
// sleeps for s seconds, and then unsuspends the child,
|
||||
// waits for the child to exit, and then returns the exit code
|
||||
// of the child
|
||||
static int _wait_and_suspend(int s) {
|
||||
|
||||
int fpid = fork();
|
||||
if (fpid != 0) {
|
||||
// wait for child to signal
|
||||
signal(SIGUSR1, &do_sigusr1);
|
||||
sigset_t set;
|
||||
sigaddset(&set, SIGUSR1);
|
||||
int sig;
|
||||
sigwait(&set, &sig);
|
||||
|
||||
// fork and suspend child, sleep for 20 secs, and resume
|
||||
kill(fpid, SIGSTOP);
|
||||
sleep(s);
|
||||
kill(fpid, SIGCONT);
|
||||
int status;
|
||||
wait(&status);
|
||||
if (WIFEXITED(status))
|
||||
return WEXITSTATUS(status);
|
||||
return 1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// signal_for_suspend sends the parent the SIGUSR1 signal
|
||||
// and sleeps for 1 second so that it can be suspended at the
|
||||
// point of the call
|
||||
static void _signal_for_suspend() {
|
||||
kill(getppid(), SIGUSR1);
|
||||
}
|
||||
|
||||
TEST(Caps, ReadZero) {
|
||||
|
||||
int w = _wait_and_suspend(20);
|
||||
if (w >= 0) {
|
||||
ASSERT_EQ(0, w);
|
||||
return;
|
||||
}
|
||||
|
||||
pid_t mypid = getpid();
|
||||
int mypid = getpid();
|
||||
struct ceph_mount_info *cmount;
|
||||
ASSERT_EQ(0, ceph_create(&cmount, NULL));
|
||||
ASSERT_EQ(0, ceph_conf_read_file(cmount, NULL));
|
||||
ASSERT_EQ(0, ceph_mount(cmount, "/"));
|
||||
|
||||
ASSERT_EQ(0, ceph_conf_set(cmount, "client_cache_size", "10"));
|
||||
|
||||
int i = 0;
|
||||
for(; i < 30; ++i) {
|
||||
|
||||
@ -114,7 +68,7 @@ TEST(Caps, ReadZero) {
|
||||
ASSERT_EQ(expect, caps & expect);
|
||||
}
|
||||
|
||||
_signal_for_suspend();
|
||||
ASSERT_EQ(0, ceph_conf_set(cmount, "client_debug_inject_tick_delay", "20"));
|
||||
|
||||
for(i = 0; i < 30; ++i) {
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user